From fa0f0834b021ae452a8109060c740e41b1b2994e Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Thu, 17 Jul 2025 12:07:07 -0700 Subject: [PATCH 001/366] DOC v25.10 Updates [skip ci] --- .../cuda12.9-conda/devcontainer.json | 8 +-- .devcontainer/cuda12.9-pip/devcontainer.json | 6 +- .github/workflows/build.yaml | 28 +++++----- .github/workflows/pandas-tests.yaml | 2 +- .github/workflows/pr.yaml | 56 +++++++++---------- .../workflows/pr_issue_status_automation.yml | 8 +-- .github/workflows/test.yaml | 30 +++++----- .../trigger-breaking-change-alert.yaml | 2 +- README.md | 2 +- VERSION | 2 +- .../all_cuda-129_arch-aarch64.yaml | 10 ++-- .../all_cuda-129_arch-x86_64.yaml | 10 ++-- cpp/examples/versions.cmake | 2 +- dependencies.yaml | 54 +++++++++--------- java/ci/README.md | 4 +- java/pom.xml | 2 +- python/cudf/cudf/VERSION | 2 +- .../dependencies.yaml | 6 +- python/cudf/pyproject.toml | 14 ++--- python/cudf_kafka/pyproject.toml | 2 +- python/cudf_polars/docs/overview.md | 2 +- python/cudf_polars/pyproject.toml | 6 +- python/custreamz/pyproject.toml | 4 +- python/dask_cudf/pyproject.toml | 6 +- python/libcudf/pyproject.toml | 8 +-- python/pylibcudf/pyproject.toml | 10 ++-- 26 files changed, 143 insertions(+), 143 deletions(-) diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 1e00021f0ed..5c010923260 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -5,19 +5,19 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:25.08-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.08-cuda12.9-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda" ], "hostRequirements": { "gpu": "optional" }, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:25.8": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": { "version": "12.9", "installCompilers": false, "installProfilers": true, @@ -38,7 +38,7 @@ "installnvJPEG": false, "pruneStaticLibs": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index 0debf91e159..666e6b872f6 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -5,19 +5,19 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.08-cpp-cuda12.9-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.08-cuda12.9-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip" ], "hostRequirements": { "gpu": "optional" }, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index caea3dad7c4..8576f6af66b 100644 --- 
a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -46,7 +46,7 @@ jobs: cpp-build: needs: [telemetry-setup] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: python-build: needs: [telemetry-setup, cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -77,7 +77,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -90,7 +90,7 @@ jobs: wheel-build-libcudf: needs: [telemetry-setup] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -105,7 +105,7 @@ jobs: wheel-publish-libcudf: needs: wheel-build-libcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -116,7 +116,7 @@ jobs: wheel-build-pylibcudf: needs: [telemetry-setup, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -128,7 +128,7 @@ jobs: wheel-publish-pylibcudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -139,7 +139,7 @@ jobs: wheel-build-cudf: needs: [telemetry-setup, wheel-build-pylibcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -151,7 +151,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -162,7 +162,7 @@ jobs: wheel-build-dask-cudf: needs: [telemetry-setup, wheel-build-cudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -177,7 +177,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -188,7 +188,7 @@ jobs: wheel-build-cudf-polars: needs: [telemetry-setup, wheel-build-pylibcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -203,7 +203,7 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index ef06159ab90..56fba1f6d8f 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -22,7 +22,7 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' build_type: nightly diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2976f30184d..883d005cc61 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -43,7 +43,7 @@ jobs: - telemetry-setup - third-party-integration-tests-cudf-pandas secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 if: always() with: needs: ${{ toJSON(needs) }} @@ -68,7 +68,7 @@ jobs: changed-files: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 with: files_yaml: | test_cpp: @@ -130,14 +130,14 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.08 + uses: 
rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 with: enable_check_generated_files: false ignored_pr_jobs: "telemetry-summarize spark-rapids-jni" conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: pull-request node_type: "cpu16" @@ -145,7 +145,7 @@ jobs: cpp-linters: secrets: inherit needs: checks - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: pull-request script: "ci/cpp_linters.sh" @@ -153,13 +153,13 @@ jobs: conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.10 with: build_type: pull-request conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -167,14 +167,14 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_python.sh conda-python-cudf-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -183,7 +183,7 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -191,7 +191,7 @@ jobs: conda-java-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java with: build_type: pull-request @@ -202,7 +202,7 @@ jobs: conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -213,7 +213,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: pull-request 
node_type: "gpu-l4-latest-1" @@ -223,7 +223,7 @@ jobs: wheel-build-libcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -235,7 +235,7 @@ jobs: wheel-build-pylibcudf: needs: [checks, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" @@ -244,7 +244,7 @@ jobs: wheel-build-cudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" @@ -253,7 +253,7 @@ jobs: wheel-tests-cudf: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -261,7 +261,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -273,7 +273,7 @@ jobs: wheel-tests-cudf-polars: needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -283,7 +283,7 @@ jobs: cudf-polars-polars-tests: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -292,7 +292,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -304,7 +304,7 @@ jobs: wheel-tests-dask-cudf: needs: [wheel-build-dask-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -314,7 +314,7 @@ jobs: devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10 with: node_type: "cpu32" arch: '["amd64"]' @@ -326,7 +326,7 @@ jobs: unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -336,7 +336,7 @@ jobs: third-party-integration-tests-cudf-pandas: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: pull-request branch: ${{ inputs.branch }} @@ -351,7 +351,7 @@ jobs: # run the Pandas unit tests using PR branch needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' @@ -362,7 +362,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: node_type: "cpu4" build_type: pull-request @@ -370,7 +370,7 @@ jobs: narwhals-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 46973456a90..148d83e73d6 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-25.08 + uses: 
rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-25.10 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-25.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -79,7 +79,7 @@ jobs: update-release: # This job sets the PR and its linked issues to the release they are targeting - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: [get-project-id, process-branch-name] with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 293488e6765..de86f3d5b50 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -24,7 +24,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -53,7 +53,7 @@ jobs: script: "ci/test_cpp_memcheck.sh" cpp-linters: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -63,7 +63,7 @@ jobs: file_to_upload: iwyu_results.txt conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -82,7 +82,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -94,7 +94,7 @@ jobs: script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -115,7 +115,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -124,7 +124,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -133,7 +133,7 @@ jobs: script: ci/cudf_pandas_scripts/run_tests.sh third-party-integration-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml wheel-tests-cudf-polars: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -154,7 +154,7 @@ jobs: script: "ci/test_wheel_cudf_polars.sh" cudf-polars-polars-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -163,7 +163,7 @@ jobs: script: "ci/test_cudf_polars_polars_tests.sh" narwhals-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 593fcb1086a..48bf37afc40 100644 --- 
a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/README.md b/README.md index d5c5782882f..04980005846 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ pip install cudf-cu12 cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge/miniforge)) from the `rapidsai` channel: ```bash -conda install -c rapidsai -c conda-forge cudf=25.08 +conda install -c rapidsai -c conda-forge cudf=25.10 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/VERSION b/VERSION index 3af4bda0205..296e35288d1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -25.08.00 +25.10.00 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index f7449e015f9..d53a7b0e731 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -27,7 +27,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==25.8.*,>=0.0.0a0 +- dask-cuda==25.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -40,9 +40,9 @@ dependencies: - ipython - jupyter_client - libcurand-dev -- libkvikio==25.8.*,>=0.0.0a0 +- libkvikio==25.10.*,>=0.0.0a0 - librdkafka>=2.8.0,<2.9.0a0 -- librmm==25.8.*,>=0.0.0a0 +- librmm==25.10.*,>=0.0.0a0 - make - mmh3 - moto>=4.0.8 @@ -81,10 +81,10 @@ dependencies: - python>=3.10,<3.14 - pytorch>=2.4.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==25.8.*,>=0.0.0a0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 - rapids-logger==0.1.*,>=0.0.0a0 - rich -- rmm==25.8.*,>=0.0.0a0 +- rmm==25.10.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index d34654274db..96aac76cb86 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==25.8.*,>=0.0.0a0 +- dask-cuda==25.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 @@ -41,9 +41,9 @@ dependencies: - jupyter_client - libcufile-dev - libcurand-dev -- libkvikio==25.8.*,>=0.0.0a0 +- libkvikio==25.10.*,>=0.0.0a0 - librdkafka>=2.8.0,<2.9.0a0 -- librmm==25.8.*,>=0.0.0a0 +- librmm==25.10.*,>=0.0.0a0 - make - mmh3 - moto>=4.0.8 @@ -82,10 +82,10 @@ dependencies: - python>=3.10,<3.14 - pytorch>=2.4.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==25.8.*,>=0.0.0a0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 - rapids-logger==0.1.*,>=0.0.0a0 - rich -- rmm==25.8.*,>=0.0.0a0 +- rmm==25.10.*,>=0.0.0a0 - s3fs>=2022.3.0 - scikit-build-core>=0.10.0 - scipy diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake index 8d4a8335b47..4ef1f84f8c6 100644 --- a/cpp/examples/versions.cmake +++ b/cpp/examples/versions.cmake @@ -12,4 +12,4 @@ # the License. 
# ============================================================================= -set(CUDF_TAG branch-25.08) +set(CUDF_TAG branch-25.10) diff --git a/dependencies.yaml b/dependencies.yaml index 5123071d430..60fb3ab9ddb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -588,7 +588,7 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==25.8.*,>=0.0.0a0 + - dask-cuda==25.10.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -718,14 +718,14 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==25.8.*,>=0.0.0a0 + - rapids-dask-dependency==25.10.*,>=0.0.0a0 - nvidia-ml-py run_dask_cudf: common: - output_types: [conda, requirements, pyproject] packages: - pynvml>=12.0.0,<13.0.0a0 - - rapids-dask-dependency==25.8.*,>=0.0.0a0 + - rapids-dask-dependency==25.10.*,>=0.0.0a0 run_custreamz: common: - output_types: conda @@ -757,9 +757,9 @@ dependencies: common: - output_types: conda packages: - - libcudf-example==25.8.*,>=0.0.0a0 - - libcudf_kafka==25.8.*,>=0.0.0a0 - - libcudf-tests==25.8.*,>=0.0.0a0 + - libcudf-example==25.10.*,>=0.0.0a0 + - libcudf_kafka==25.10.*,>=0.0.0a0 + - libcudf-tests==25.10.*,>=0.0.0a0 test_java: common: - output_types: conda @@ -874,7 +874,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==25.8.*,>=0.0.0a0 + - dask-cuda==25.10.*,>=0.0.0a0 specific: - output_types: [conda, requirements] matrices: @@ -891,7 +891,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==25.8.*,>=0.0.0a0 + - dask-cuda==25.10.*,>=0.0.0a0 - *numpy - rich test_python_narwhals: @@ -906,7 +906,7 @@ dependencies: common: - output_types: conda packages: - - &libcudf_unsuffixed libcudf==25.8.*,>=0.0.0a0 + - &libcudf_unsuffixed libcudf==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -919,13 +919,13 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcudf-cu12==25.8.*,>=0.0.0a0 + - libcudf-cu12==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_pylibcudf: common: - output_types: conda packages: - - &pylibcudf_unsuffixed pylibcudf==25.8.*,>=0.0.0a0 + - &pylibcudf_unsuffixed pylibcudf==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -938,13 +938,13 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcudf-cu12==25.8.*,>=0.0.0a0 + - pylibcudf-cu12==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_pylibcudf_pyarrow: common: - output_types: conda packages: - - &plc_unsuffixed pylibcudf==25.8.*,>=0.0.0a0 + - &plc_unsuffixed pylibcudf==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -957,13 +957,13 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcudf-cu12[pyarrow]==25.8.*,>=0.0.0a0 + - pylibcudf-cu12[pyarrow]==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*plc_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==25.8.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -976,13 +976,13 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==25.8.*,>=0.0.0a0 + - 
cudf-cu12==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: - output_types: conda packages: - - &cudf_kafka_unsuffixed cudf_kafka==25.8.*,>=0.0.0a0 + - &cudf_kafka_unsuffixed cudf_kafka==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -995,7 +995,7 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu12==25.8.*,>=0.0.0a0 + - cudf_kafka-cu12==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: @@ -1013,7 +1013,7 @@ dependencies: common: - output_types: conda packages: - - &libkvikio_unsuffixed libkvikio==25.8.*,>=0.0.0a0 + - &libkvikio_unsuffixed libkvikio==25.10.*,>=0.0.0a0 - output_types: requirements packages: - --extra-index-url=https://pypi.nvidia.com @@ -1025,7 +1025,7 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libkvikio-cu12==25.8.*,>=0.0.0a0 + - libkvikio-cu12==25.10.*,>=0.0.0a0 - matrix: packages: - *libkvikio_unsuffixed @@ -1033,7 +1033,7 @@ dependencies: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==25.8.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -1046,7 +1046,7 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==25.8.*,>=0.0.0a0 + - librmm-cu12==25.10.*,>=0.0.0a0 - matrix: packages: - *librmm_unsuffixed @@ -1054,7 +1054,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==25.8.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -1067,7 +1067,7 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==25.8.*,>=0.0.0a0 + - rmm-cu12==25.10.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed @@ -1108,17 +1108,17 @@ dependencies: common: - output_types: conda packages: - - dask-cudf==25.8.*,>=0.0.0a0 + - dask-cudf==25.10.*,>=0.0.0a0 depends_on_custreamz: common: - output_types: conda packages: - - custreamz==25.8.*,>=0.0.0a0 + - custreamz==25.10.*,>=0.0.0a0 depends_on_cudf_polars: common: - output_types: conda packages: - - cudf-polars==25.8.*,>=0.0.0a0 + - cudf-polars==25.10.*,>=0.0.0a0 depends_on_narwhals: common: - output_types: [conda, requirements, pyproject] diff --git a/java/ci/README.md b/java/ci/README.md index bb2ba24fc63..5597cb22109 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:12.9.1-devel-rocky8 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-25.08 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-25.10 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-25.08.0-SNAPSHOT-cuda12.jar. +You can find the cuDF jar in java/target/ like cudf-25.10.0-SNAPSHOT-cuda12.jar. 
diff --git a/java/pom.xml b/java/pom.xml index 720a176f24e..6db3ae68ece 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 25.08.0-SNAPSHOT + 25.10.0-SNAPSHOT cudfjni diff --git a/python/cudf/cudf/VERSION b/python/cudf/cudf/VERSION index 3af4bda0205..296e35288d1 100644 --- a/python/cudf/cudf/VERSION +++ b/python/cudf/cudf/VERSION @@ -1 +1 @@ -25.08.00 +25.10.00 diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index bad5d9af315..98b4a31a391 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -188,7 +188,7 @@ dependencies: common: - output_types: conda packages: - - cudf==25.8.*,>=0.0.0a0 + - cudf==25.10.*,>=0.0.0a0 - pandas - pytest - pytest-xdist @@ -264,13 +264,13 @@ dependencies: common: - output_types: conda packages: - - cuml==25.8.*,>=0.0.0a0 + - cuml==25.10.*,>=0.0.0a0 - scikit-learn test_cugraph: common: - output_types: conda packages: - - cugraph==25.8.*,>=0.0.0a0 + - cugraph==25.10.*,>=0.0.0a0 - networkx test_ibis: common: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index ec14f0e209d..55ecf7f46fb 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "cuda-python>=12.6.2,<13.0a0", "cupy-cuda12x>=12.0.0", "fsspec>=0.6.0", - "libcudf==25.8.*,>=0.0.0a0", + "libcudf==25.10.*,>=0.0.0a0", "numba-cuda[cu12]>=0.16.0,<0.17.0a0", "numba>=0.59.1,<0.62.0a0", "numpy>=1.23,<3.0a0", @@ -31,9 +31,9 @@ dependencies = [ "pandas>=2.0,<2.4.0dev0", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", - "pylibcudf==25.8.*,>=0.0.0a0", + "pylibcudf==25.10.*,>=0.0.0a0", "rich", - "rmm==25.8.*,>=0.0.0a0", + "rmm==25.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -121,11 +121,11 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.30.4", "cython>=3.0.3", - "libcudf==25.8.*,>=0.0.0a0", - "librmm==25.8.*,>=0.0.0a0", + "libcudf==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "ninja", - "pylibcudf==25.8.*,>=0.0.0a0", - "rmm==25.8.*,>=0.0.0a0", + "pylibcudf==25.10.*,>=0.0.0a0", + "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.scikit-build] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index a13ba960506..f4be4552feb 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==25.8.*,>=0.0.0a0", + "cudf==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.optional-dependencies] diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index d3087d00f2d..df6af87bbf0 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -8,7 +8,7 @@ You will need: preferred configuration. 
Or else, use [rustup](https://www.rust-lang.org/tools/install) 2. A [cudf development - environment](https://github.com/rapidsai/cudf/blob/branch-25.08/CONTRIBUTING.md#setting-up-your-build-environment). + environment](https://github.com/rapidsai/cudf/blob/branch-25.10/CONTRIBUTING.md#setting-up-your-build-environment). The combined devcontainer works, or whatever your favourite approach is. :::{note} diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index f29d6370a24..bf3968d22e3 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "polars>=1.28,<1.32", - "pylibcudf==25.8.*,>=0.0.0a0", + "pylibcudf==25.10.*,>=0.0.0a0", "typing-extensions; python_version < '3.11'", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -37,7 +37,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "dask-cuda==25.8.*,>=0.0.0a0", + "dask-cuda==25.10.*,>=0.0.0a0", "numpy>=1.23,<3.0a0", "pytest", "pytest-cov", @@ -46,7 +46,7 @@ test = [ ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. experimental = [ "nvidia-ml-py", - "rapids-dask-dependency==25.8.*,>=0.0.0a0", + "rapids-dask-dependency==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 0bbcadcd8a5..0b961894d2d 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -20,8 +20,8 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "confluent-kafka>=2.8.0,<2.9.0a0", - "cudf==25.8.*,>=0.0.0a0", - "cudf_kafka==25.8.*,>=0.0.0a0", + "cudf==25.10.*,>=0.0.0a0", + "cudf_kafka==25.10.*,>=0.0.0a0", "streamz", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 0939ba23308..f0d1d91fbfe 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -19,13 +19,13 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==25.8.*,>=0.0.0a0", + "cudf==25.10.*,>=0.0.0a0", "cupy-cuda12x>=12.0.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.4.0dev0", "pynvml>=12.0.0,<13.0.0a0", - "rapids-dask-dependency==25.8.*,>=0.0.0a0", + "rapids-dask-dependency==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", @@ -47,7 +47,7 @@ cudf = "dask_cudf.backends:CudfBackendEntrypoint" [project.optional-dependencies] test = [ - "dask-cuda==25.8.*,>=0.0.0a0", + "dask-cuda==25.10.*,>=0.0.0a0", "pytest", "pytest-cov", "pytest-xdist", diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 0cdf864bf3a..7b69543d898 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,8 +38,8 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ - "libkvikio==25.8.*,>=0.0.0a0", - "librmm==25.8.*,>=0.0.0a0", + "libkvikio==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "rapids-logger==0.1.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -80,8 +80,8 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "libkvikio==25.8.*,>=0.0.0a0", - "librmm==25.8.*,>=0.0.0a0", + "libkvikio==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "ninja", "rapids-logger==0.1.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index fea40e8821e..561f9eccd64 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -19,10 +19,10 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=12.6.2,<13.0a0", - "libcudf==25.8.*,>=0.0.0a0", + "libcudf==25.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", - "rmm==25.8.*,>=0.0.0a0", + "rmm==25.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -119,10 +119,10 @@ matrix-entry = "cuda_suffixed=true" requires = [ "cmake>=3.30.4", "cython>=3.0.3", - "libcudf==25.8.*,>=0.0.0a0", - "librmm==25.8.*,>=0.0.0a0", + "libcudf==25.10.*,>=0.0.0a0", + "librmm==25.10.*,>=0.0.0a0", "ninja", - "rmm==25.8.*,>=0.0.0a0", + "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[tool.scikit-build] From 40f7ba59bc9cd3ef43085d5825e2d14afadae3d2 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 18 Jul 2025 09:03:29 -0400 Subject: [PATCH 002/366] update versions --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 8 ++++---- .github/workflows/test.yaml | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2875cea0f3c..d9bb501c968 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -82,7 +82,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" date: ${{ inputs.date }} node_type: "gpu-l4-latest-1" script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 908da80d690..f56b086ca8b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -197,7 +197,7 @@ jobs: build_type: pull-request node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/test_java.sh" conda-notebook-tests: needs: [conda-python-build, changed-files] @@ -208,7 +208,7 @@ jobs: build_type: pull-request node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build @@ -218,7 +218,7 @@ jobs: build_type: pull-request node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/build_docs.sh" wheel-build-libcudf: needs: checks @@ -378,7 +378,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: ci/test_narwhals.sh spark-rapids-jni: needs: changed-files diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 52e09dd7d8b..b8af253bc56 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,7 +49,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/test_cpp_memcheck.sh" cpp-linters: secrets: inherit @@ -90,7 +90,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit @@ -102,7 +102,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit @@ -170,5 +170,5 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" - container_image: "rapidsai/ci-conda:25.08-latest" + container_image: "rapidsai/ci-conda:25.10-latest" script: ci/test_narwhals.sh From 3767b7dd8e49456351d4c0220e342d0b098fe4b8 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 21 Jul 2025 07:47:44 -0400 Subject: [PATCH 003/366] fix merge conflict --- dependencies.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dependencies.yaml 
b/dependencies.yaml index 23b003a12a1..63fbbe44f4c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -730,12 +730,8 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: -<<<<<<< HEAD - rapids-dask-dependency==25.10.*,>=0.0.0a0 - nvidia-ml-py -======= - - rapids-dask-dependency==25.8.*,>=0.0.0a0 ->>>>>>> branch-25.08 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] From 3955e23fd1484fbf0c395c0651da6678a4bcaa2f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 23 Jul 2025 09:57:40 -0700 Subject: [PATCH 004/366] Use more pytest fixtures and avoid GPU parameterization in cuDF classic tests (#19419) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids `pytest.mark.parametrize` with GPU objects Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19419 --- python/cudf/cudf/tests/test_scan.py | 22 ++-- python/cudf/cudf/tests/test_serialize.py | 9 +- python/cudf/cudf/tests/test_series.py | 136 +++++++++++------------ python/cudf/cudf/tests/test_seriesmap.py | 8 +- python/cudf/cudf/tests/test_setitem.py | 25 +++-- 5 files changed, 93 insertions(+), 107 deletions(-) diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py index b76566b00e2..d4b21480070 100644 --- a/python/cudf/cudf/tests/test_scan.py +++ b/python/cudf/cudf/tests/test_scan.py @@ -1,6 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. - -from itertools import product +# Copyright (c) 2021-2025, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -11,18 +9,17 @@ from cudf.testing import assert_eq from cudf.testing._utils import INTEGER_TYPES, NUMERIC_TYPES, gen_rand -params_sizes = [0, 1, 2, 5] + +@pytest.fixture(params=NUMERIC_TYPES) +def dtype(request): + return request.param -def _gen_params(): - for t, n in product(NUMERIC_TYPES, params_sizes): - if (t == np.int8 or t == np.int16) and n > 20: - # to keep data in range - continue - yield t, n +@pytest.fixture(params=[0, 1, 5]) +def nelem(request): + return request.param -@pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cumsum(dtype, nelem): if dtype == np.int8: # to keep data in range @@ -86,7 +83,6 @@ def test_cumsum_decimal(dtype): assert_eq(got, expected) -@pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cummin(dtype, nelem): if dtype == np.int8: # to keep data in range @@ -149,7 +145,6 @@ def test_cummin_decimal(dtype): assert_eq(got, expected) -@pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cummax(dtype, nelem): if dtype == np.int8: # to keep data in range @@ -212,7 +207,6 @@ def test_cummax_decimal(dtype): assert_eq(got, expected) -@pytest.mark.parametrize("dtype,nelem", list(_gen_params())) def test_cumprod(dtype, nelem): if dtype == np.int8: # to keep data in range diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index a4e8ce3015c..1c508307e32 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -428,14 +428,15 @@ def test_serialize_sliced_string(): @pytest.mark.parametrize( "columns", [ - cudf.RangeIndex(2), - cudf.Index([1, 2], dtype="int8"), - cudf.MultiIndex( + lambda: cudf.RangeIndex(2), + lambda: cudf.Index([1, 2], dtype="int8"), + lambda: cudf.MultiIndex( 
levels=[["a", "b"], [1, 2]], codes=[[0, 1], [0, 1]], names=["a", 0] ), ], + ids=["RangeIndex", "Index", "MultiIndex"], ) def test_serialize_column_types_preserved(columns): - expected = cudf.DataFrame([[10, 11]], columns=columns) + expected = cudf.DataFrame([[10, 11]], columns=columns()) result = cudf.DataFrame.deserialize(*expected.serialize()) assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 751c98cc845..b47addf4311 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -29,8 +29,8 @@ ) -def _series_na_data(): - return [ +@pytest.fixture( + params=[ pd.Series([0, 1, 2, np.nan, 4, None, 6]), pd.Series( [0, 1, 2, np.nan, 4, None, 6], @@ -48,6 +48,9 @@ def _series_na_data(): pd.Series([None]), pd.Series(["a", "b", "", "c", None, "e"]), ] +) +def ps(request): + return request.param @pytest.mark.parametrize( @@ -756,17 +759,18 @@ def test_series_round_half_up(): @pytest.mark.parametrize( - "series", + "series_data", [ - cudf.Series([1.0, None, np.nan, 4.0], nan_as_null=False), - cudf.Series([1.24430, None, np.nan, 4.423530], nan_as_null=False), - cudf.Series([1.24430, np.nan, 4.423530], nan_as_null=False), - cudf.Series([-1.24430, np.nan, -4.423530], nan_as_null=False), - cudf.Series(np.repeat(np.nan, 100)), + [1.0, None, np.nan, 4.0], + [1.24430, None, np.nan, 4.423530], + [1.24430, np.nan, 4.423530], + [-1.24430, np.nan, -4.423530], + np.repeat(np.nan, 100), ], ) @pytest.mark.parametrize("decimal", [0, 1, 2, 3]) -def test_round_nan_as_null_false(series, decimal): +def test_round_nan_as_null_false(series_data, decimal): + series = cudf.Series(series_data, nan_as_null=False) pser = series.to_pandas() result = series.round(decimal) expected = pser.round(decimal) @@ -836,7 +840,6 @@ def test_series_round_decimal( assert_eq(result_half_even, expected_ser_half_even) -@pytest.mark.parametrize("ps", _series_na_data()) @pytest.mark.parametrize("nan_as_null", [True, False, None]) def test_series_isnull_isna(ps, nan_as_null): nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) @@ -852,7 +855,6 @@ def test_series_isnull_isna(ps, nan_as_null): assert_eq(ps.isna(), gs.isna()) -@pytest.mark.parametrize("ps", _series_na_data()) @pytest.mark.parametrize("nan_as_null", [True, False, None]) def test_series_notnull_notna(ps, nan_as_null): nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) @@ -911,43 +913,44 @@ def test_series_memory_usage(): @pytest.mark.parametrize( - "sr,expected_psr", + "sr_data,expected_psr", [ ( - cudf.Series([1, 2, None, 3], dtype="uint8"), + pa.array([1, 2, None, 3], type=pa.uint8()), pd.Series([1, 2, None, 3], dtype=pd.UInt8Dtype()), ), ( - cudf.Series([23, None, None, 32], dtype="uint16"), + pa.array([23, None, None, 32], type=pa.uint16()), pd.Series([23, None, None, 32], dtype=pd.UInt16Dtype()), ), ( - cudf.Series([None, 123, None, 1], dtype="uint32"), + pa.array([None, 123, None, 1], type=pa.uint32()), pd.Series([None, 123, None, 1], dtype=pd.UInt32Dtype()), ), ( - cudf.Series([234, 2323, 23432, None, None, 224], dtype="uint64"), + pa.array([234, 2323, 23432, None, None, 224], type=pa.uint64()), pd.Series( [234, 2323, 23432, None, None, 224], dtype=pd.UInt64Dtype() ), ), ( - cudf.Series([-10, 1, None, -1, None, 3], dtype="int8"), + pa.array([-10, 1, None, -1, None, 3], type=pa.int8()), pd.Series([-10, 1, None, -1, None, 3], dtype=pd.Int8Dtype()), ), ( - cudf.Series([111, None, 222, None, 13], dtype="int16"), + pa.array([111, 
None, 222, None, 13], type=pa.int16()), pd.Series([111, None, 222, None, 13], dtype=pd.Int16Dtype()), ), ( - cudf.Series([11, None, 22, 33, None, 2, None, 3], dtype="int32"), + pa.array([11, None, 22, 33, None, 2, None, 3], type=pa.int32()), pd.Series( [11, None, 22, 33, None, 2, None, 3], dtype=pd.Int32Dtype() ), ), ( - cudf.Series( - [32431, None, None, 32322, 0, 10, -32324, None], dtype="int64" + pa.array( + [32431, None, None, 32322, 0, 10, -32324, None], + type=pa.int64(), ), pd.Series( [32431, None, None, 32322, 0, 10, -32324, None], @@ -955,9 +958,9 @@ def test_series_memory_usage(): ), ), ( - cudf.Series( + pa.array( [True, None, False, None, False, True, True, False], - dtype="bool_", + type=pa.bool_(), ), pd.Series( [True, None, False, None, False, True, True, False], @@ -965,7 +968,7 @@ def test_series_memory_usage(): ), ), ( - cudf.Series( + pa.array( [ "abc", "a", @@ -976,7 +979,7 @@ def test_series_memory_usage(): None, "rapids ai", ], - dtype="object", + type=pa.string(), ), pd.Series( [ @@ -993,9 +996,9 @@ def test_series_memory_usage(): ), ), ( - cudf.Series( + pa.array( [1, 2, None, 10.2, None], - dtype="float32", + type=pa.float32(), ), pd.Series( [1, 2, None, 10.2, None], @@ -1004,7 +1007,8 @@ def test_series_memory_usage(): ), ], ) -def test_series_to_pandas_nullable_dtypes(sr, expected_psr): +def test_series_to_pandas_nullable_dtypes(sr_data, expected_psr): + sr = cudf.Series(sr_data) actual_psr = sr.to_pandas(nullable=True) assert_eq(actual_psr, expected_psr) @@ -1066,22 +1070,24 @@ def custom_add_func(sr, val): @pytest.mark.parametrize( - "data", - [cudf.Series([1, 2, 3]), cudf.Series([10, 11, 12], index=[1, 2, 3])], + "pd_data", + [pd.Series([1, 2, 3]), pd.Series([10, 11, 12], index=[1, 2, 3])], ) @pytest.mark.parametrize( "other", [ - cudf.Series([4, 5, 6]), - cudf.Series([4, 5, 6, 7, 8]), - cudf.Series([4, np.nan, 6], nan_as_null=False), + pd.Series([4, 5, 6]), + pd.Series([4, 5, 6, 7, 8]), + pd.Series([4, np.nan, 6]), [4, np.nan, 6], {1: 9}, ], ) -def test_series_update(data, other): +def test_series_update(pd_data, other): + data = cudf.Series.from_pandas(pd_data) gs = data.copy(deep=True) - if isinstance(other, cudf.Series): + if isinstance(other, pd.Series): + other = cudf.Series.from_pandas(other, nan_as_null=False) g_other = other.copy(deep=True) p_other = g_other.to_pandas() else: @@ -1344,28 +1350,20 @@ def test_explode(data, ignore_index, p_index): @pytest.mark.parametrize( - "data, expected", + "data", [ - ( - [cudf.Series([1, 2, 3]), cudf.Series([10, 20])], - cudf.Series([[1, 2, 3], [10, 20]]), - ), - ( - [cudf.Series([1, 2, 3]), None, cudf.Series([10, 20, np.nan])], - cudf.Series([[1, 2, 3], None, [10, 20, np.nan]]), - ), - ( - [cp.array([5, 6]), cudf.NA, cp.array([1])], - cudf.Series([[5, 6], None, [1]]), - ), - ( - [None, None, None, None, None, cudf.Series([10, 20])], - cudf.Series([None, None, None, None, None, [10, 20]]), - ), + [[1, 2, 3], [10, 20]], + [[1.0, 2.0, 3.0], None, [10.0, 20.0, np.nan]], + [[5, 6], None, [1]], + [None, None, None, None, None, [10, 20]], ], ) -def test_nested_series_from_sequence_data(data, expected): - actual = cudf.Series(data) +@pytest.mark.parametrize("klass", [cudf.Series, list, cp.array]) +def test_nested_series_from_sequence_data(data, klass): + actual = cudf.Series( + [klass(val) if val is not None else val for val in data] + ) + expected = cudf.Series(data) assert_eq(actual, expected) @@ -1648,14 +1646,15 @@ def test_series_add_suffix(): @pytest.mark.parametrize( - "cudf_series", + "data", [ - 
cudf.Series([0.25, 0.5, 0.2, -0.05]), - cudf.Series([0, 1, 2, np.nan, 4, cudf.NA, 6]), + [0.25, 0.5, 0.2, -0.05], + [0, 1, 2, np.nan, 4, cudf.NA, 6], ], ) @pytest.mark.parametrize("lag", [1, 2, 3, 4]) -def test_autocorr(cudf_series, lag): +def test_autocorr(data, lag): + cudf_series = cudf.Series(data) psr = cudf_series.to_pandas() cudf_corr = cudf_series.autocorr(lag=lag) @@ -1822,10 +1821,10 @@ def test_isin_numeric(data, values): assert_eq(got, expected) -@pytest.mark.xfail(raises=TypeError) def test_fill_new_category(): gs = cudf.Series(pd.Categorical(["a", "b", "c"])) - gs[0:1] = "d" + with pytest.raises(TypeError): + gs[0:1] = "d" @pytest.mark.skipif( @@ -1892,9 +1891,9 @@ def test_isin_datetime(data, values): "data", [ [], - pd.Series(["this", "is", None, "a", "test"]), - pd.Series(["test", "this", "test", "is", None, "test", "a", "test"]), - pd.Series(["0", "12", "14"]), + ["this", "is", None, "a", "test"], + ["test", "this", "test", "is", None, "test", "a", "test"], + ["0", "12", "14"], ], ) @pytest.mark.parametrize( @@ -1971,14 +1970,8 @@ def test_diff(dtype, period, data_empty): assert_eq(diffed_outcome, expected_outcome) -@pytest.mark.parametrize( - "data", - [ - ["a", "b", "c", "d", "e"], - ], -) -def test_diff_unsupported_dtypes(data): - gs = cudf.Series(data) +def test_diff_unsupported_dtypes(): + gs = cudf.Series(["a", "b", "c", "d", "e"]) with pytest.raises( TypeError, match=r"unsupported operand type\(s\)", @@ -3007,7 +3000,6 @@ def test_null_like_to_nan_pandas_compat(): assert_eq(ser, pser) -@pytest.mark.parametrize("ps", _series_na_data()) def test_roundtrip_series_plc_column(ps): expect = cudf.Series(ps) actual = cudf.Series.from_pylibcudf(*expect.to_pylibcudf()) diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py index db1de7d0cf4..b232275e8c9 100644 --- a/python/cudf/cudf/tests/test_seriesmap.py +++ b/python/cudf/cudf/tests/test_seriesmap.py @@ -1,6 +1,5 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
-from itertools import product
 from math import floor

 import numpy as np
@@ -44,11 +43,10 @@ def test_series_map_callable_numeric_basic():
     assert_eq(expected_function, actual_function)


-@pytest.mark.parametrize("nelem", list(product([2, 10, 100, 1000])))
-def test_series_map_callable_numeric_random(nelem):
+def test_series_map_callable_numeric_random():
     # Generate data
     rng = np.random.default_rng(seed=0)
-    data = rng.random(nelem) * 100
+    data = rng.random(50) * 100

     sr = Series(data)
     pdsr = pd.Series(data)

diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index 2ab9d41cd2c..ffbf21b5548 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -10,10 +10,10 @@
 from cudf.testing._utils import assert_exceptions_equal, expect_warning_if


-@pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})])
 @pytest.mark.parametrize("arg", [[True, False, True], [True, True, True]])
 @pytest.mark.parametrize("value", [0, -1])
-def test_dataframe_setitem_bool_mask_scaler(df, arg, value):
+def test_dataframe_setitem_bool_mask_scaler(arg, value):
+    df = pd.DataFrame({"a": [1, 2, 3]})
     gdf = cudf.from_pandas(df)

     df[arg] = value
@@ -50,8 +50,6 @@ def test_dataframe_setitem_columns(df, arg, value):
     assert_eq(df, gdf, check_dtype=False)


-@pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})])
-@pytest.mark.parametrize("arg", [["b", "c"]])
 @pytest.mark.parametrize(
     "value",
     [
@@ -66,7 +64,9 @@ def test_dataframe_setitem_columns(df, arg, value):
         np.timedelta64(34234324234324234, "ns"),
     ],
 )
-def test_dataframe_setitem_new_columns(df, arg, value):
+def test_dataframe_setitem_new_columns(value):
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    arg = ["b", "c"]
     gdf = cudf.from_pandas(df)

     cudf_replace_value = value
@@ -92,11 +92,11 @@ def test_series_setitem_index():
     assert_eq(df, gdf, check_dtype=False)


-@pytest.mark.parametrize("psr", [pd.Series([1, 2, 3], index=["a", "b", "c"])])
 @pytest.mark.parametrize(
     "arg", ["b", ["a", "c"], slice(1, 2, 1), [True, False, True]]
 )
-def test_series_set_item(psr, arg):
+def test_series_set_item(arg):
+    psr = pd.Series([1, 2, 3], index=["a", "b", "c"])
     gsr = cudf.from_pandas(psr)

     psr[arg] = 11
@@ -135,14 +135,15 @@ def test_setitem_dataframe_series_inplace(index):


 @pytest.mark.parametrize(
-    "replace_data",
+    "klass",
     [
-        [100, 200, 300, 400, 500],
-        cudf.Series([100, 200, 300, 400, 500]),
-        cudf.Series([100, 200, 300, 400, 500], index=[2, 3, 4, 5, 6]),
+        list,
+        cudf.Series,
+        lambda x: cudf.Series(x, index=[2, 3, 4, 5, 6]),
     ],
 )
-def test_series_set_equal_length_object_by_mask(replace_data):
+def test_series_set_equal_length_object_by_mask(klass):
+    replace_data = klass([100, 200, 300, 400, 500])
     psr = pd.Series([1, 2, 3, 4, 5], dtype="Int64")
     gsr = cudf.from_pandas(psr)


From 0c8fa1cce067d5667202391f301fc0f408bede2d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 23 Jul 2025 10:24:16 -0700
Subject: [PATCH 005/366] Update S3 bucket fixture creation in test_s3
 (#19424)

It appears these s3 tests borrowed from some older patterns in pandas
tests. This PR updates the S3 bucket fixture creation to how pandas now
does it. Namely, now each test will have a unique bucket name and will
consistently clear the contents after each test runs.
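
For reference, a condensed sketch of the bucket fixture this patch adds
(the boto3-backed `moto_s3_resource` fixture it depends on is also
defined in this diff):

    import uuid

    import pytest


    @pytest.fixture
    def s3_bucket_public(moto_s3_resource):
        # Unique per-test bucket name, so tests cannot see each other's data
        bucket = moto_s3_resource.Bucket(f"cudf-test-{uuid.uuid4()}")
        bucket.create(ACL="public-read-write")
        yield bucket
        # Teardown: remove any leftover objects, then the bucket itself
        bucket.objects.delete()
        bucket.delete()
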
Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19424 --- python/cudf/cudf/tests/test_s3.py | 525 +++++++++++++----------------- 1 file changed, 223 insertions(+), 302 deletions(-) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index afb82f75bcf..e9c90c899da 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -1,109 +1,77 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. -import os -import socket -from contextlib import contextmanager +import subprocess +import sys +import uuid from io import BytesIO, StringIO -import numpy as np import pandas as pd import pytest from fsspec.core import get_fs_token_paths import cudf +from cudf.io.parquet import ParquetDatasetWriter from cudf.testing import assert_eq -moto = pytest.importorskip("moto", minversion="3.1.6") -boto3 = pytest.importorskip("boto3") -s3fs = pytest.importorskip("s3fs") -ThreadedMotoServer = pytest.importorskip("moto.server").ThreadedMotoServer +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp -@pytest.fixture(scope="session") -def endpoint_ip(): - return "127.0.0.1" +@pytest.fixture(scope="module") +def aws_credentials(monkeymodule): + """Mocked AWS Credentials for moto.""" + monkeymodule.setenv("AWS_ACCESS_KEY_ID", "foobar_key") + monkeymodule.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret") + monkeymodule.setenv("S3FS_LOGGING_LEVEL", "DEBUG") + monkeymodule.setenv("AWS_SECURITY_TOKEN", "testing") + monkeymodule.setenv("AWS_SESSION_TOKEN", "foobar_session_token") + monkeymodule.setenv("AWS_DEFAULT_REGION", "us-east-1") -@pytest.fixture(scope="session") -def endpoint_port(): - # Return a free port per worker session. - sock = socket.socket() - sock.bind(("127.0.0.1", 0)) - port = sock.getsockname()[1] - sock.close() - return port - - -@contextmanager -def ensure_safe_environment_variables(): +@pytest.fixture(scope="module") +def moto_server(aws_credentials, monkeymodule): """ - Get a context manager to safely set environment variables - All changes will be undone on close, hence environment variables set - within this contextmanager will neither persist nor change global state. 
+ Fixture to set up moto server in separate process """ - saved_environ = dict(os.environ) - try: - yield - finally: - os.environ.clear() - os.environ.update(saved_environ) + moto_server = pytest.importorskip("moto.server") + server = moto_server.ThreadedMotoServer(port=0) + server.start() + host, port = server.get_host_and_port() + url = f"http://{host}:{port}" + monkeymodule.setenv("AWS_ENDPOINT_URL", url) + yield url + server.stop() -@pytest.fixture(scope="session") -def s3_base(endpoint_ip, endpoint_port): - """ - Fixture to set up moto server in separate process - """ - with ensure_safe_environment_variables(): - # Fake aws credentials exported to prevent botocore looking for - # system aws credentials, https://github.com/spulec/moto/issues/1793 - os.environ["AWS_ACCESS_KEY_ID"] = "foobar_key" - os.environ["AWS_SECRET_ACCESS_KEY"] = "foobar_secret" - os.environ["S3FS_LOGGING_LEVEL"] = "DEBUG" - os.environ["AWS_SECURITY_TOKEN"] = "foobar_security_token" - os.environ["AWS_SESSION_TOKEN"] = "foobar_session_token" - os.environ["AWS_DEFAULT_REGION"] = "us-east-1" - - # Launching moto in server mode, i.e., as a separate process - # with an S3 endpoint on localhost - - endpoint_uri = f"http://{endpoint_ip}:{endpoint_port}/" - os.environ["AWS_ENDPOINT_URL"] = endpoint_uri - - server = ThreadedMotoServer(ip_address=endpoint_ip, port=endpoint_port) - server.start() - yield endpoint_uri - server.stop() - - -@pytest.fixture() -def s3so(endpoint_ip, endpoint_port): +@pytest.fixture +def s3so(moto_server): """ Returns s3 storage options to pass to fsspec """ - endpoint_uri = f"http://{endpoint_ip}:{endpoint_port}/" + return {"client_kwargs": {"endpoint_url": moto_server}} - return {"client_kwargs": {"endpoint_url": endpoint_uri}} +@pytest.fixture +def moto_s3_resource(moto_server): + boto3 = pytest.importorskip("boto3") + s3 = boto3.resource("s3", endpoint_url=moto_server) + return s3 -@contextmanager -def s3_context(s3_base, bucket, files=None): - if files is None: - files = {} - with ensure_safe_environment_variables(): - client = boto3.client("s3", endpoint_url=s3_base) - client.create_bucket(Bucket=bucket, ACL="public-read-write") - for f, data in files.items(): - client.put_object(Bucket=bucket, Key=f, Body=data) - - yield s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_base}) - for f, data in files.items(): - try: - client.delete_object(Bucket=bucket, Key=f) - except Exception: - pass +@pytest.fixture +def s3_bucket_public(moto_s3_resource): + """ + Create a public S3 bucket using moto. 
+ """ + bucket_name = f"cudf-test-{uuid.uuid4()}" + bucket = moto_s3_resource.Bucket(bucket_name) + bucket.create(ACL="public-read-write") + yield bucket + bucket.objects.delete() + bucket.delete() @pytest.fixture( @@ -116,84 +84,83 @@ def kvikio_remote_io(request): @pytest.fixture -def pdf(scope="module"): - df = pd.DataFrame() - df["Integer"] = np.array([2345, 11987, 9027, 9027]) - df["Float"] = np.array([9.001, 8.343, 6, 2.781]) - df["Integer2"] = np.array([2345, 106, 2088, 789277]) - df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - df["Boolean"] = np.array([True, False, True, False]) - return df +def pdf(): + return pd.DataFrame( + { + "Integer": [2345, 11987, 9027, 9027], + "Float": [9.001, 8.343, 6, 2.781], + "Integer2": [2345, 106, 2088, 789277], + "String": ["Alpha", "Beta", "Gamma", "Delta"], + "Boolean": [True, False, True, False], + } + ) @pytest.fixture -def pdf_ext(scope="module"): - size = 100 - df = pd.DataFrame() - df["Integer"] = np.array([i for i in range(size)]) - df["List"] = [[i] for i in range(size)] - df["Struct"] = [{"a": i} for i in range(size)] - df["String"] = (["Alpha", "Beta", "Gamma", "Delta"] * (-(size // -4)))[ - :size - ] - return df +def pdf_ext(): + size = 10 + return pd.DataFrame( + { + "Integer": [i for i in range(size)], + "List": [[i] for i in range(size)], + "Struct": [{"a": i} for i in range(size)], + "String": (["Alpha", "Beta", "Gamma", "Delta"] * (-(size // -4)))[ + :size + ], + } + ) @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) -def test_read_csv(s3_base, s3so, pdf, bytes_per_thread): +def test_read_csv(s3_bucket_public, s3so, pdf, bytes_per_thread): # Write to buffer fname = "test_csv_reader.csv" - bucket = "csv" buffer = pdf.to_csv(index=False) + s3_bucket_public.put_object(Key=fname, Body=buffer) # Use fsspec file object - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - ) + got = cudf.read_csv( + f"s3://{s3_bucket_public.name}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + ) assert_eq(pdf, got) @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) -def test_read_csv_byte_range(s3_base, s3so, pdf, bytes_per_thread): +def test_read_csv_byte_range(s3_bucket_public, s3so, pdf, bytes_per_thread): # Write to buffer fname = "test_csv_reader_byte_range.csv" - bucket = "csv" buffer = pdf.to_csv(index=False) + s3_bucket_public.put_object(Key=fname, Body=buffer) # Use fsspec file object - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_csv( - f"s3://{bucket}/{fname}", - storage_options=s3so, - byte_range=(74, 73), - bytes_per_thread=bytes_per_thread, - header=None, - names=["Integer", "Float", "Integer2", "String", "Boolean"], - ) + got = cudf.read_csv( + f"s3://{s3_bucket_public.name}/{fname}", + storage_options=s3so, + byte_range=(74, 73), + bytes_per_thread=bytes_per_thread, + header=None, + names=["Integer", "Float", "Integer2", "String", "Boolean"], + ) assert_eq(pdf.iloc[-2:].reset_index(drop=True), got) @pytest.mark.parametrize("chunksize", [None, 3]) -def test_write_csv(s3_base, s3so, pdf, chunksize): +def test_write_csv(s3_bucket_public, s3so, pdf, chunksize): # Write to buffer fname = "test_csv_writer.csv" - bucket = "csv" gdf = cudf.from_pandas(pdf) - with s3_context(s3_base=s3_base, bucket=bucket) as s3fs: - gdf.to_csv( - f"s3://{bucket}/{fname}", - index=False, - chunksize=chunksize, - storage_options=s3so, - 
) - assert s3fs.exists(f"s3://{bucket}/{fname}") - - # TODO: Update to use `storage_options` from pandas v1.2.0 - got = pd.read_csv(s3fs.open(f"s3://{bucket}/{fname}")) + gdf.to_csv( + f"s3://{s3_bucket_public.name}/{fname}", + index=False, + chunksize=chunksize, + storage_options=s3so, + ) + got = pd.read_csv( + f"s3://{s3_bucket_public.name}/{fname}", storage_options=s3so + ) assert_eq(pdf, got) @@ -201,7 +168,7 @@ def test_write_csv(s3_base, s3so, pdf, chunksize): @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) @pytest.mark.parametrize("columns", [None, ["Float", "String"]]) def test_read_parquet( - s3_base, + s3_bucket_public, s3so, kvikio_remote_io, pdf, @@ -209,51 +176,49 @@ def test_read_parquet( columns, ): fname = "test_parquet_reader.parquet" - bucket = "parquet" buffer = BytesIO() pdf.to_parquet(path=buffer) # Check direct path handling buffer.seek(0) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got1 = cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - columns=columns, - ) + s3_bucket_public.put_object(Key=fname, Body=buffer) + got1 = cudf.read_parquet( + f"s3://{s3_bucket_public.name}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + columns=columns, + ) expect = pdf[columns] if columns else pdf assert_eq(expect, got1) # Check fsspec file-object handling buffer.seek(0) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - fs = get_fs_token_paths( - f"s3://{bucket}/{fname}", storage_options=s3so - )[0] - with fs.open(f"s3://{bucket}/{fname}", mode="rb") as f: - got2 = cudf.read_parquet( - f, - bytes_per_thread=bytes_per_thread, - columns=columns, - ) + fs = get_fs_token_paths( + f"s3://{s3_bucket_public.name}/{fname}", storage_options=s3so + )[0] + with fs.open(f"s3://{s3_bucket_public.name}/{fname}", mode="rb") as f: + got2 = cudf.read_parquet( + f, + bytes_per_thread=bytes_per_thread, + columns=columns, + ) assert_eq(expect, got2) @pytest.mark.parametrize("method", ["all", "parquet"]) @pytest.mark.parametrize("blocksize", [1024 * 1024, 1024]) def test_read_parquet_prefetch_options( - s3_base, + s3_bucket_public, s3so, pdf, method, blocksize, ): - bucket = "parquet" fname_1 = "test_parquet_reader_prefetch_options_1.parquet" buffer_1 = BytesIO() pdf.to_parquet(path=buffer_1) buffer_1.seek(0) + s3_bucket_public.put_object(Key=fname_1, Body=buffer_1) fname_2 = "test_parquet_reader_prefetch_options_2.parquet" buffer_2 = BytesIO() @@ -261,27 +226,20 @@ def test_read_parquet_prefetch_options( pdf_2["Integer"] += 1 pdf_2.to_parquet(path=buffer_2) buffer_2.seek(0) + s3_bucket_public.put_object(Key=fname_2, Body=buffer_2) - with s3_context( - s3_base=s3_base, - bucket=bucket, - files={ - fname_1: buffer_1, - fname_2: buffer_2, + got = cudf.read_parquet( + [ + f"s3://{s3_bucket_public.name}/{fname_1}", + f"s3://{s3_bucket_public.name}/{fname_2}", + ], + storage_options=s3so, + prefetch_options={ + "method": method, + "blocksize": blocksize, }, - ): - got = cudf.read_parquet( - [ - f"s3://{bucket}/{fname_1}", - f"s3://{bucket}/{fname_2}", - ], - storage_options=s3so, - prefetch_options={ - "method": method, - "blocksize": blocksize, - }, - columns=["String", "Integer"], - ) + columns=["String", "Integer"], + ) expect = pd.concat([pdf, pdf_2], ignore_index=True)[["String", "Integer"]] assert_eq(expect, got) @@ -291,7 +249,7 @@ def test_read_parquet_prefetch_options( @pytest.mark.parametrize("columns", [None, ["List", "Struct"]]) 
@pytest.mark.parametrize("index", [None, "Integer"]) def test_read_parquet_ext( - s3_base, + s3_bucket_public, s3so, pdf_ext, bytes_per_thread, @@ -299,7 +257,6 @@ def test_read_parquet_ext( index, ): fname = "test_parquet_reader_ext.parquet" - bucket = "parquet" buffer = BytesIO() if index: @@ -309,13 +266,13 @@ def test_read_parquet_ext( # Check direct path handling buffer.seek(0) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got1 = cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - bytes_per_thread=bytes_per_thread, - columns=columns, - ) + s3_bucket_public.put_object(Key=fname, Body=buffer) + got1 = cudf.read_parquet( + f"s3://{s3_bucket_public.name}/{fname}", + storage_options=s3so, + bytes_per_thread=bytes_per_thread, + columns=columns, + ) if index: expect = ( pdf_ext.set_index(index)[columns] @@ -327,112 +284,90 @@ def test_read_parquet_ext( assert_eq(expect, got1) -def test_read_parquet_filesystem(s3_base, s3so, pdf): +def test_read_parquet_filesystem(s3_bucket_public, s3so, pdf): fname = "data.0.parquet" - # NOTE: Need a unique bucket name when a glob pattern - # is used, otherwise fsspec seems to cache the bucket - # contents, and later tests using the same bucket name - # will fail. - bucket = "test_read_parquet_filesystem" buffer = BytesIO() pdf.to_parquet(path=buffer) buffer.seek(0) fs = get_fs_token_paths("s3://", mode="rb", storage_options=s3so)[0] - with s3_context( - s3_base=s3_base, - bucket=bucket, - files={fname: buffer}, - ): - # Check that a glob pattern works - path = f"s3://{bucket}/{'data.*.parquet'}" - got = cudf.read_parquet(path, filesystem=fs) + s3_bucket_public.put_object(Key=fname, Body=buffer) + # Check that a glob pattern works + path = f"s3://{s3_bucket_public.name}/{'data.*.parquet'}" + got = cudf.read_parquet(path, filesystem=fs) assert_eq(pdf, got) -def test_read_parquet_multi_file(s3_base, s3so, pdf): +def test_read_parquet_multi_file(s3_bucket_public, s3so, pdf): fname_1 = "test_parquet_reader_multi_file_1.parquet" buffer_1 = BytesIO() pdf.to_parquet(path=buffer_1) buffer_1.seek(0) + s3_bucket_public.put_object(Key=fname_1, Body=buffer_1) fname_2 = "test_parquet_reader_multi_file_2.parquet" buffer_2 = BytesIO() pdf.to_parquet(path=buffer_2) buffer_2.seek(0) + s3_bucket_public.put_object(Key=fname_2, Body=buffer_2) - bucket = "parquet" - with s3_context( - s3_base=s3_base, - bucket=bucket, - files={ - fname_1: buffer_1, - fname_2: buffer_2, - }, - ): - got = cudf.read_parquet( - [ - f"s3://{bucket}/{fname_1}", - f"s3://{bucket}/{fname_2}", - ], - storage_options=s3so, - ).reset_index(drop=True) + got = cudf.read_parquet( + [ + f"s3://{s3_bucket_public.name}/{fname_1}", + f"s3://{s3_bucket_public.name}/{fname_2}", + ], + storage_options=s3so, + ).reset_index(drop=True) expect = pd.concat([pdf, pdf], ignore_index=True) assert_eq(expect, got) -def test_read_parquet_filters(s3_base, s3so, pdf_ext): +def test_read_parquet_filters(s3_bucket_public, s3so, pdf_ext): fname = "test_parquet_reader_filters.parquet" - bucket = "parquet" buffer = BytesIO() pdf_ext.to_parquet(path=buffer) buffer.seek(0) + s3_bucket_public.put_object(Key=fname, Body=buffer) filters = [("String", "==", "Omega")] - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_parquet( - f"s3://{bucket}/{fname}", - storage_options=s3so, - filters=filters, - ) + got = cudf.read_parquet( + f"s3://{s3_bucket_public.name}/{fname}", + storage_options=s3so, + filters=filters, + ) # All row-groups should be 
filtered out assert_eq(pdf_ext.iloc[:0], got.reset_index(drop=True)) @pytest.mark.parametrize("partition_cols", [None, ["String"]]) -def test_write_parquet(s3_base, s3so, pdf, partition_cols): +def test_write_parquet(s3_bucket_public, s3so, pdf, partition_cols): fname_cudf = "test_parquet_writer_cudf" fname_pandas = "test_parquet_writer_pandas" - bucket = "parquet" gdf = cudf.from_pandas(pdf) - with s3_context(s3_base=s3_base, bucket=bucket) as s3fs: - gdf.to_parquet( - f"s3://{bucket}/{fname_cudf}", - partition_cols=partition_cols, - storage_options=s3so, - ) - assert s3fs.exists(f"s3://{bucket}/{fname_cudf}") - pdf.to_parquet( - f"s3://{bucket}/{fname_pandas}", - partition_cols=partition_cols, - storage_options=s3so, - ) - assert s3fs.exists(f"s3://{bucket}/{fname_pandas}") + gdf.to_parquet( + f"s3://{s3_bucket_public.name}/{fname_cudf}", + partition_cols=partition_cols, + storage_options=s3so, + ) + pdf.to_parquet( + f"s3://{s3_bucket_public.name}/{fname_pandas}", + partition_cols=partition_cols, + storage_options=s3so, + ) - got = pd.read_parquet( - f"s3://{bucket}/{fname_pandas}", storage_options=s3so - ) - expect = cudf.read_parquet( - f"s3://{bucket}/{fname_cudf}", storage_options=s3so - ) + got = pd.read_parquet( + f"s3://{s3_bucket_public.name}/{fname_pandas}", storage_options=s3so + ) + expect = cudf.read_parquet( + f"s3://{s3_bucket_public.name}/{fname_cudf}", storage_options=s3so + ) assert_eq(expect, got) -def test_read_json(s3_base, s3so): +def test_read_json(s3_bucket_public, s3so): fname = "test_json_reader.json" - bucket = "json" buffer = ( '{"amount": 100, "name": "Alice"}\n' '{"amount": 200, "name": "Bob"}\n' @@ -440,103 +375,89 @@ def test_read_json(s3_base, s3so): '{"amount": 400, "name": "Dennis"}\n' ) - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_json( - f"s3://{bucket}/{fname}", - engine="cudf", - orient="records", - lines=True, - storage_options=s3so, - ) + s3_bucket_public.put_object(Key=fname, Body=buffer) + got = cudf.read_json( + f"s3://{s3_bucket_public.name}/{fname}", + engine="cudf", + orient="records", + lines=True, + storage_options=s3so, + ) expect = pd.read_json(StringIO(buffer), lines=True) assert_eq(expect, got) @pytest.mark.parametrize("columns", [None, ["string1"]]) -def test_read_orc(s3_base, s3so, datadir, columns): +def test_read_orc(s3_bucket_public, s3so, datadir, columns): source_file = str(datadir / "orc" / "TestOrcFile.testSnappy.orc") fname = "test_orc_reader.orc" - bucket = "orc" expect = pd.read_orc(source_file) with open(source_file, "rb") as f: buffer = f.read() - with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): - got = cudf.read_orc( - f"s3://{bucket}/{fname}", - columns=columns, - storage_options=s3so, - ) + s3_bucket_public.put_object(Key=fname, Body=buffer) + got = cudf.read_orc( + f"s3://{s3_bucket_public.name}/{fname}", + columns=columns, + storage_options=s3so, + ) if columns: expect = expect[columns] assert_eq(expect, got) -def test_write_orc(s3_base, s3so, pdf): +def test_write_orc(s3_bucket_public, s3so, pdf): fname = "test_orc_writer.orc" - bucket = "orc" gdf = cudf.from_pandas(pdf) - with s3_context(s3_base=s3_base, bucket=bucket) as s3fs: - gdf.to_orc(f"s3://{bucket}/{fname}", storage_options=s3so) - assert s3fs.exists(f"s3://{bucket}/{fname}") + gdf.to_orc(f"s3://{s3_bucket_public.name}/{fname}", storage_options=s3so) - with s3fs.open(f"s3://{bucket}/{fname}") as f: - got = pd.read_orc(f) + got = pd.read_orc(f"s3://{s3_bucket_public.name}/{fname}") 
assert_eq(pdf, got) -def test_write_chunked_parquet(s3_base, s3so): +def test_write_chunked_parquet(s3_bucket_public, s3so): df1 = cudf.DataFrame({"b": [10, 11, 12], "a": [1, 2, 3]}) df2 = cudf.DataFrame({"b": [20, 30, 50], "a": [3, 2, 1]}) dirname = "chunked_writer_directory" - bucket = "parquet" - from cudf.io.parquet import ParquetDatasetWriter - - with s3_context( - s3_base=s3_base, bucket=bucket, files={dirname: BytesIO()} - ) as s3fs: - with ParquetDatasetWriter( - f"s3://{bucket}/{dirname}", - partition_cols=["a"], - storage_options=s3so, - ) as cw: - cw.write_table(df1) - cw.write_table(df2) - - # TODO: Replace following workaround with: - # expect = cudf.read_parquet(f"s3://{bucket}/{dirname}/", - # storage_options=s3so) - # after the following bug is fixed: - # https://issues.apache.org/jira/browse/ARROW-16438 - - dfs = [] - for folder in {"a=1", "a=2", "a=3"}: - assert s3fs.exists(f"s3://{bucket}/{dirname}/{folder}") - for file in s3fs.ls(f"s3://{bucket}/{dirname}/{folder}"): - df = cudf.read_parquet("s3://" + file, storage_options=s3so) - dfs.append(df) - - actual = cudf.concat(dfs).astype("int64") - assert_eq( - actual.sort_values(["b"]).reset_index(drop=True), - cudf.concat([df1, df2]).sort_values(["b"]).reset_index(drop=True), + with ParquetDatasetWriter( + f"s3://{s3_bucket_public.name}/{dirname}", + partition_cols=["a"], + storage_options=s3so, + ) as cw: + cw.write_table(df1) + cw.write_table(df2) + + # TODO: Replace following workaround with: + # expect = cudf.read_parquet(f"s3://{bucket}/{dirname}/", + # storage_options=s3so) + # after the following bug is fixed: + # https://issues.apache.org/jira/browse/ARROW-16438 + + dfs = [ + cudf.read_parquet( + f"s3://{s3_bucket_public.name}/{file.key}", storage_options=s3so ) + for file in s3_bucket_public.objects.all() + ] + + actual = cudf.concat(dfs).astype("int64") + assert_eq( + actual.sort_values(["b"]).reset_index(drop=True), + cudf.concat([df1, df2]).sort_values(["b"]).reset_index(drop=True), + ) def test_no_s3fs_on_cudf_import(): - import subprocess - import sys - - output = subprocess.check_output( + output = subprocess.check_call( [ sys.executable, "-c", - "import cudf; import sys; print('pyarrow._s3fs' in sys.modules)", + "import cudf, sys; assert 'pyarrow._s3fs' not in sys.modules", ], cwd="/", ) - assert output.strip() == b"False" + assert output == 0 From a2077705eefd8875a3b7c8f15e0792d57e3a9e70 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 23 Jul 2025 13:25:51 -0700 Subject: [PATCH 006/366] Use more pytest fixtures and avoid GPU parameterization in cuDF classic tests (#19450) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Eliminate/reduce parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19450 --- python/cudf/cudf/tests/test_dataframe.py | 780 ++++++++++------------- python/cudf/cudf/tests/test_datetime.py | 688 +++++++++----------- python/cudf/cudf/tests/test_decimal.py | 267 ++++---- 3 files changed, 735 insertions(+), 1000 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index ac82f5c8ab2..2671c0bf0f3 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -139,8 +139,8 @@ def test_init_with_missing_columns(index): 
assert_eq(pdf, gdf) -def _dataframe_na_data(): - return [ +@pytest.fixture( + params=[ pd.DataFrame( { "a": [0, 1, 2, np.nan, 4, None, 6], @@ -160,6 +160,9 @@ def _dataframe_na_data(): pd.DataFrame({"a": ["a", "b", "c", None, "e"]}), pd.DataFrame({"a": ["a", "b", "c", "d", "e"]}), ] +) +def na_data(request): + return request.param @pytest.mark.parametrize( @@ -596,24 +599,6 @@ def test_dataframe_drop_index(pdf, index, inplace): assert_eq(expected, actual) -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5}, - index=pd.MultiIndex( - levels=[ - ["lama", "cow", "falcon"], - ["speed", "weight", "length"], - ], - codes=[ - [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], - [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], - ], - ), - ) - ], -) @pytest.mark.parametrize( "index,level", [ @@ -636,7 +621,19 @@ def test_dataframe_drop_index(pdf, index, inplace): ) @pytest.mark.parametrize("inplace", [True, False]) def test_dataframe_drop_multiindex(pdf, index, level, inplace): - pdf = pdf.copy() + pdf = pd.DataFrame( + {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5}, + index=pd.MultiIndex( + levels=[ + ["lama", "cow", "falcon"], + ["speed", "weight", "length"], + ], + codes=[ + [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], + [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], + ], + ), + ) gdf = cudf.from_pandas(pdf) expected = pdf.drop(index=index, inplace=inplace, level=level) @@ -650,21 +647,16 @@ def test_dataframe_drop_multiindex(pdf, index, level, inplace): @pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame({"a": range(10), "b": range(10, 20), "c": range(1, 11)}), - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} - ), - ], + "data", + [{"c": range(1, 11)}, {"d": ["a", "v"] * 5}], ) @pytest.mark.parametrize( "labels", [["a"], ["b"], "a", "b", ["a", "b"]], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_labels_axis_1(pdf, labels, inplace): - pdf = pdf.copy() +def test_dataframe_drop_labels_axis_1(data, labels, inplace): + pdf = pd.DataFrame({"a": range(10), "b": range(10, 20), **data}) gdf = cudf.from_pandas(pdf) expected = pdf.drop(labels=labels, axis=1, inplace=inplace) @@ -967,7 +959,7 @@ def test_dataframe_pop(): assert empty_pdf.empty and empty_gdf.empty -@pytest.mark.parametrize("nelem", [0, 3, 100, 1000]) +@pytest.mark.parametrize("nelem", [0, 10]) def test_dataframe_astype(nelem): df = cudf.DataFrame() data = np.asarray(range(nelem), dtype=np.int32) @@ -1100,8 +1092,7 @@ def test_dataframe_to_string_with_masked_data(): assert got == expect -def test_dataframe_to_string_wide(monkeypatch): - monkeypatch.setenv("COLUMNS", "79") +def test_dataframe_to_string_wide(): # Test basic df = cudf.DataFrame({f"a{i}": [0, 1, 2] for i in range(100)}) with pd.option_context("display.max_columns", 0): @@ -1188,7 +1179,7 @@ def test_dataframe_add_col_to_object_dataframe(): cols = ["a", "b", "c"] df = pd.DataFrame(columns=cols, dtype="str") - data = {k: v for (k, v) in zip(cols, [["a"] for _ in cols])} + data = {k: ["a"] for k in cols} gdf = cudf.DataFrame(data) gdf = gdf[:0] @@ -1449,7 +1440,6 @@ def test_assign_callable(mapping): assert_eq(expect, actual) -@pytest.mark.parametrize("nrows", [1, 8, 100, 1000]) @pytest.mark.parametrize( "method", [ @@ -1465,7 +1455,8 @@ def test_assign_callable(mapping): ], ) @pytest.mark.parametrize("seed", [None, 42]) -def test_dataframe_hash_values(nrows, method, seed): +def test_dataframe_hash_values(method, seed): + nrows = 10 warning_expected = seed is not None and method not in { "murmur3", "xxhash32", 
@@ -1597,10 +1588,10 @@ def test_dataframe_hash_values_xxhash64(): assert_eq(out_df, expected_df) -@pytest.mark.parametrize("nrows", [3, 10, 100, 1000]) -@pytest.mark.parametrize("nparts", [1, 2, 8, 13]) -@pytest.mark.parametrize("nkeys", [1, 2]) -def test_dataframe_hash_partition(nrows, nparts, nkeys): +@pytest.mark.parametrize("nparts", [1, 2]) +def test_dataframe_hash_partition(nparts): + nrows = 10 + nkeys = 2 rng = np.random.default_rng(seed=0) gdf = cudf.DataFrame( {f"key{i}": rng.integers(0, 7 - i, nrows) for i in range(nkeys)} @@ -1627,8 +1618,8 @@ def test_dataframe_hash_partition(nrows, nparts, nkeys): assert len(part_unique_keys) -@pytest.mark.parametrize("nrows", [3, 10, 50]) -def test_dataframe_hash_partition_masked_value(nrows): +def test_dataframe_hash_partition_masked_value(): + nrows = 10 gdf = cudf.DataFrame() gdf["key"] = np.arange(nrows) gdf["val"] = np.arange(nrows) + 100 @@ -1648,8 +1639,8 @@ def test_dataframe_hash_partition_masked_value(nrows): ) -@pytest.mark.parametrize("nrows", [3, 10, 50]) -def test_dataframe_hash_partition_masked_keys(nrows): +def test_dataframe_hash_partition_masked_keys(): + nrows = 5 gdf = cudf.DataFrame() gdf["key"] = np.arange(nrows) gdf["val"] = np.arange(nrows) + 100 @@ -1724,13 +1715,11 @@ def test_dataframe_concat_different_column_types(): cudf.concat([df1, df2]) -@pytest.mark.parametrize( - "df_1", [cudf.DataFrame({"a": [1, 2], "b": [1, 3]}), cudf.DataFrame({})] -) -@pytest.mark.parametrize( - "df_2", [cudf.DataFrame({"a": [], "b": []}), cudf.DataFrame({})] -) -def test_concat_empty_dataframe(df_1, df_2): +@pytest.mark.parametrize("df_1_data", [{"a": [1, 2], "b": [1, 3]}, {}]) +@pytest.mark.parametrize("df_2_data", [{"a": [], "b": []}, {}]) +def test_concat_empty_dataframe(df_1_data, df_2_data): + df_1 = cudf.DataFrame(df_1_data) + df_2 = cudf.DataFrame(df_2_data) with _hide_concat_empty_dtype_warning(): got = cudf.concat([df_1, df_2]) expect = pd.concat([df_1.to_pandas(), df_2.to_pandas()], sort=False) @@ -1787,8 +1776,8 @@ def test_concat_different_column_dataframe(df1_d, df2_d): @pytest.mark.parametrize( "ser_1", [pd.Series([1, 2, 3]), pd.Series([], dtype="float64")] ) -@pytest.mark.parametrize("ser_2", [pd.Series([], dtype="float64")]) -def test_concat_empty_series(ser_1, ser_2): +def test_concat_empty_series(ser_1): + ser_2 = pd.Series([], dtype="float64") with _hide_concat_empty_dtype_warning(): got = cudf.concat([cudf.Series(ser_1), cudf.Series(ser_2)]) expect = pd.concat([ser_1, ser_2]) @@ -1898,7 +1887,7 @@ def test_concat_with_axis(): ) -@pytest.mark.parametrize("nrows", [0, 3, 10, 100, 1000]) +@pytest.mark.parametrize("nrows", [0, 3]) def test_nonmatching_index_setitem(nrows): rng = np.random.default_rng(seed=0) @@ -2038,7 +2027,7 @@ def test_index_in_dataframe_constructor(): dtypes = NUMERIC_TYPES + DATETIME_TYPES + ["bool"] -@pytest.mark.parametrize("nelem", [0, 2, 3, 100, 1000]) +@pytest.mark.parametrize("nelem", [0, 2]) @pytest.mark.parametrize("data_type", dtypes) def test_from_arrow(nelem, data_type): rng = np.random.default_rng(seed=0) @@ -2077,7 +2066,7 @@ def test_from_arrow_chunked_categories(): assert sorted(final_dictionary) == sorted(dictionary.to_pylist()) -@pytest.mark.parametrize("nelem", [0, 2, 3, 100, 1000]) +@pytest.mark.parametrize("nelem", [0, 2]) @pytest.mark.parametrize("data_type", dtypes) def test_to_arrow(nelem, data_type): rng = np.random.default_rng(seed=0) @@ -2246,8 +2235,8 @@ def test_dataframe_shape_empty(): assert pdf.shape == gdf.shape -@pytest.mark.parametrize("num_cols", [1, 2, 10]) 
-@pytest.mark.parametrize("num_rows", [1, 2, 20]) +@pytest.mark.parametrize("num_cols", [1, 3]) +@pytest.mark.parametrize("num_rows", [1, 4]) @pytest.mark.parametrize("dtype", [*dtypes, "object"]) @pytest.mark.parametrize("nulls", ["none", "some", "all"]) def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): @@ -2320,8 +2309,8 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): assert_eq(expect, got_property.to_pandas(nullable=nullable)) -@pytest.mark.parametrize("num_cols", [1, 2, 10]) -@pytest.mark.parametrize("num_rows", [1, 2, 20]) +@pytest.mark.parametrize("num_cols", [1, 3]) +@pytest.mark.parametrize("num_rows", [1, 5]) def test_dataframe_transpose_category(num_cols, num_rows): pdf = pd.DataFrame() @@ -2908,8 +2897,8 @@ def test_cuda_array_interface(dtype): assert_eq(pd_data, gdf["test"]) -@pytest.mark.parametrize("nelem", [0, 2, 3, 100]) -@pytest.mark.parametrize("nchunks", [1, 2, 5, 10]) +@pytest.mark.parametrize("nelem", [0, 10]) +@pytest.mark.parametrize("nchunks", [1, 5]) @pytest.mark.parametrize("data_type", dtypes) def test_from_arrow_chunked_arrays(nelem, nchunks, data_type): rng = np.random.default_rng(seed=0) @@ -3011,18 +3000,19 @@ def test_dataframe_boolmask(mask_shape): @pytest.mark.parametrize( - "mask", + "box", [ - [True, False, True], + list, pytest.param( - cudf.Series([True, False, True]), + cudf.Series, marks=pytest_xfail( reason="Pandas can't index a multiindex with a Series" ), ), ], ) -def test_dataframe_multiindex_boolmask(mask): +def test_dataframe_multiindex_boolmask(box): + mask = box([True, False, True]) gdf = cudf.DataFrame( {"w": [3, 2, 1], "x": [1, 2, 3], "y": [0, 1, 0], "z": [1, 1, 1]} ) @@ -3073,7 +3063,7 @@ def test_pandas_non_contiguious(): assert_eq(gdf.to_pandas(), df) -@pytest.mark.parametrize("num_elements", [0, 2, 10, 100]) +@pytest.mark.parametrize("num_elements", [0, 10]) @pytest.mark.parametrize("null_type", [np.nan, None, "mixed"]) def test_series_all_null(num_elements, null_type): if null_type == "mixed": @@ -3093,7 +3083,7 @@ def test_series_all_null(num_elements, null_type): assert_eq(expect, got) -@pytest.mark.parametrize("num_elements", [0, 2, 10, 100]) +@pytest.mark.parametrize("num_elements", [0, 10]) def test_series_all_valid_nan(num_elements): data = [np.nan] * num_elements sr = cudf.Series(data, nan_as_null=False) @@ -3301,16 +3291,6 @@ def test_reset_index_invalid_level(): pd.DataFrame([1]).reset_index(level=2) -@pytest.mark.parametrize( - "data", - [ - { - "a": [1, 2, 3, 4, 5], - "b": ["a", "b", "c", "d", "e"], - "c": [1.0, 2.0, 3.0, 4.0, 5.0], - } - ], -) @pytest.mark.parametrize( "index", [ @@ -3346,8 +3326,14 @@ def test_reset_index_invalid_level(): @pytest.mark.parametrize("drop", [True, False]) @pytest.mark.parametrize("append", [True, False]) @pytest.mark.parametrize("inplace", [True, False]) -def test_set_index(data, index, drop, append, inplace): - gdf = cudf.DataFrame(data) +def test_set_index(index, drop, append, inplace): + gdf = cudf.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ) pdf = gdf.to_pandas() expected = pdf.set_index(index, inplace=inplace, drop=drop, append=append) @@ -3359,27 +3345,22 @@ def test_set_index(data, index, drop, append, inplace): assert_eq(expected, actual) -@pytest.mark.parametrize( - "data", - [ +@pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) +def test_set_index_verify_integrity(index): + gdf = cudf.DataFrame( { "a": [1, 1, 2, 2, 5], "b": ["a", "b", "c", "d", "e"], "c": [1.0, 
2.0, 3.0, 4.0, 5.0], } - ], -) -@pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) -@pytest.mark.parametrize("verify_integrity", [True]) -@pytest_xfail -def test_set_index_verify_integrity(data, index, verify_integrity): - gdf = cudf.DataFrame(data) - gdf.set_index(index, verify_integrity=verify_integrity) + ) + with pytest.raises(ValueError): + gdf.set_index(index, verify_integrity=True) @pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("nelem", [10, 200, 1333]) -def test_set_index_multi(drop, nelem): +def test_set_index_multi(drop): + nelem = 10 rng = np.random.default_rng(seed=0) a = np.arange(nelem) rng.shuffle(a) @@ -3409,26 +3390,6 @@ def test_set_index_multi(drop, nelem): ) -@pytest.fixture() -def reindex_data(): - return cudf.datasets.randomdata( - nrows=6, - dtypes={ - "a": "category", - "c": float, - "d": str, - }, - ) - - -@pytest.fixture() -def reindex_data_numeric(): - return cudf.datasets.randomdata( - nrows=6, - dtypes={"a": float, "b": float, "c": float}, - ) - - @pytest_unmark_spilling @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize( @@ -3454,7 +3415,15 @@ def reindex_data_numeric(): ), ], ) -def test_dataframe_reindex(copy, reindex_data, args, gd_kwargs): +def test_dataframe_reindex(copy, args, gd_kwargs): + reindex_data = cudf.datasets.randomdata( + nrows=6, + dtypes={ + "a": "category", + "c": float, + "d": str, + }, + ) pdf, gdf = reindex_data.to_pandas(), reindex_data gd_kwargs["copy"] = copy @@ -3487,9 +3456,11 @@ def test_dataframe_reindex(copy, reindex_data, args, gd_kwargs): ), ], ) -def test_dataframe_reindex_fill_value( - reindex_data_numeric, args, kwargs, fill_value -): +def test_dataframe_reindex_fill_value(args, kwargs, fill_value): + reindex_data_numeric = cudf.datasets.randomdata( + nrows=6, + dtypes={"a": float, "b": float, "c": float}, + ) pdf, gdf = reindex_data_numeric.to_pandas(), reindex_data_numeric kwargs["fill_value"] = fill_value assert_eq(pdf.reindex(*args, **kwargs), gdf.reindex(*args, **kwargs)) @@ -3967,7 +3938,7 @@ def test_select_dtype_datetime_with_frequency(): def test_dataframe_describe_exclude(): rng = np.random.default_rng(seed=12) - data_length = 10000 + data_length = 10 df = cudf.DataFrame() df["x"] = rng.normal(10, 1, data_length) @@ -3983,7 +3954,7 @@ def test_dataframe_describe_exclude(): def test_dataframe_describe_include(): rng = np.random.default_rng(seed=12) - data_length = 10000 + data_length = 10 df = cudf.DataFrame() df["x"] = rng.normal(10, 1, data_length) @@ -3998,7 +3969,7 @@ def test_dataframe_describe_include(): def test_dataframe_describe_default(): rng = np.random.default_rng(seed=12) - data_length = 10000 + data_length = 10 df = cudf.DataFrame() df["x"] = rng.normal(10, 1, data_length) @@ -4012,7 +3983,7 @@ def test_dataframe_describe_default(): def test_series_describe_include_all(): rng = np.random.default_rng(seed=12) - data_length = 10000 + data_length = 10 df = cudf.DataFrame() df["x"] = rng.normal(10, 1, data_length) @@ -4035,7 +4006,7 @@ def test_series_describe_include_all(): def test_dataframe_describe_percentiles(): rng = np.random.default_rng(seed=12) - data_length = 10000 + data_length = 100 sample_percentiles = [0.0, 0.1, 0.33, 0.84, 0.4, 0.99] df = cudf.DataFrame() @@ -4092,7 +4063,7 @@ def test_shift(dtype, period, data_empty): @pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20]) +@pytest.mark.parametrize("period", [-15, -1, 0, 1, 15]) 
@pytest.mark.parametrize("data_empty", [False, True]) def test_diff(dtype, period, data_empty): if data_empty: @@ -4116,14 +4087,14 @@ def test_diff(dtype, period, data_empty): assert_eq(diffed_outcome, expected_outcome) -@pytest.mark.parametrize("df", _dataframe_na_data()) @pytest.mark.parametrize("nan_as_null", [True, False, None]) @pytest.mark.parametrize("api_call", ["isnull", "isna", "notna", "notnull"]) -def test_dataframe_isnull_isna_and_reverse(df, nan_as_null, api_call): +def test_dataframe_isnull_isna_and_reverse(na_data, nan_as_null, api_call): def detect_nan(x): # Check if the input is a float and if it is nan return x.apply(lambda v: isinstance(v, float) and np.isnan(v)) + df = na_data nan_contains = df.select_dtypes(object).apply(detect_nan) if nan_as_null is False and ( nan_contains.any().any() and not nan_contains.all().all() @@ -5282,18 +5253,18 @@ def test_df_constructor_dtype(dtype): @pytest.mark.parametrize( "data", [ - cudf.datasets.randomdata( + lambda: cudf.datasets.randomdata( nrows=10, dtypes={"a": "category", "b": int, "c": float, "d": int} ), - cudf.datasets.randomdata( + lambda: cudf.datasets.randomdata( nrows=10, dtypes={"a": "category", "b": int, "c": float, "d": str} ), - cudf.datasets.randomdata( + lambda: cudf.datasets.randomdata( nrows=10, dtypes={"a": bool, "b": int, "c": float, "d": str} ), - cudf.DataFrame(), - cudf.DataFrame({"a": [0, 1, 2], "b": [1, None, 3]}), - cudf.DataFrame( + lambda: cudf.DataFrame(), + lambda: cudf.DataFrame({"a": [0, 1, 2], "b": [1, None, 3]}), + lambda: cudf.DataFrame( { "a": [1, 2, 3, 4], "b": [7, np.nan, 9, 10], @@ -5305,7 +5276,7 @@ def test_df_constructor_dtype(dtype): "f": cudf.Series([10, None, np.nan, 11], nan_as_null=False), } ), - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": [10, 11, 12, 13, 14, 15], "b": cudf.Series( @@ -5321,7 +5292,7 @@ def test_df_constructor_dtype(dtype): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("numeric_only", [True, False]) def test_rowwise_ops(data, op, skipna, numeric_only): - gdf = data + gdf = data() pdf = gdf.to_pandas() kwargs = {"axis": 1, "skipna": skipna, "numeric_only": numeric_only} @@ -5419,49 +5390,49 @@ def test_rowwise_ops_nullable_dtypes_partial_null(op): [ ( "max", - cudf.Series( + lambda: cudf.Series( [10, None, None, 2234, None, 453], dtype="int64", ), ), ( "min", - cudf.Series( + lambda: cudf.Series( [10, None, None, 13, None, 15], dtype="int64", ), ), ( "sum", - cudf.Series( + lambda: cudf.Series( [20, None, None, 2247, None, 468], dtype="int64", ), ), ( "product", - cudf.Series( + lambda: cudf.Series( [100, None, None, 29042, None, 6795], dtype="int64", ), ), ( "mean", - cudf.Series( + lambda: cudf.Series( [10.0, None, None, 1123.5, None, 234.0], dtype="float32", ), ), ( "var", - cudf.Series( + lambda: cudf.Series( [0.0, None, None, 1233210.25, None, 47961.0], dtype="float32", ), ), ( "std", - cudf.Series( + lambda: cudf.Series( [0.0, None, None, 1110.5, None, 219.0], dtype="float32", ), @@ -5484,6 +5455,7 @@ def test_rowwise_ops_nullable_int_dtypes(op, expected): else: got = getattr(gdf, op)(axis=1, skipna=False) + expected = expected() assert_eq(got.null_count, expected.null_count) assert_eq(got, expected) @@ -5492,62 +5464,62 @@ def test_rowwise_ops_nullable_int_dtypes(op, expected): "data", [ { - "t1": cudf.Series( + "t1": pd.Series( ["2020-08-01 09:00:00", "1920-05-01 10:30:00"], dtype=" 0 -@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize("dtype", NUMERIC_TYPES) def test_typecast_from_datetime(data, 
dtype): - pd_data = pd.Series(data.copy()) + pd_data = pd.Series(data) np_data = np.array(pd_data) gdf_data = Series(pd_data) @@ -295,13 +255,12 @@ def test_typecast_from_datetime(data, dtype): np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) -@pytest.mark.parametrize("data", [data1(), data2()]) @pytest.mark.parametrize( "dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], ) def test_typecast_from_datetime_to_int64_to_datetime(data, dtype): - pd_data = pd.Series(data.copy()) + pd_data = pd.Series(data) np_data = np.array(pd_data) gdf_data = Series(pd_data) @@ -311,20 +270,43 @@ def test_typecast_from_datetime_to_int64_to_datetime(data, dtype): np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) -@pytest.mark.parametrize("data", [timeseries_us_data()]) @pytest.mark.parametrize( "dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], ) def test_typecast_to_different_datetime_resolutions(data, dtype): - pd_data = pd.Series(data.copy()) + data = pd.date_range( + "2019-07-16 00:00:00", + "2019-07-16 00:00:01", + freq="5555us", + name="times", + ) + pd_data = pd.Series(data) np_data = np.array(pd_data).astype(dtype) gdf_series = Series(pd_data).astype(dtype) np.testing.assert_equal(np_data, gdf_series.to_numpy()) @pytest.mark.parametrize( - "data", [timestamp_ms_data(), timestamp_us_data(), timestamp_ns_data()] + "data", + [ + [ + "2019-07-16 00:00:00.333", + "2019-07-16 00:00:00.666", + "2019-07-16 00:00:00.888", + ], + [ + "2019-07-16 00:00:00.333333", + "2019-07-16 00:00:00.666666", + "2019-07-16 00:00:00.888888", + ], + [ + "2019-07-16 00:00:00.333333333", + "2019-07-16 00:00:00.666666666", + "2019-07-16 00:00:00.888888888", + ], + ], + ids=["ms_data", "us_data", "ns_data"], ) @pytest.mark.parametrize( "dtype", @@ -333,7 +315,7 @@ def test_typecast_to_different_datetime_resolutions(data, dtype): def test_string_timstamp_typecast_to_different_datetime_resolutions( data, dtype ): - pd_sr = data + pd_sr = pd.Series(data) gdf_sr = cudf.Series.from_pandas(pd_sr) expect = pd_sr.values.astype(dtype) @@ -342,13 +324,13 @@ def test_string_timstamp_typecast_to_different_datetime_resolutions( np.testing.assert_equal(expect, got) -@pytest.mark.parametrize("data", [numerical_data()]) @pytest.mark.parametrize("from_dtype", NUMERIC_TYPES) @pytest.mark.parametrize( "to_dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], ) -def test_typecast_to_datetime(data, from_dtype, to_dtype): +def test_typecast_to_datetime(from_dtype, to_dtype): + data = np.arange(1, 10) np_data = data.astype(from_dtype) gdf_data = Series(np_data) @@ -358,13 +340,13 @@ def test_typecast_to_datetime(data, from_dtype, to_dtype): np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) -@pytest.mark.parametrize("data", [numerical_data()]) @pytest.mark.parametrize("from_dtype", NUMERIC_TYPES) @pytest.mark.parametrize( "to_dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], ) -def test_typecast_to_from_datetime(data, from_dtype, to_dtype): +def test_typecast_to_from_datetime(from_dtype, to_dtype): + data = np.arange(1, 10) np_data = data.astype(from_dtype) gdf_data = Series(np_data) @@ -374,7 +356,6 @@ def test_typecast_to_from_datetime(data, from_dtype, to_dtype): np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) -@pytest.mark.parametrize("data", [numerical_data()]) @pytest.mark.parametrize( "from_dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], @@ -383,7 +364,8 @@ def 
test_typecast_to_from_datetime(data, from_dtype, to_dtype): "to_dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], ) -def test_typecast_from_datetime_to_datetime(data, from_dtype, to_dtype): +def test_typecast_from_datetime_to_datetime(from_dtype, to_dtype): + data = np.arange(1, 10) np_data = data.astype(from_dtype) ser = Series(np_data) @@ -393,10 +375,10 @@ def test_typecast_from_datetime_to_datetime(data, from_dtype, to_dtype): np.testing.assert_equal(np_casted, ser_casted.to_numpy()) -@pytest.mark.parametrize("data", [numerical_data()]) @pytest.mark.parametrize("nulls", ["some", "all"]) def test_to_from_pandas_nulls(data, nulls): - pd_data = pd.Series(data.copy().astype("datetime64[ns]")) + data = np.arange(1, 10) + pd_data = pd.Series(data.astype("datetime64[ns]")) if nulls == "some": # Fill half the values with NaT pd_data[list(range(0, len(pd_data), 2))] = np.datetime64("nat", "ns") @@ -480,16 +462,11 @@ def test_datetime_nunique(data, nulls): assert_eq(got, expected) -testdata = [ - ( - Series( - ["2018-01-01", None, "2019-01-31", None, "2018-01-01"], - dtype="datetime64[ms]", - ), - True, - ), - ( - Series( +@pytest.mark.parametrize( + "data, expected", + [ + [["2018-01-01", None, "2019-01-31", None, "2018-01-01"], True], + [ [ "2018-01-01", "2018-01-02", @@ -497,23 +474,18 @@ def test_datetime_nunique(data, nulls): "2018-03-01", "2018-01-01", ], - dtype="datetime64[ms]", - ), - False, - ), - ( - Series( + False, + ], + [ np.array( ["2018-01-01", None, "2019-12-30"], dtype="datetime64[ms]" - ) - ), - True, - ), -] - - -@pytest.mark.parametrize("data, expected", testdata) + ), + True, + ], + ], +) def test_datetime_has_null_test(data, expected): + data = Series(data, dtype="datetime64[ms]") pd_data = data.to_pandas() count = pd_data.notna().value_counts() expected_count = 0 @@ -1038,7 +1010,6 @@ def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): @pytest.mark.parametrize("data", ["20110101", "20120101", "20130101"]) @pytest.mark.parametrize("other_scalars", ["20110101", "20120101", "20130101"]) -@pytest.mark.parametrize("op", _cmpops) @pytest.mark.parametrize( "dtype", ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], @@ -1432,21 +1403,18 @@ def test_isocalendar_series(data): @pytest.mark.parametrize( "data", [ - pd.DatetimeIndex([], dtype="datetime64[ns]"), - pd.DatetimeIndex([None, None], dtype="datetime64[ns]"), - pd.DatetimeIndex( - [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - ], - dtype="datetime64[ns]", - ), - pd.DatetimeIndex(["2100-03-14 07:30:00"], dtype="datetime64[ns]"), + [], + [None, None], + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + ], + ["2100-03-14 07:30:00"], ], ) def test_isocalendar_index(data): - ps = data.copy() + ps = pd.DatetimeIndex(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.isocalendar() @@ -1474,27 +1442,20 @@ def test_days_in_months(dtype): assert_eq(ps.dt.days_in_month, gs.dt.days_in_month) -@pytest.mark.parametrize( - "data", - [ - [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_month_start(data, dtype): - # Series - ps = pd.Series(data, dtype=dtype) +def test_is_month_start(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + 
"1970-01-01", + "1969-12-11", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_month_start @@ -1507,19 +1468,6 @@ def test_is_month_start(data, dtype): # Date Range Tests # ################################################################## -date_range_test_dates_start = [ - "2000-02-13 08:41:06", # leap year - "1996-11-21 04:05:30", # non leap year - "1970-01-01 00:00:00", # unix epoch time 0 - "1831-05-08 15:23:21", -] -date_range_test_dates_end = [ - "2000-02-13 08:41:06", # leap year - "1996-11-21 04:05:30", # non leap year - "1970-01-01 00:00:00", # unix epoch time 0 - "1831-05-08 15:23:21", -] -date_range_test_periods = [1, 10, 100] date_range_test_freq = [ {"months": 3, "years": 1}, {"hours": 10, "days": 57, "nanoseconds": 3}, @@ -1532,22 +1480,49 @@ def test_is_month_start(data, dtype): ] -@pytest.fixture(params=date_range_test_dates_start[:]) +@pytest.fixture( + params=[ + "2000-02-13 08:41:06", + "1996-11-21 04:05:30", + "1970-01-01 00:00:00", + "1831-05-08 15:23:21", + ], + ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], +) def start(request): return request.param -@pytest.fixture(params=date_range_test_dates_end[:]) +@pytest.fixture( + params=[ + "2000-02-13 08:41:06", + "1996-11-21 04:05:30", + "1970-01-01 00:00:00", + "1831-05-08 15:23:21", + ], + ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], +) def end(request): return request.param -@pytest.fixture(params=date_range_test_periods[:]) +@pytest.fixture(params=[1, 10]) def periods(request): return request.param -@pytest.fixture(params=date_range_test_freq[:]) +@pytest.fixture( + params=[ + {"months": 3, "years": 1}, + {"hours": 10, "days": 57, "nanoseconds": 3}, + "83D", + "17h", + "-680min", + "110546s", + "110546789ms", + "110546789248us", + ] +) def freq(request): return request.param @@ -1635,18 +1610,17 @@ def test_date_range_freq_does_not_divide_range(): ) -def test_date_range_raise_overflow(): - # Fixed offset - start = np.datetime64(np.iinfo("int64").max, "ns") - periods = 2 - freq = cudf.DateOffset(nanoseconds=1) - with pytest.raises(pd.errors.OutOfBoundsDatetime): - cudf.date_range(start=start, periods=periods, freq=freq) - - # Non-fixed offset +@pytest.mark.parametrize( + "kwargs", + [ + {"nanoseconds": 1}, + {"months": 1}, + ], +) +def test_date_range_raise_overflow(kwargs): start = np.datetime64(np.iinfo("int64").max, "ns") periods = 2 - freq = cudf.DateOffset(months=1) + freq = cudf.DateOffset(**kwargs) with pytest.raises(pd.errors.OutOfBoundsDatetime): cudf.date_range(start=start, periods=periods, freq=freq) @@ -1699,28 +1673,21 @@ def test_date_range_raise_unsupported(freqstr_unsupported): ################################################################## -@pytest.mark.parametrize( - "data", - [ - [ - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_month_end(data, dtype): - # Series - ps = pd.Series(data, dtype=dtype) +def test_is_month_end(): + data = [ + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_month_end @@ -1729,29 +1696,23 @@ def test_is_month_end(data, dtype): assert_eq(expect, got) -@pytest.mark.parametrize( - "data", 
- [ - [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-01-01", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - "2017-12-30", - "2017-12-31", - "2018-01-01", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_year_start(data, dtype): - ps = pd.Series(data, dtype=dtype) +def test_is_year_start(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-01-01", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + "2017-12-30", + "2017-12-31", + "2018-01-01", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_year_start @@ -1760,33 +1721,27 @@ def test_is_year_start(data, dtype): assert_eq(expect, got) -@pytest.mark.parametrize( - "data", - [ - [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-12-31", - "1800-03-14", - "2017-12-30", - "2017-12-31", - "2020-12-31 08:00:00", - None, - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - None, - "1800-12-14 07:30:00", - "2100-12-14 07:30:00", - "2020-05-31", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_year_end(data, dtype): - ps = pd.Series(data, dtype=dtype) +def test_is_year_end(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-12-31", + "1800-03-14", + "2017-12-30", + "2017-12-31", + "2020-12-31 08:00:00", + None, + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + None, + "1800-12-14 07:30:00", + "2100-12-14 07:30:00", + "2020-05-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_year_end @@ -1795,31 +1750,24 @@ def test_is_year_end(data, dtype): assert_eq(expect, got) -@pytest.mark.parametrize( - "data", - [ - [ - "2020-05-01", - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-04-1", - "1970-01-01", - "1969-12-11", - "2020-12-31", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_quarter_start(data, dtype): - # Series - ps = pd.Series(data, dtype=dtype) +def test_is_quarter_start(): + data = [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_quarter_start @@ -1828,31 +1776,24 @@ def test_is_quarter_start(data, dtype): assert_eq(expect, got) -@pytest.mark.parametrize( - "data", - [ - [ - "2020-05-01", - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-04-1", - "1970-01-01", - "1969-12-11", - "2020-12-31", - ] - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_is_quarter_end(data, dtype): - # Series - ps = pd.Series(data, dtype=dtype) +def test_is_quarter_end(): + data = [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") gs = cudf.from_pandas(ps) expect = ps.dt.is_quarter_end @@ -1871,28 +1812,21 @@ def test_error_values(): PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="https://github.com/pandas-dev/pandas/issues/52761", ) -@pytest.mark.parametrize( - "data", - [ - ( - 
[
-                "2020-05-31 08:00:00",
-                "1999-12-31 18:40:10",
-                "2000-12-31 04:00:05",
-                "1900-02-28 07:00:06",
-                "1800-03-14 07:30:20",
-                "2100-03-14 07:30:20",
-                "1970-01-01 00:00:09",
-                "1969-12-31 12:59:10",
-            ]
-        )
-    ],
-)
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
     "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"]
 )
-def test_ceil(data, time_type, resolution):
+def test_ceil(time_type, resolution):
+    data = [
+        "2020-05-31 08:00:00",
+        "1999-12-31 18:40:10",
+        "2000-12-31 04:00:05",
+        "1900-02-28 07:00:06",
+        "1800-03-14 07:30:20",
+        "2100-03-14 07:30:20",
+        "1970-01-01 00:00:09",
+        "1969-12-31 12:59:10",
+    ]
     gs = cudf.Series(data, dtype=time_type)
     ps = gs.to_pandas()
 
@@ -1905,28 +1839,21 @@ def test_ceil(data, time_type, resolution):
     PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
     reason="https://github.com/pandas-dev/pandas/issues/52761",
 )
-@pytest.mark.parametrize(
-    "data",
-    [
-        (
-            [
-                "2020-05-31 08:00:00",
-                "1999-12-31 18:40:10",
-                "2000-12-31 04:00:05",
-                "1900-02-28 07:00:06",
-                "1800-03-14 07:30:20",
-                "2100-03-14 07:30:20",
-                "1970-01-01 00:00:09",
-                "1969-12-31 12:59:10",
-            ]
-        )
-    ],
-)
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
     "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"]
 )
-def test_floor(data, time_type, resolution):
+def test_floor(time_type, resolution):
+    data = [
+        "2020-05-31 08:00:00",
+        "1999-12-31 18:40:10",
+        "2000-12-31 04:00:05",
+        "1900-02-28 07:00:06",
+        "1800-03-14 07:30:20",
+        "2100-03-14 07:30:20",
+        "1970-01-01 00:00:09",
+        "1969-12-31 12:59:10",
+    ]
     gs = cudf.Series(data, dtype=time_type)
     ps = gs.to_pandas()
 
@@ -1935,28 +1862,21 @@ def test_floor(data, time_type, resolution):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize(
-    "data",
-    [
-        (
-            [
-                "2020-05-31 08:00:00",
-                "1999-12-31 18:40:10",
-                "2000-12-31 04:00:05",
-                "1900-02-28 07:00:06",
-                "1800-03-14 07:30:20",
-                "2100-03-14 07:30:20",
-                "1970-01-01 00:00:09",
-                "1969-12-31 12:59:10",
-            ]
-        )
-    ],
-)
 @pytest.mark.parametrize("time_type", DATETIME_TYPES)
 @pytest.mark.parametrize(
     "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"]
 )
-def test_round(data, time_type, resolution):
+def test_round(time_type, resolution):
+    data = [
+        "2020-05-31 08:00:00",
+        "1999-12-31 18:40:10",
+        "2000-12-31 04:00:05",
+        "1900-02-28 07:00:06",
+        "1800-03-14 07:30:20",
+        "2100-03-14 07:30:20",
+        "1970-01-01 00:00:09",
+        "1969-12-31 12:59:10",
+    ]
     gs = cudf.Series(data, dtype=time_type)
     ps = gs.to_pandas()
 
@@ -2008,31 +1928,23 @@ def test_first(idx, offset):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize(
-    # This test case tests correctness when start is end of month
-    "idx, offset",
-    [
-        (
-            pd.DatetimeIndex(
-                [
-                    "2020-01-31",
-                    "2020-02-15",
-                    "2020-02-29",
-                    "2020-03-15",
-                    "2020-03-31",
-                    "2020-04-15",
-                    "2020-04-30",
-                ]
-            ),
-            "3M",
-        )
-    ],
-)
 @pytest.mark.skipif(
     PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
     reason="warning not present in older pandas versions",
 )
-def test_first_start_at_end_of_month(idx, offset):
+def test_first_start_at_end_of_month():
+    idx = pd.DatetimeIndex(
+        [
+            "2020-01-31",
+            "2020-02-15",
+            "2020-02-29",
+            "2020-03-15",
+            "2020-03-31",
+            "2020-04-15",
+            "2020-04-30",
+        ]
+    )
+    offset = "3M"
     p = pd.Series(range(len(idx)), index=idx)
     g = cudf.from_pandas(p)
 
@@ -2115,7 +2027,6 @@ def test_datetime_constructor(data, dtype):
     assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize("op", _cmpops)
 def test_datetime_binop_tz_timestamp(op):
     s = cudf.Series([1, 2, 3], dtype="datetime64[ns]")
     pd_tz_timestamp = 
pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") @@ -2127,14 +2038,9 @@ def test_datetime_binop_tz_timestamp(op): op(s, date_scalar) -@pytest.mark.parametrize( - "data1", [["20110101", "20120101", None, "20140101", None]] -) -@pytest.mark.parametrize( - "data2", [["20110101", "20120101", "20130101", None, None]] -) -@pytest.mark.parametrize("op", _cmpops) -def test_datetime_series_cmpops_pandas_compatibility(data1, data2, op): +def test_datetime_series_cmpops_pandas_compatibility(op): + data1 = ["20110101", "20120101", None, "20140101", None] + data2 = ["20110101", "20120101", "20130101", None, None] gsr1 = cudf.Series(data=data1, dtype="datetime64[ns]") psr1 = gsr1.to_pandas() diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index 048b3a656e3..2cb16f71011 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. import decimal from decimal import Decimal @@ -6,7 +6,6 @@ import numpy as np import pyarrow as pa import pytest -from packaging import version import cudf from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn @@ -16,41 +15,36 @@ FLOAT_TYPES, INTEGER_TYPES, SIGNED_TYPES, - _decimal_series, expect_warning_if, ) -data_ = [ - [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [1], - [-1], - [1, 2, 3, 4], - [42, 17, 41], - [1, 2, None, 4], - [None, None, None], - [], -] -typ_ = [ - pa.decimal128(precision=4, scale=2), - pa.decimal128(precision=5, scale=3), - pa.decimal128(precision=6, scale=4), -] - - -@pytest.mark.parametrize("data_", data_) -@pytest.mark.parametrize("typ_", typ_) -def test_round_trip_decimal64_column(data_, typ_): - pa_arr = pa.array(data_, type=typ_) - col_64 = Decimal64Column.from_arrow(pa_arr) - assert pa_arr.equals(col_64.to_arrow()) - -@pytest.mark.parametrize("data_", data_) -@pytest.mark.parametrize("typ_", typ_) -def test_round_trip_decimal32_column(data_, typ_): +@pytest.mark.parametrize( + "data_", + [ + [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [1], + [-1], + [1, 2, 3, 4], + [42, 17, 41], + [1, 2, None, 4], + [None, None, None], + [], + ], +) +@pytest.mark.parametrize( + "typ_", + [ + pa.decimal128(precision=4, scale=2), + pa.decimal128(precision=5, scale=3), + pa.decimal128(precision=6, scale=4), + ], +) +@pytest.mark.parametrize("col", [Decimal32Column, Decimal64Column]) +def test_round_trip_decimal_column(data_, typ_, col): pa_arr = pa.array(data_, type=typ_) - col_32 = Decimal32Column.from_arrow(pa_arr) + col_32 = col.from_arrow(pa_arr) assert pa_arr.equals(col_32.to_arrow()) @@ -68,36 +62,29 @@ def test_from_arrow_max_precision_decimal32(): ) -@pytest.mark.parametrize( - "data", - [ - cudf.Series( - [ - 14.12302, - 97938.2, - np.nan, - 0.0, - -8.302014, - np.nan, - 94.31304, - -112.2314, - 0.3333333, - np.nan, - ] - ), - ], -) @pytest.mark.parametrize("from_dtype", FLOAT_TYPES) @pytest.mark.parametrize( "to_dtype", [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)], ) -def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): +def test_typecast_from_float_to_decimal(request, from_dtype, to_dtype): + data = cudf.Series( + [ + 14.12302, + 97938.2, + np.nan, + 0.0, + -8.302014, + np.nan, + 94.31304, + -112.2314, + 
0.3333333, + np.nan, + ] + ) request.applymarker( pytest.mark.xfail( - condition=version.parse(pa.__version__) >= version.parse("13.0.0") - and from_dtype == np.dtype("float32") - and to_dtype.precision > 12, + from_dtype == np.dtype("float32") and to_dtype.precision > 12, reason="https://github.com/rapidsai/cudf/issues/14169", ) ) @@ -113,32 +100,27 @@ def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): assert_eq(got, expected) -@pytest.mark.parametrize( - "data", - [ - cudf.Series( - [ - 14.12302, - 38.2, - np.nan, - 0.0, - -8.302014, - np.nan, - 94.31304, - np.nan, - -112.2314, - 0.3333333, - np.nan, - ] - ), - ], -) @pytest.mark.parametrize("from_dtype", INTEGER_TYPES) @pytest.mark.parametrize( "to_dtype", [Decimal64Dtype(9, 3), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)], ) -def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype): +def test_typecast_from_int_to_decimal(from_dtype, to_dtype): + data = cudf.Series( + [ + 14.12302, + 38.2, + np.nan, + 0.0, + -8.302014, + np.nan, + 94.31304, + np.nan, + -112.2314, + 0.3333333, + np.nan, + ] + ) got = data.astype(from_dtype) pa_arr = ( @@ -153,25 +135,6 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype): assert_eq(got, expected) -@pytest.mark.parametrize( - "data", - [ - cudf.Series( - [ - 14.12309, - 2.343942, - np.nan, - 0.0, - -8.302082, - np.nan, - 94.31308, - -112.2364, - -8.029972, - np.nan, - ] - ), - ], -) @pytest.mark.parametrize( "from_dtype", [ @@ -194,7 +157,21 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype): Decimal32Dtype(5, 3), ], ) -def test_typecast_to_from_decimal(data, from_dtype, to_dtype): +def test_typecast_to_from_decimal(from_dtype, to_dtype): + data = cudf.Series( + [ + 14.12309, + 2.343942, + np.nan, + 0.0, + -8.302082, + np.nan, + 94.31308, + -112.2364, + -8.029972, + np.nan, + ] + ) if from_dtype.scale > to_dtype.MAX_PRECISION: pytest.skip( "This is supposed to overflow because the representation value in " @@ -216,31 +193,26 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype): assert_eq(got, expected) -@pytest.mark.parametrize( - "data", - [ - cudf.Series( - [ - 14.12309, - 2.343942, - np.nan, - 0.0, - -8.302082, - np.nan, - 94.31308, - -112.2364, - -8.029972, - np.nan, - ] - ), - ], -) @pytest.mark.parametrize( "from_dtype", [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(17, 10)], ) @pytest.mark.parametrize("to_dtype", SIGNED_TYPES) -def test_typecast_from_decimal(data, from_dtype, to_dtype): +def test_typecast_from_decimal(from_dtype, to_dtype): + data = cudf.Series( + [ + 14.12309, + 2.343942, + np.nan, + 0.0, + -8.302082, + np.nan, + 94.31308, + -112.2364, + -8.029972, + np.nan, + ] + ) got = data.astype(from_dtype) pa_arr = got.to_arrow().cast(to_dtype, safe=False) @@ -252,7 +224,7 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): @pytest.mark.parametrize( - "args", + "data, dtype, item, to, expect", [ # scatter to a single index ( @@ -281,21 +253,21 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): ["1", "2", "3"], Decimal64Dtype(1, 0), Decimal(5), - cudf.Series([True, False, True]), + [True, False, True], ["5", "2", "5"], ), ( ["1.5", "2.5", "3.5"], Decimal64Dtype(2, 1), Decimal("5.5"), - cudf.Series([True, True, True]), + [True, True, True], ["5.5", "5.5", "5.5"], ), ( ["1.0042", "2.0042", "3.0042"], Decimal64Dtype(5, 4), Decimal("5.0042"), - cudf.Series([False, False, True]), + [False, False, True], ["1.0042", "2.0042", "5.0042"], ), # We will allow assigning a decimal with 
less precision @@ -320,16 +292,15 @@ def test_typecast_from_decimal(data, from_dtype, to_dtype): (["1", "2", "3"], Decimal64Dtype(1, 0), 50, 1, pa.lib.ArrowInvalid), ], ) -def test_series_setitem_decimal(args): - data, dtype, item, to, expect = args - data = _decimal_series(data, dtype) +def test_series_setitem_decimal(data, dtype, item, to, expect): + data = cudf.Series([Decimal(x) for x in data], dtype=dtype) if expect is pa.lib.ArrowInvalid: with pytest.raises(expect): data[to] = item return else: - expect = _decimal_series(expect, dtype) + expect = cudf.Series([Decimal(x) for x in expect], dtype=dtype) data[to] = item assert_eq(data, expect) @@ -347,37 +318,29 @@ def test_series_construction_with_nulls(input_obj): @pytest.mark.parametrize( "data", [ - { - "a": _decimal_series( - ["1", "2", "3"], dtype=cudf.Decimal64Dtype(1, 0) - ) - }, - { - "a": _decimal_series( - ["1", "2", "3"], dtype=cudf.Decimal64Dtype(1, 0) - ), - "b": _decimal_series( - ["1.0", "2.0", "3.0"], dtype=cudf.Decimal64Dtype(2, 1) - ), - "c": _decimal_series( - ["10.1", "20.2", "30.3"], dtype=cudf.Decimal64Dtype(3, 1) - ), - }, - { - "a": _decimal_series( - ["1", None, "3"], dtype=cudf.Decimal64Dtype(1, 0) - ), - "b": _decimal_series( - ["1.0", "2.0", None], dtype=cudf.Decimal64Dtype(2, 1) - ), - "c": _decimal_series( - [None, "20.2", "30.3"], dtype=cudf.Decimal64Dtype(3, 1) - ), - }, + [(["1", "2", "3"], cudf.Decimal64Dtype(1, 0))], + [ + (["1", "2", "3"], cudf.Decimal64Dtype(1, 0)), + (["1.0", "2.0", "3.0"], cudf.Decimal64Dtype(2, 1)), + (["10.1", "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), + ], + [ + (["1", None, "3"], cudf.Decimal64Dtype(1, 0)), + (["1.0", "2.0", None], cudf.Decimal64Dtype(2, 1)), + ([None, "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), + ], ], ) def test_serialize_decimal_columns(data): - df = cudf.DataFrame(data) + df = cudf.DataFrame( + { + str(i): cudf.Series( + [Decimal(x) if x is not None else x for x in values], + dtype=dtype, + ) + for i, (values, dtype) in enumerate(data) + } + ) recreated = df.__class__.deserialize(*df.serialize()) assert_eq(recreated, df) From 1e195f35a1f84b98a0b6e041b70a5fac4b553b8d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 23 Jul 2025 13:27:11 -0700 Subject: [PATCH 007/366] Use more pytest fixtures and avoid GPU parameterization in cuDF classic tests (#19436) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19436 --- python/cudf/cudf/tests/test_no_cuinit.py | 82 +++++++------------- python/cudf/cudf/tests/test_no_device.py | 23 +++--- python/cudf/cudf/tests/test_numpy_interop.py | 17 ++-- python/cudf/cudf/tests/test_pickling.py | 35 +++------ python/cudf/cudf/tests/test_quantiles.py | 13 +--- 5 files changed, 62 insertions(+), 108 deletions(-) diff --git a/python/cudf/cudf/tests/test_no_cuinit.py b/python/cudf/cudf/tests/test_no_cuinit.py index 593d280f960..ed9d6a2a901 100644 --- a/python/cudf/cudf/tests/test_no_cuinit.py +++ b/python/cudf/cudf/tests/test_no_cuinit.py @@ -1,6 +1,5 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. 
-import os import subprocess import sys from shutil import which @@ -40,7 +39,16 @@ def cuda_gdb(request): return gdb -def test_cudf_import_no_cuinit(cuda_gdb): +@pytest.mark.parametrize( + "cudf_call, should_be_initialized", + [ + ("import cudf", False), + ("import cudf; cudf.Series([1])", True), + ], +) +def test_rapids_no_initialize_cuinit( + cuda_gdb, monkeypatch, cudf_call, should_be_initialized +): # When RAPIDS_NO_INITIALIZE is set, importing cudf should _not_ # create a CUDA context (i.e. cuInit should not be called). # Intercepting the call to cuInit programmatically is tricky since @@ -50,62 +58,28 @@ def test_cudf_import_no_cuinit(cuda_gdb): # needs provide hooks that override dlsym, cuGetProcAddress, and # cuInit. # Instead, we just run under GDB and see if we hit a breakpoint - env = os.environ.copy() - env["RAPIDS_NO_INITIALIZE"] = "1" - output = subprocess.run( - [ - cuda_gdb, - "-x", - "-", - "--args", - sys.executable, - "-c", - "import cudf", - ], - input=GDB_COMMANDS, - env=env, - capture_output=True, - text=True, - cwd="/", - ) - - cuInit_called = output.stdout.find("in cuInit ()") - print("Command output:\n") # noqa: T201 - print("*** STDOUT ***") # noqa: T201 - print(output.stdout) # noqa: T201 - print("*** STDERR ***") # noqa: T201 - print(output.stderr) # noqa: T201 - assert output.returncode == 0 - assert cuInit_called < 0 - - -def test_cudf_create_series_cuinit(cuda_gdb): - # This tests that our gdb scripting correctly identifies cuInit - # when it definitely should have been called. - env = os.environ.copy() - env["RAPIDS_NO_INITIALIZE"] = "1" - output = subprocess.run( - [ - cuda_gdb, - "-x", - "-", - "--args", - sys.executable, - "-c", - "import cudf; cudf.Series([1])", - ], - input=GDB_COMMANDS, - env=env, - capture_output=True, - text=True, - cwd="/", - ) + with monkeypatch.context() as m: + m.setenv("RAPIDS_NO_INITIALIZE", "1") + output = subprocess.run( + [ + cuda_gdb, + "-x", + "-", + "--args", + sys.executable, + "-c", + cudf_call, + ], + input=GDB_COMMANDS, + capture_output=True, + text=True, + cwd="/", + ) - cuInit_called = output.stdout.find("in cuInit ()") print("Command output:\n") # noqa: T201 print("*** STDOUT ***") # noqa: T201 print(output.stdout) # noqa: T201 print("*** STDERR ***") # noqa: T201 print(output.stderr) # noqa: T201 assert output.returncode == 0 - assert cuInit_called >= 0 + assert ("in cuInit ()" in output.stdout) == should_be_initialized diff --git a/python/cudf/cudf/tests/test_no_device.py b/python/cudf/cudf/tests/test_no_device.py index 722762b2d0c..f663297eeb6 100644 --- a/python/cudf/cudf/tests/test_no_device.py +++ b/python/cudf/cudf/tests/test_no_device.py @@ -1,16 +1,13 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -import os +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
import subprocess +import sys -def test_cudf_import_no_device(): - env = os.environ.copy() - env["CUDA_VISIBLE_DEVICES"] = "-1" - output = subprocess.run( - ["python", "-c", "import cudf"], - env=env, - capture_output=True, - text=True, - cwd="/", - ) - assert output.returncode == 0 +def test_cudf_import_no_device(monkeypatch): + with monkeypatch.context() as m: + m.setenv("CUDA_VISIBLE_DEVICES", "-1") + output = subprocess.check_call( + [sys.executable, "-c", "import cudf"], + cwd="/", + ) + assert output == 0 diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py index fa664d52ecf..0bdb806732b 100644 --- a/python/cudf/cudf/tests/test_numpy_interop.py +++ b/python/cudf/cudf/tests/test_numpy_interop.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. import numpy as np import pytest @@ -78,18 +78,19 @@ def test_numpy_non_contiguious(): @pytest.mark.parametrize( "data", [ - Series([1, 2, 3, -12, 12, 44]), - Series([1, 2, 3, -12, 12, 44], dtype="str"), - Series([1, 2, 3, -12, 12, 44]).index, - DataFrame({"a": [1, 2, 3, -1234], "b": [0.1, 0.2222, 0.4, -3.14]}), - DataFrame( + lambda: Series([1, 2, 3, -12, 12, 44]), + lambda: Series([1, 2, 3, -12, 12, 44], dtype="str"), + lambda: DataFrame( {"a": [1, 2, 3, -1234], "b": [0.1, 0.2222, 0.4, -3.14]} - ).index, + ), ], ) @pytest.mark.parametrize("dtype", [None, "float", "int", "str"]) def test_series_dataframe__array__(data, dtype): - gs = data + gs = data() with pytest.raises(TypeError): gs.__array__(dtype=dtype) + + with pytest.raises(TypeError): + gs.index.__array__(dtype=dtype) diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index 2f10a5dfd74..ac13056fa7c 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. import pickle @@ -13,7 +13,16 @@ pytestmark = pytest.mark.spilling -def check_serialization(df): +@pytest.mark.parametrize( + "keys", + [ + np.arange(5, dtype=np.float64), + pd.Categorical(["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"]), + ], +) +def test_pickle_dataframe(keys): + rng = np.random.default_rng(seed=0) + df = DataFrame({"keys": keys, "vals": rng.random(len(keys))}) # basic assert_frame_picklable(df) # sliced @@ -39,28 +48,6 @@ def assert_frame_picklable(df): assert_eq(loaded, df) -def test_pickle_dataframe_numeric(): - rng = np.random.default_rng(seed=0) - df = DataFrame() - nelem = 10 - df["keys"] = np.arange(nelem, dtype=np.float64) - df["vals"] = rng.random(nelem) - - check_serialization(df) - - -def test_pickle_dataframe_categorical(): - rng = np.random.default_rng(seed=0) - - df = DataFrame() - df["keys"] = pd.Categorical( - ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] - ) - df["vals"] = rng.random(len(df)) - - check_serialization(df) - - def test_memory_usage_dataframe(): rng = np.random.default_rng(seed=0) df = DataFrame() diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py index 84de2ac38e7..8b2f5acb3e1 100644 --- a/python/cudf/cudf/tests/test_quantiles.py +++ b/python/cudf/cudf/tests/test_quantiles.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
import re @@ -93,14 +93,9 @@ def test_quantile_type_int_float(interpolation): assert type(expected) is type(actual) -@pytest.mark.parametrize( - "data", - [ - [float("nan"), float("nan"), 0.9], - [float("nan"), float("nan"), float("nan")], - ], -) -def test_ignore_nans(data): +@pytest.mark.parametrize("val", [0.9, float("nan")]) +def test_ignore_nans(val): + data = [float("nan"), float("nan"), val] psr = pd.Series(data) gsr = cudf.Series(data, nan_as_null=False) From 1ba88355499e6c811180503ba1df74d9ffab9d8b Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:17:43 -0400 Subject: [PATCH 008/366] Fix missing return in StringFunction.Strptime strict=True path (#19464) When `strict=True` and all strings are valid timestamps, we incorrectly hit the fall through case (ie. Strptime function is not implemented). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19464 --- .../cudf_polars/dsl/expressions/string.py | 24 +++++++------ .../tests/expressions/test_stringfunction.py | 34 +++++++++---------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 0b65b5fbd8a..1e70e855cd5 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -692,13 +692,14 @@ def do_evaluate( ) elif self.name is StringFunction.Name.Strptime: # TODO: ignores ambiguous - format, strict, exact, cache = self.options + format, strict, _, _ = self.options col = self.children[0].evaluate(df, context=context) is_timestamps = plc.strings.convert.convert_datetime.is_timestamp( col.obj, format ) + plc_col = col.obj if strict: if not plc.reduce.reduce( is_timestamps, @@ -710,16 +711,17 @@ def do_evaluate( not_timestamps = plc.unary.unary_operation( is_timestamps, plc.unary.UnaryOperator.NOT ) - null = plc.Scalar.from_py(None, col.obj.type()) - res = plc.copying.boolean_mask_scatter( - [null], plc.Table([col.obj]), not_timestamps - ) - return Column( - plc.strings.convert.convert_datetime.to_timestamps( - res.columns()[0], self.dtype.plc, format - ), - dtype=self.dtype, - ) + null = plc.Scalar.from_py(None, plc_col.type()) + plc_col = plc.copying.boolean_mask_scatter( + [null], plc.Table([plc_col]), not_timestamps + ).columns()[0] + + return Column( + plc.strings.convert.convert_datetime.to_timestamps( + plc_col, self.dtype.plc, format + ), + dtype=self.dtype, + ) elif self.name is StringFunction.Name.Replace: column, target, repl = columns n, _ = self.options diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index e9b96b2df62..6515e487e7e 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -256,32 +256,32 @@ def test_split_exact_inclusive_unsupported(ldf_split): assert_ir_translation_raises(q, NotImplementedError) -@pytest.fixture -def to_datetime_data(): - return pl.LazyFrame( - { - "a": [ - "2021-01-01", - "2021-01-02", - "abcd", - ] - } - ) - - @pytest.mark.parametrize("cache", [True, False], ids=lambda cache: f"{cache=}") @pytest.mark.parametrize("strict", [True, False], ids=lambda strict: f"{strict=}") @pytest.mark.parametrize("exact", [True, False], ids=lambda exact: f"{exact=}") 
@pytest.mark.parametrize("format", ["%Y-%m-%d", None], ids=lambda format: f"{format=}") -def test_to_datetime(to_datetime_data, cache, strict, format, exact): - q = to_datetime_data.select( +@pytest.mark.parametrize( + "values, has_invalid_row", + [ + (["2024-01-01", "2023-12-31", None], False), + (["2024-01-01", "foo", None], True), + ], + ids=["valid", "invalid"], +) +def test_to_datetime(values, has_invalid_row, cache, strict, format, exact): + df = pl.DataFrame({"a": values}) + q = df.lazy().select( pl.col("a").str.strptime( - pl.Datetime("ns"), format=format, cache=cache, strict=strict, exact=exact + pl.Datetime("ns"), + format=format, + cache=cache, + strict=strict, + exact=exact, ) ) if cache or format is None or not exact: assert_ir_translation_raises(q, NotImplementedError) - elif strict: + elif strict and has_invalid_row: assert_collect_raises( q, polars_except=pl.exceptions.InvalidOperationError, From 652edce46f8c516df59922002d6f1c7d7dd69f83 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 24 Jul 2025 10:17:32 -0500 Subject: [PATCH 009/366] Allow latest OS in devcontainers (#19480) This PR removes the OS suffix from devcontainers, allowing the upstream devcontainer images to determine the OS version. Contributes to https://github.com/rapidsai/build-planning/issues/200. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/19480 --- .devcontainer/cuda12.9-conda/devcontainer.json | 2 +- .devcontainer/cuda12.9-pip/devcontainer.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 5c010923260..1ed542f11f3 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge" } }, "runArgs": [ diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index 666e6b872f6..3b35d4398c5 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9" } }, "runArgs": [ From a9acbd46b22e36ae28dd6cd86b8ffb7847301aeb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:05:20 -0700 Subject: [PATCH 010/366] Use more pytest fixtures and avoid GPU parameterization in test_indexing/joining/monotonic/multiindex.py (#19437) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Adds some `copy()`s in order not to mutate pytest fixtures Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19437 --- python/cudf/cudf/tests/test_indexing.py | 93 +++---- python/cudf/cudf/tests/test_joining.py | 111 ++++---- python/cudf/cudf/tests/test_monotonic.py | 33 ++- python/cudf/cudf/tests/test_multiindex.py | 302 +++++++++++----------- 4 files changed, 255 insertions(+), 284 deletions(-) diff --git 
a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 281f1d014a1..6d373f56b14 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -819,19 +819,19 @@ def test_empty_boolean_mask(dtype): ], ) @pytest.mark.parametrize( - "mask", + "mask_vals", [ [True, True, True, True], [False, False, False, False], [True, False, True, False], [True, False, False, True], - np.array([True, False, True, False]), - pd.Series([True, False, True, False]), - cudf.Series([True, False, True, False]), ], ) +@pytest.mark.parametrize( + "mask_class", [list, np.array, pd.Series, cudf.Series] +) @pytest.mark.parametrize("nulls", ["one", "some", "all", "none"]) -def test_series_apply_boolean_mask(data, mask, nulls): +def test_series_apply_boolean_mask(data, mask_vals, mask_class, nulls): rng = np.random.default_rng(seed=0) psr = pd.Series(data) @@ -853,6 +853,7 @@ def test_series_apply_boolean_mask(data, mask, nulls): if psr.dtype == "object" and nulls == "all": gsr = cudf.Series([None, None, None, None], dtype="object") + mask = mask_class(mask_vals) if isinstance(mask, cudf.Series): expect = psr[mask.to_pandas()] else: @@ -874,11 +875,6 @@ def test_dataframe_apply_boolean_mask(): assert_eq(pdf[[True, False, True, False]], gdf[[True, False, True, False]]) -""" -This test compares cudf and Pandas DataFrame boolean indexing. -""" - - @pytest.mark.parametrize( "mask_fn", [lambda x: x, lambda x: np.array(x), lambda x: pd.Series(x)] ) @@ -1146,6 +1142,8 @@ def test_series_setitem_loc_numeric_index(key, value): ) def test_dataframe_setitem_iloc(key, value, pdf_gdf): pdf, gdf = pdf_gdf + pdf = pdf.copy() + gdf = gdf.copy() pdf.iloc[key] = value gdf.iloc[key] = value assert_eq(pdf, gdf) @@ -1172,6 +1170,8 @@ def test_dataframe_setitem_iloc(key, value, pdf_gdf): ) def test_dataframe_setitem_loc(key, value, pdf_gdf): pdf, gdf = pdf_gdf + pdf = pdf.copy() + gdf = gdf.copy() pdf.loc[key] = value gdf.loc[key] = value assert_eq(pdf, gdf) @@ -1202,6 +1202,8 @@ def test_dataframe_setitem_loc_empty_df(key, value): ) def test_dataframe_setitem_iloc_multiindex(key, value, pdf_gdf_multi): pdf, gdf = pdf_gdf_multi + pdf = pdf.copy() + gdf = gdf.copy() pdf.iloc[key] = value gdf.iloc[key] = value @@ -1386,8 +1388,8 @@ def test_dataframe_sliced(gdf_kwargs, slice): @pytest.mark.parametrize( "gdf", [ - cudf.DataFrame({"a": range(10000)}), - cudf.DataFrame( + lambda: cudf.DataFrame({"a": range(10000)}), + lambda: cudf.DataFrame( { "a": range(10000), "b": range(10000), @@ -1397,21 +1399,23 @@ def test_dataframe_sliced(gdf_kwargs, slice): "f": range(10000), } ), - cudf.DataFrame({"a": range(20), "b": range(20)}), - cudf.DataFrame( + lambda: cudf.DataFrame({"a": range(20), "b": range(20)}), + lambda: cudf.DataFrame( { "a": range(20), "b": range(20), "c": ["abc", "def", "xyz", "def", "pqr"] * 4, } ), - cudf.DataFrame(index=[1, 2, 3]), - cudf.DataFrame(index=range(10000)), - cudf.DataFrame(columns=["a", "b", "c", "d"]), - cudf.DataFrame(columns=["a"], index=range(10000)), - cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(10000)), - cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), - cudf.DataFrame( + lambda: cudf.DataFrame(index=[1, 2, 3]), + lambda: cudf.DataFrame(index=range(10000)), + lambda: cudf.DataFrame(columns=["a", "b", "c", "d"]), + lambda: cudf.DataFrame(columns=["a"], index=range(10000)), + lambda: cudf.DataFrame( + columns=["a", "col2", "...col n"], index=range(10000) + ), + lambda: 
cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), + lambda: cudf.DataFrame( columns=["a", "b", "c", "d"], index=cudf.Series(range(10000)).astype("str"), ), @@ -1422,6 +1426,7 @@ def test_dataframe_sliced(gdf_kwargs, slice): [slice(6), slice(1), slice(7), slice(1, 3)], ) def test_dataframe_iloc_index(gdf, slice): + gdf = gdf() pdf = gdf.to_pandas() actual = gdf.iloc[:, slice] @@ -1595,26 +1600,11 @@ def test_dataframe_iloc_inplace_update(key, value): assert_eq(expected, actual) -@pytest.mark.parametrize( - "loc_key", - [([0, 2], ["x", "y"])], -) -@pytest.mark.parametrize( - "iloc_key", - [[0, 2]], -) -@pytest.mark.parametrize( - ("data, index"), - [ - ( - {"x": [10, 20], "y": [30, 40]}, - [0, 2], - ) - ], -) -def test_dataframe_loc_iloc_inplace_update_with_RHS_dataframe( - loc_key, iloc_key, data, index -): +def test_dataframe_loc_iloc_inplace_update_with_RHS_dataframe(): + loc_key = ([0, 2], ["x", "y"]) + iloc_key = [0, 2] + data = {"x": [10, 20], "y": [30, 40]} + index = [0, 2] gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) pdf = gdf.to_pandas() @@ -1691,16 +1681,18 @@ def test_dataframe_iloc_inplace_update_shape_mismatch_RHS_df(): @pytest.mark.parametrize( - "array,is_error", + "end, second_dim, is_error", [ - (cupy.arange(20, 40).reshape(-1, 2), False), - (cupy.arange(20, 50).reshape(-1, 3), True), - (np.arange(20, 40).reshape(-1, 2), False), - (np.arange(20, 30).reshape(-1, 1), False), - (cupy.arange(20, 30).reshape(-1, 1), False), + (40, 2, False), + (50, 3, True), + (30, 1, False), ], ) -def test_dataframe_indexing_setitem_np_cp_array(array, is_error): +@pytest.mark.parametrize("mod", [cupy, np]) +def test_dataframe_indexing_setitem_np_cp_array( + end, second_dim, is_error, mod +): + array = mod.arange(20, end).reshape(-1, second_dim) gdf = cudf.DataFrame({"a": range(10), "b": range(10)}) pdf = gdf.to_pandas() if not is_error: @@ -2311,12 +2303,11 @@ def test_loc_datetime_monotonic_with_ts(data, scalar): assert_eq(actual, expected) -@pytest.mark.parametrize("data", [[15, 14, 3, 10, 1]]) @pytest.mark.parametrize("scalar", [1, 10, 15, 14, 0, 2]) -def test_loc_datetime_random_with_ts(data, scalar): +def test_loc_datetime_random_with_ts(scalar): gdf = cudf.DataFrame( {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5]}, - index=cudf.Index(data, dtype="datetime64[ns]"), + index=cudf.Index([15, 14, 3, 10, 1], dtype="datetime64[ns]"), ) pdf = gdf.to_pandas() diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index d893974e610..bb24111cfc3 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -19,45 +19,35 @@ ) from cudf.utils.dtypes import find_common_type -_JOIN_TYPES = ( - "left", - "inner", - "outer", - "right", - "leftanti", - "leftsemi", - "cross", -) - -def make_params(): - rng = np.random.default_rng(seed=0) - - hows = _JOIN_TYPES +@pytest.fixture( + params=( + "left", + "inner", + "outer", + "right", + "leftanti", + "leftsemi", + "cross", + ) +) +def how(request): + return request.param - # Test specific cases (1) - aa = [0, 0, 4, 5, 5] - bb = [0, 0, 2, 3, 5] - for how in hows: - yield (aa, bb, how) - # Test specific cases (2) - aa = [0, 0, 1, 2, 3] - bb = [0, 1, 2, 2, 3] - for how in hows: - yield (aa, bb, how) +rng = np.random.default_rng(seed=0) - # Test large random integer inputs - aa = rng.integers(0, 50, 100) - bb = rng.integers(0, 50, 100) - for how in hows: - yield (aa, bb, how) - # Test floating point inputs - aa = rng.random(50) - bb = rng.random(50) - for how in hows: - 
yield (aa, bb, how) +@pytest.fixture( + params=[ + [[0, 0, 4, 5, 5], [0, 0, 2, 3, 5]], + [[0, 0, 1, 2, 3], [0, 1, 2, 2, 3]], + [rng.integers(0, 50, 100), rng.integers(0, 50, 100)], + [rng.random(50), rng.random(50)], + ] +) +def aa_bb(request): + return request.param def pd_odd_joins(left, right, join_type): @@ -68,8 +58,6 @@ def pd_odd_joins(left, right, join_type): def assert_join_results_equal(expect, got, how, **kwargs): - if how not in _JOIN_TYPES: - raise ValueError(f"Unrecognized join type {how}") if how == "right": got = got[expect.columns] @@ -98,11 +86,14 @@ def assert_join_results_equal(expect, got, how, **kwargs): raise ValueError(f"Not a join result: {type(expect).__name__}") -@pytest.mark.parametrize("aa,bb,how", make_params()) -def test_dataframe_join_how(aa, bb, how): - df = cudf.DataFrame() - df["a"] = aa - df["b"] = bb +def test_dataframe_join_how(aa_bb, how): + aa, bb = aa_bb + df = cudf.DataFrame( + { + "a": aa, + "b": bb, + } + ) def work_pandas(df, how): df1 = df.set_index("a") @@ -149,21 +140,19 @@ def work_gdf(df): # _sorted_check_series(expect['b'], expect['a'], got['b'], # got['a']) else: + magic = 0xDEADBEAF for c in expecto.columns: - _check_series(expecto[c].fillna(-1), goto[c].fillna(-1)) + expect = expecto[c].fillna(-1) + got = goto[c].fillna(-1) - -def _check_series(expect, got): - magic = 0xDEADBEAF - - direct_equal = np.all(expect.values == got.to_numpy()) - nanfilled_equal = np.all( - expect.fillna(magic).values == got.fillna(magic).to_numpy() - ) - msg = "direct_equal={}, nanfilled_equal={}".format( - direct_equal, nanfilled_equal - ) - assert direct_equal or nanfilled_equal, msg + direct_equal = np.all(expect.values == got.to_numpy()) + nanfilled_equal = np.all( + expect.fillna(magic).values == got.fillna(magic).to_numpy() + ) + msg = "direct_equal={}, nanfilled_equal={}".format( + direct_equal, nanfilled_equal + ) + assert direct_equal or nanfilled_equal, msg @pytest.mark.skipif( @@ -610,11 +599,9 @@ def test_indicator(): gdf = cudf.DataFrame({"x": [1, 2, 1]}) gdf.merge(gdf, indicator=False) - with pytest.raises(NotImplementedError) as info: + with pytest.raises(NotImplementedError, match=".*indicator=False.*"): gdf.merge(gdf, indicator=True) - assert "indicator=False" in str(info.value) - def test_merge_suffixes(): pdf = cudf.DataFrame({"x": [1, 2, 1]}) @@ -1787,15 +1774,15 @@ def test_typecast_on_join_indexes_matching_categorical(): @pytest.mark.parametrize( "lhs", [ - cudf.Series([1, 2, 3], name="a"), - cudf.DataFrame({"a": [2, 3, 4], "c": [4, 5, 6]}), + lambda: cudf.Series([1, 2, 3], name="a"), + lambda: cudf.DataFrame({"a": [2, 3, 4], "c": [4, 5, 6]}), ], ) @pytest.mark.parametrize( "rhs", [ - cudf.Series([1, 2, 3], name="b"), - cudf.DataFrame({"b": [2, 3, 4], "c": [4, 5, 6]}), + lambda: cudf.Series([1, 2, 3], name="b"), + lambda: cudf.DataFrame({"b": [2, 3, 4], "c": [4, 5, 6]}), ], ) @pytest.mark.parametrize( @@ -1816,6 +1803,8 @@ def test_series_dataframe_mixed_merging(lhs, rhs, how, kwargs): ): pytest.skip("Index joins not compatible with leftsemi and leftanti") + lhs = lhs() + rhs = rhs() check_lhs = lhs.copy() check_rhs = rhs.copy() if isinstance(lhs, cudf.Series): diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py index a34c89f55d3..ae5f1e1c90c 100644 --- a/python/cudf/cudf/tests/test_monotonic.py +++ b/python/cudf/cudf/tests/test_monotonic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
""" Tests related to is_unique, is_monotonic_increasing & @@ -12,7 +12,6 @@ import cudf from cudf import Index, MultiIndex, Series from cudf.core.index import CategoricalIndex, DatetimeIndex, RangeIndex -from cudf.testing import assert_eq @pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)]) @@ -299,22 +298,20 @@ def test_get_slice_bound_missing_str(label, side): assert got == expect -testdata = [ - ( - Series(["2018-01-01", "2019-01-31", None], dtype="datetime64[ms]"), - False, - ), - (Series([1, 2, 3, None]), False), - (Series([None, 1, 2, 3]), False), - (Series(["a", "b", "c", None]), False), - (Series([None, "a", "b", "c"]), False), -] - - -@pytest.mark.parametrize("data, expected", testdata) -def test_is_monotonic_always_falls_for_null(data, expected): - assert_eq(expected, data.is_monotonic_increasing) - assert_eq(expected, data.is_monotonic_decreasing) +@pytest.mark.parametrize( + "data", + [ + [pd.Timestamp("2018-01-01"), pd.Timestamp("2019-01-31"), None], + [1, 2, 3, None], + [None, 1, 2, 3], + ["a", "b", "c", None], + [None, "a", "b", "c"], + ], +) +def test_is_monotonic_always_falls_for_null(data): + ser = Series(data) + assert ser.is_monotonic_increasing is False + assert ser.is_monotonic_decreasing is False @pytest.mark.parametrize("box", [Series, Index]) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 39315b6198e..4c1f5259c61 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -10,6 +10,7 @@ import pickle import re from contextlib import contextmanager +from functools import reduce from io import BytesIO import cupy as cp @@ -249,12 +250,14 @@ def pdfIndexNulls(): def test_from_pandas(pdf, pdfIndex): + pdf = pdf.copy(deep=False) pdf.index = pdfIndex gdf = cudf.from_pandas(pdf) assert_eq(pdf, gdf) def test_multiindex_transpose(pdf, pdfIndex): + pdf = pdf.copy(deep=False) pdf.index = pdfIndex gdf = cudf.from_pandas(pdf) assert_eq(pdf.transpose(), gdf.transpose()) @@ -284,6 +287,8 @@ def test_series_multiindex(pdfIndex): def test_multiindex_take(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.index.take([0]), gdf.index.take([0])) @@ -302,6 +307,8 @@ def test_multiindex_take(pdf, gdf, pdfIndex): def test_multiindex_getitem(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.index[0], gdf.index[0]) @@ -336,6 +343,8 @@ def test_multiindex_getitem(pdf, gdf, pdfIndex): def test_multiindex_loc(pdf, gdf, pdfIndex, key_tuple): gdfIndex = cudf.from_pandas(pdfIndex) assert_eq(pdfIndex, gdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex # The index is unsorted, which makes things slow but is fine for testing. 
@@ -349,14 +358,9 @@ def test_multiindex_loc(pdf, gdf, pdfIndex, key_tuple): assert_eq(expected, got) -@pytest.mark.parametrize( - "indexer", - [ - (([1, 1], [0, 1]), slice(None)), - (([1, 1], [1, 0]), slice(None)), - ], -) -def test_multiindex_compatible_ordering(indexer): +@pytest.mark.parametrize("second_val", [[0, 1], [1, 0]]) +def test_multiindex_compatible_ordering(second_val): + indexer = (([1, 1], second_val), slice(None)) df = pd.DataFrame( {"a": [1, 1, 2, 3], "b": [1, 0, 1, 1], "c": [1, 2, 3, 4]} ).set_index(["a", "b"]) @@ -376,15 +380,16 @@ def test_multiindex_compatible_ordering(indexer): slice(None), ], ) -def test_multiindex_loc_slice(pdf, gdf, pdfIndex, arg): +def test_multiindex_loc_slice(pdf, pdfIndex, arg): gdf = cudf.from_pandas(pdf) gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.loc[arg], gdf.loc[arg]) -def test_multiindex_loc_errors(pdf, gdf, pdfIndex): +def test_multiindex_loc_errors(pdf, pdfIndex): gdf = cudf.from_pandas(pdf) gdfIndex = cudf.from_pandas(pdfIndex) gdf.index = gdfIndex @@ -402,6 +407,8 @@ def test_multiindex_loc_errors(pdf, gdf, pdfIndex): def test_multiindex_loc_then_column(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) assert_eq(pdfIndex, gdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex # The index is unsorted, which makes things slow but is fine for testing. @@ -413,6 +420,8 @@ def test_multiindex_loc_then_column(pdf, gdf, pdfIndex): def test_multiindex_loc_rows_0(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex @@ -426,6 +435,8 @@ def test_multiindex_loc_rows_0(pdf, gdf, pdfIndex): def test_multiindex_loc_rows_1_2_key(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.loc[("c", "forest"), :], gdf.loc[("c", "forest"), :]) @@ -433,6 +444,8 @@ def test_multiindex_loc_rows_1_2_key(pdf, gdf, pdfIndex): def test_multiindex_loc_rows_1_1_key(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.loc[("c",), :], gdf.loc[("c",), :]) @@ -467,7 +480,8 @@ def test_multiindex_column_shape(): ("c", "forest", "clear"), ], ) -def test_multiindex_columns(pdf, gdf, pdfIndex, query): +def test_multiindex_columns(pdf, pdfIndex, query): + pdf = pdf.copy(deep=False) pdf = pdf.T gdf = cudf.from_pandas(pdf) gdfIndex = cudf.from_pandas(pdfIndex) @@ -490,8 +504,6 @@ def test_multiindex_from_tuples(): def test_multiindex_from_dataframe(): - if not hasattr(pd.MultiIndex([[]], [[]]), "codes"): - pytest.skip() pdf = pd.DataFrame( [["a", "house"], ["a", "store"], ["b", "house"], ["b", "store"]] ) @@ -690,49 +702,44 @@ def test_multiindex_equals(): assert_eq(mi1.equals(mi2), False) -@pytest.mark.parametrize( - "data", - [ - { - "Date": [ - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - ], - "Close": [ - 3400.00, - 3401.80, - 3450.96, - 226.58, - 228.91, - 225.53, - 505.13, - 525.91, - 534.98, - ], - "Symbol": [ - "AMZN", - "AMZN", - "AMZN", - "MSFT", - "MSFT", - "MSFT", - "NVDA", - "NVDA", - "NVDA", - ], - } - ], -) -@pytest.mark.parametrize("names", [["X", "Y"]]) -def 
test_multiindex_copy_sem(data, names): +def test_multiindex_copy_sem(): """Test semantic equality for MultiIndex.copy""" + names = ["X", "Y"] + data = { + "Date": [ + "2020-08-27", + "2020-08-28", + "2020-08-31", + "2020-08-27", + "2020-08-28", + "2020-08-31", + "2020-08-27", + "2020-08-28", + "2020-08-31", + ], + "Close": [ + 3400.00, + 3401.80, + 3450.96, + 226.58, + 228.91, + 225.53, + 505.13, + 525.91, + 534.98, + ], + "Symbol": [ + "AMZN", + "AMZN", + "AMZN", + "MSFT", + "MSFT", + "MSFT", + "NVDA", + "NVDA", + "NVDA", + ], + } gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -795,7 +802,7 @@ def test_multiindex_copy_sem(data, names): "NVDA", ], }, - cudf.MultiIndex( + pd.MultiIndex( levels=[[1001, 1002], [2001, 2002]], codes=[[1, 1, 0, 0], [0, 1, 0, 1]], names=["col1", "col2"], @@ -809,58 +816,57 @@ def test_multiindex_copy_deep(data, copy_on_write, deep): Case1: Constructed from GroupBy, StringColumns Case2: Constructed from MultiIndex, NumericColumns """ - original_cow_setting = cudf.get_option("copy_on_write") - cudf.set_option("copy_on_write", copy_on_write) - - if isinstance(data, dict): - import operator - from functools import reduce - - gdf = cudf.DataFrame(data) - mi1 = gdf.groupby(["Date", "Symbol"]).mean().index - mi2 = mi1.copy(deep=deep) - - lchildren = [col.children for col in mi1._columns] - rchildren = [col.children for col in mi2._columns] - - # Flatten - lchildren = reduce(operator.add, lchildren) - rchildren = reduce(operator.add, rchildren) - - lptrs = [child.base_data.get_ptr(mode="read") for child in lchildren] - rptrs = [child.base_data.get_ptr(mode="read") for child in rchildren] - - assert all((x == y) for x, y in zip(lptrs, rptrs)) - - elif isinstance(data, cudf.MultiIndex): - same_ref = (not deep) or ( - cudf.get_option("copy_on_write") and not deep - ) - mi1 = data - mi2 = mi1.copy(deep=deep) + with cudf.option_context("copy_on_write", copy_on_write): + if isinstance(data, dict): + gdf = cudf.DataFrame(data) + mi1 = gdf.groupby(["Date", "Symbol"]).mean().index + mi2 = mi1.copy(deep=deep) + + lchildren = [col.children for col in mi1._columns] + rchildren = [col.children for col in mi2._columns] + + # Flatten + lchildren = reduce(operator.add, lchildren) + rchildren = reduce(operator.add, rchildren) + + lptrs = [ + child.base_data.get_ptr(mode="read") for child in lchildren + ] + rptrs = [ + child.base_data.get_ptr(mode="read") for child in rchildren + ] + + assert all((x == y) for x, y in zip(lptrs, rptrs)) + + elif isinstance(data, pd.MultiIndex): + data = cudf.MultiIndex.from_pandas(data) + same_ref = (not deep) or ( + cudf.get_option("copy_on_write") and not deep + ) + mi1 = data + mi2 = mi1.copy(deep=deep) - # Assert ._levels identity - lptrs = [ - lv._column.base_data.get_ptr(mode="read") for lv in mi1._levels - ] - rptrs = [ - lv._column.base_data.get_ptr(mode="read") for lv in mi2._levels - ] + # Assert ._levels identity + lptrs = [ + lv._column.base_data.get_ptr(mode="read") for lv in mi1._levels + ] + rptrs = [ + lv._column.base_data.get_ptr(mode="read") for lv in mi2._levels + ] - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - # Assert ._codes identity - lptrs = [c.base_data.get_ptr(mode="read") for c in mi1._codes] - rptrs = [c.base_data.get_ptr(mode="read") for c in mi2._codes] + # Assert ._codes identity + lptrs = [c.base_data.get_ptr(mode="read") for c in mi1._codes] + rptrs = [c.base_data.get_ptr(mode="read") for c in mi2._codes] - assert all((x == y) == 
same_ref for x, y in zip(lptrs, rptrs)) + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - # Assert ._data identity - lptrs = [d.base_data.get_ptr(mode="read") for d in mi1._columns] - rptrs = [d.base_data.get_ptr(mode="read") for d in mi2._columns] + # Assert ._data identity + lptrs = [d.base_data.get_ptr(mode="read") for d in mi1._columns] + rptrs = [d.base_data.get_ptr(mode="read") for d in mi2._columns] - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - cudf.set_option("copy_on_write", original_cow_setting) + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) @pytest.mark.parametrize( @@ -894,6 +900,8 @@ def test_multiindex_copy_deep(data, copy_on_write, deep): def test_multiindex_iloc(pdf, gdf, pdfIndex, iloc_rows, iloc_columns): gdfIndex = cudf.from_pandas(pdfIndex) assert_eq(pdfIndex, gdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex presult = pdf.iloc[iloc_rows, iloc_columns] @@ -950,6 +958,8 @@ def test_multiindex_iloc_scalar(): def test_multicolumn_iloc(pdf, gdf, pdfIndex, iloc_rows, iloc_columns): gdfIndex = cudf.from_pandas(pdfIndex) assert_eq(pdfIndex, gdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex pdf = pdf.T @@ -1001,6 +1011,8 @@ def test_multiindex_groupby_to_frame(): def test_multiindex_reset_index(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(pdf.reset_index(), gdf.reset_index()) @@ -1045,6 +1057,8 @@ def test_multiindex_multicolumn_reset_index(): def test_groupby_multiindex_columns_from_pandas(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex assert_eq(gdf, pdf) @@ -1053,6 +1067,8 @@ def test_groupby_multiindex_columns_from_pandas(pdf, gdf, pdfIndex): def test_multiindex_rows_with_wildcard(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) + pdf = pdf.copy(deep=False) + gdf = gdf.copy(deep=False) pdf.index = pdfIndex gdf.index = gdfIndex # The index is unsorted, which makes things slow but is fine for testing. 
@@ -1101,6 +1117,7 @@ def test_multiindex_multicolumn_zero_row_slice(): def test_multicolumn_loc(pdf, pdfIndex): + pdf = pdf.copy(deep=False) pdf = pdf.T pdf.columns = pdfIndex gdf = cudf.from_pandas(pdf) @@ -1114,6 +1131,7 @@ def test_multicolumn_loc(pdf, pdfIndex): reason="https://github.com/pandas-dev/pandas/issues/43351", ) def test_multicolumn_set_item(pdf, pdfIndex): + pdf = pdf.copy(deep=False) pdf = pdf.T pdf.columns = pdfIndex gdf = cudf.from_pandas(pdf) @@ -1181,39 +1199,39 @@ def test_multiindex_to_numpy(): "gdi, fill_value, expected", [ ( - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[[1, 3, 4, None], [1, 2, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), 5, - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[[1, 3, 4, 5], [1, 2, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), ), ( - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[[1, 3, 4, None], [1, None, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), 100, - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[[1, 3, 4, 100], [1, 100, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), ), ( - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[["a", "b", "c", None], ["1", None, "5"]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), "100", - cudf.MultiIndex( + lambda: cudf.MultiIndex( levels=[["a", "b", "c", "100"], ["1", "100", "5"]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], @@ -1222,7 +1240,7 @@ def test_multiindex_to_numpy(): ], ) def test_multiindex_fillna(gdi, fill_value, expected): - assert_eq(expected, gdi.fillna(fill_value)) + assert_eq(expected(), gdi().fillna(fill_value)) @pytest.mark.parametrize( @@ -1478,16 +1496,13 @@ def test_multiindex_argsort(pdi, ascending): assert_eq(expected, actual) -@pytest.mark.parametrize( - "idx", [pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]])] -) @pytest.mark.parametrize( "names", [[None, None], ["a", None], ["new name", "another name"]] ) @pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_set_names(idx, names, inplace): - pi = idx.copy() - gi = cudf.from_pandas(idx) +def test_multiindex_set_names(names, inplace): + pi = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) + gi = cudf.from_pandas(pi) expected = pi.set_names(names=names, inplace=inplace) actual = gi.set_names(names=names, inplace=inplace) @@ -1498,18 +1513,7 @@ def test_multiindex_set_names(idx, names, inplace): assert_eq(expected, actual) -@pytest.mark.parametrize( - "idx", - [ - pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]] - ), - pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], - names=[1, 0, 2], - ), - ], -) +@pytest.mark.parametrize("idx_names", [[None, None, None], [1, 0, 2]]) @pytest.mark.parametrize( "level, names", [ @@ -1523,10 +1527,12 @@ def test_multiindex_set_names(idx, names, inplace): ) @pytest.mark.parametrize("inplace", [True, False]) def test_multiindex_set_names_default_and_int_names( - idx, level, names, inplace + idx_names, level, names, inplace ): - pi = idx.copy() - gi = cudf.from_pandas(idx) + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], names=idx_names + ) + gi = cudf.from_pandas(pi) expected = pi.set_names(names=names, level=level, inplace=inplace) actual = gi.set_names(names=names, level=level, inplace=inplace) @@ -1537,15 +1543,6 @@ def test_multiindex_set_names_default_and_int_names( assert_eq(expected, 
actual) -@pytest.mark.parametrize( - "idx", - [ - pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], - names=["one", None, "three"], - ), - ], -) @pytest.mark.parametrize( "level, names", [ @@ -1560,9 +1557,12 @@ def test_multiindex_set_names_default_and_int_names( ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_set_names_string_names(idx, level, names, inplace): - pi = idx.copy() - gi = cudf.from_pandas(idx) +def test_multiindex_set_names_string_names(level, names, inplace): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], + names=["one", None, "three"], + ) + gi = cudf.from_pandas(pi) expected = pi.set_names(names=names, level=level, inplace=inplace) actual = gi.set_names(names=names, level=level, inplace=inplace) @@ -1590,15 +1590,7 @@ def test_multiindex_set_names_error(level, names): ) -@pytest.mark.parametrize( - "idx", - [ - pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]), - pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019]], names=["old name", None] - ), - ], -) +@pytest.mark.parametrize("name", [None, "old name"]) @pytest.mark.parametrize( "names", [ @@ -1611,9 +1603,11 @@ def test_multiindex_set_names_error(level, names): ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_rename(idx, names, inplace): - pi = idx.copy() - gi = cudf.from_pandas(idx) +def test_multiindex_rename(name, names, inplace): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019]], names=[name, None] + ) + gi = cudf.from_pandas(pi) expected = pi.rename(names=names, inplace=inplace) actual = gi.rename(names=names, inplace=inplace) From d171390f5ec640b57404097e2b8d078da12089c6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 24 Jul 2025 13:00:28 -0700 Subject: [PATCH 011/366] Move test_testing.py to new cudf classic test directory structure (#19481) Towards https://github.com/rapidsai/cudf/issues/9999 * Adding more shared fixtures in conftest.py where applicable * Further simplify tests Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19481 --- python/cudf/cudf/testing/_utils.py | 44 ++ python/cudf/cudf/tests/conftest.py | 74 +++ .../tests/general_utilities/test_testing.py | 1 - python/cudf/cudf/tests/test_testing.py | 438 ------------------ .../__init__.py | 0 python/cudf/cudf/tests/testing/conftest.py | 27 ++ .../tests/testing/test_assert_column_equal.py | 95 ++++ .../cudf/cudf/tests/testing/test_assert_eq.py | 45 ++ .../tests/testing/test_assert_frame_equal.py | 61 +++ .../tests/testing/test_assert_index_equal.py | 85 ++++ .../tests/testing/test_assert_series_equal.py | 81 ++++ 11 files changed, 512 insertions(+), 439 deletions(-) delete mode 100644 python/cudf/cudf/tests/general_utilities/test_testing.py delete mode 100644 python/cudf/cudf/tests/test_testing.py rename python/cudf/cudf/tests/{general_utilities => testing}/__init__.py (100%) create mode 100644 python/cudf/cudf/tests/testing/conftest.py create mode 100644 python/cudf/cudf/tests/testing/test_assert_column_equal.py create mode 100644 python/cudf/cudf/tests/testing/test_assert_eq.py create mode 100644 python/cudf/cudf/tests/testing/test_assert_frame_equal.py create mode 100644 python/cudf/cudf/tests/testing/test_assert_index_equal.py create mode 100644 python/cudf/cudf/tests/testing/test_assert_series_equal.py 
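The centerpiece of this patch is the `assert_asserters_equal` helper added to
`_utils.py` below: it runs the pandas asserter first, then requires the cudf
asserter to reproduce whatever outcome pandas produced (success, or a raised
AssertionError/TypeError). A minimal, runnable sketch of the pattern — the
series pair here is a stand-in; any equivalent pandas/cudf pair works:

    import pandas as pd

    import cudf
    from cudf.testing import assert_series_equal
    from cudf.testing._utils import assert_asserters_equal

    # This pair compares unequal, so the pandas asserter raises and the
    # helper then requires the cudf asserter to raise as well.
    psr1 = pd.Series([1, 2, 3], name="a")
    psr2 = pd.Series([1, 2, 4], name="b")
    sr1 = cudf.from_pandas(psr1)
    sr2 = cudf.from_pandas(psr2)

    # One call replaces the hand-rolled try/except/pytest.raises block
    # that each of the deleted tests duplicated.
    assert_asserters_equal(
        pd.testing.assert_series_equal,
        assert_series_equal,
        psr1,
        psr2,
        sr1,
        sr2,
        check_names=True,
    )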
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 43a3ae44f06..fa0bf52279e 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -330,6 +330,50 @@ def assert_column_memory_ne(lhs: ColumnBase, rhs: ColumnBase):
     raise AssertionError("lhs and rhs holds the same memory.")
 
 
+def assert_asserters_equal(
+    pandas_asserter,
+    cudf_asserter,
+    pandas_left,
+    pandas_right,
+    cudf_left,
+    cudf_right,
+    *args,
+    **kwargs,
+):
+    """
+    Assert that a pandas and cudf asserter have equivalent behavior.
+
+    Parameters
+    ----------
+    pandas_asserter : callable
+        A pandas asserter function.
+    cudf_asserter : callable
+        A cudf asserter function.
+    pandas_left : object
+        A pandas object as the left argument to the pandas asserter.
+    pandas_right : object
+        A pandas object as the right argument to the pandas asserter.
+    cudf_left : object
+        A cudf object as the left argument to the cudf asserter.
+    cudf_right : object
+        A cudf object as the right argument to the cudf asserter.
+    *args : tuple
+        Additional arguments to pass to both asserters.
+    **kwargs : dict
+        Additional keyword arguments to pass to both asserters.
+    """
+    # TypeError is raised (erroneously from pandas) when comparing
+    # categorical indices with different categories.
+    exceptions = (AssertionError, TypeError)
+    try:
+        pandas_asserter(pandas_left, pandas_right, *args, **kwargs)
+    except exceptions:
+        with pytest.raises(exceptions):
+            cudf_asserter(cudf_left, cudf_right, *args, **kwargs)
+    else:
+        cudf_asserter(cudf_left, cudf_right, *args, **kwargs)
+
+
 parametrize_numeric_dtypes_pairwise = pytest.mark.parametrize(
     "left_dtype,right_dtype",
     list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)),
diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 4b8bd9c4a1c..1d27963a903 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -194,3 +194,77 @@ def set_decomp_env_vars(monkeypatch, request):
         for key, value in env_vars.items():
             m.setenv(key, value)
     yield
+
+
+signed_integer_types = ["int8", "int16", "int32", "int64"]
+unsigned_integer_types = ["uint8", "uint16", "uint32", "uint64"]
+float_types = ["float32", "float64"]
+datetime_types = [
+    "datetime64[ns]",
+    "datetime64[us]",
+    "datetime64[ms]",
+    "datetime64[s]",
+]
+timedelta_types = [
+    "timedelta64[ns]",
+    "timedelta64[us]",
+    "timedelta64[ms]",
+    "timedelta64[s]",
+]
+string_types = ["str"]
+bool_types = ["bool"]
+category_types = ["category"]
+
+
+@pytest.fixture(params=signed_integer_types)
+def signed_integer_types_as_str(request):
+    """
+    - "int8", "int16", "int32", "int64"
+    """
+    return request.param
+
+
+@pytest.fixture(params=signed_integer_types + unsigned_integer_types)
+def integer_types_as_str(request):
+    """
+    - "int8", "int16", "int32", "int64"
+    - "uint8", "uint16", "uint32", "uint64"
+    """
+    return request.param
+
+
+@pytest.fixture(
+    params=signed_integer_types + unsigned_integer_types + float_types
+)
+def numeric_types_as_str(request):
+    """
+    - "int8", "int16", "int32", "int64"
+    - "uint8", "uint16", "uint32", "uint64"
+    - "float32", "float64"
+    """
+    return request.param
+
+
+@pytest.fixture(
+    params=signed_integer_types
+    + unsigned_integer_types
+    + float_types
+    + datetime_types
+    + timedelta_types
+    + string_types
+    + bool_types
+    + category_types
+)
+def all_supported_types_as_str(request):
+    """
+    - "int8", "int16", "int32", "int64"
+    - "uint8", "uint16", "uint32",
"uint64" + - "float32", "float64" + - "datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]" + - "timedelta64[ns]", "timedelta64[us]", "timedelta64[ms]", "timedelta64[s]" + - "str" + - "category" + - "bool" + """ + return request.param diff --git a/python/cudf/cudf/tests/general_utilities/test_testing.py b/python/cudf/cudf/tests/general_utilities/test_testing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/general_utilities/test_testing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py deleted file mode 100644 index dc14051fe0f..00000000000 --- a/python/cudf/cudf/tests/test_testing.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.core.column.column import as_column -from cudf.testing import ( - assert_frame_equal, - assert_index_equal, - assert_series_equal, -) -from cudf.testing._utils import ( - NUMERIC_TYPES, - OTHER_TYPES, - assert_column_memory_eq, - assert_column_memory_ne, -) -from cudf.testing.testing import assert_column_equal, assert_eq - - -@pytest.fixture( - params=[ - pa.array([*range(10)]), - pa.array(["hello", "world", "rapids", "AI"]), - pa.array([[1, 2, 3], [4, 5], [6], [], [7]]), - pa.array([{"f0": "hello", "f1": 42}, {"f0": "world", "f1": 3}]), - ] -) -def arrow_arrays(request): - return request.param - - -@pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]]) -@pytest.mark.parametrize("exact", ["equiv", True, False]) -@pytest.mark.parametrize("check_names", [True, False]) -@pytest.mark.parametrize("rname", ["a", "b"]) -@pytest.mark.parametrize("check_categorical", [True, False]) -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + OTHER_TYPES + ["datetime64[ns]"] -) -def test_basic_assert_index_equal( - rdata, - exact, - check_names, - rname, - check_categorical, - dtype, -): - p_left = pd.Index([1, 2, 3], name="a", dtype=dtype) - p_right = pd.Index(rdata, name=rname, dtype=dtype) - - left = cudf.from_pandas(p_left) - right = cudf.from_pandas(p_right) - - kind = None - try: - pd.testing.assert_index_equal( - p_left, - p_right, - exact=exact, - check_names=check_names, - check_categorical=check_categorical, - ) - except BaseException as e: - kind = type(e) - msg = str(e) - - if kind is not None: - if (kind is TypeError) and ( - msg - == ( - "Categoricals can only be compared " - "if 'categories' are the same." 
- ) - ): - kind = AssertionError - with pytest.raises(kind): - assert_index_equal( - left, - right, - exact=exact, - check_names=check_names, - check_categorical=check_categorical, - ) - else: - assert_index_equal( - left, - right, - exact=exact, - check_names=check_names, - check_categorical=check_categorical, - ) - - -@pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]]) -@pytest.mark.parametrize("check_names", [True, False]) -@pytest.mark.parametrize("rname", ["a", "b"]) -@pytest.mark.parametrize("check_category_order", [True, False]) -@pytest.mark.parametrize("check_categorical", [True, False]) -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + OTHER_TYPES + ["datetime64[ns]"] -) -def test_basic_assert_series_equal( - rdata, - rname, - check_names, - check_category_order, - check_categorical, - dtype, -): - p_left = pd.Series([1, 2, 3], name="a", dtype=dtype) - p_right = pd.Series(rdata, name=rname, dtype=dtype) - - left = cudf.from_pandas(p_left) - right = cudf.from_pandas(p_right) - - kind = None - try: - pd.testing.assert_series_equal( - p_left, - p_right, - check_names=check_names, - check_categorical=check_categorical, - check_category_order=check_category_order, - ) - except BaseException as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_series_equal( - left, - right, - check_names=check_names, - check_categorical=check_categorical, - check_category_order=check_category_order, - ) - else: - assert_series_equal( - left, - right, - check_names=check_names, - check_categorical=check_categorical, - check_category_order=check_category_order, - ) - - -@pytest.mark.parametrize( - "other_data", - [ - ["1", "2", "3"], - [[1], [2], [3]], - [{"a": 1}, {"a": 2}, {"a": 3}], - ], -) -def test_assert_column_equal_dtype_edge_cases(other_data): - # string series should be 100% different - # even when the elements are the same - base = as_column([1, 2, 3]) - other = as_column(other_data) - - # for these dtypes, the diff should always be 100% regardless of the values - with pytest.raises( - AssertionError, match=r".*values are different \(100.0 %\).*" - ): - assert_column_equal(base, other, check_dtype=False) - - # the exceptions are the empty and all null cases - assert_column_equal(base.slice(0, 0), other.slice(0, 0), check_dtype=False) - assert_column_equal(other.slice(0, 0), base.slice(0, 0), check_dtype=False) - - base = as_column(cudf.NA, length=len(base), dtype=base.dtype) - other = as_column(cudf.NA, length=len(other), dtype=other.dtype) - - assert_column_equal(base, other, check_dtype=False) - assert_column_equal(other, base, check_dtype=False) - - -@pytest.mark.parametrize( - "rdtype", [["int8", "int16", "int64"], ["int64", "int16", "int8"]] -) -@pytest.mark.parametrize("rname", [["a", "b", "c"], ["b", "c", "a"]]) -@pytest.mark.parametrize("index", [[1, 2, 3], [3, 2, 1]]) -@pytest.mark.parametrize("check_exact", [True, False]) -@pytest.mark.parametrize("check_dtype", [True, False]) -@pytest.mark.parametrize("check_names", [True, False]) -@pytest.mark.parametrize("check_like", [True, False]) -@pytest.mark.parametrize("mismatch", [True, False]) -def test_basic_assert_frame_equal( - rdtype, - rname, - index, - check_exact, - check_dtype, - check_names, - check_like, - mismatch, -): - data = [1, 2, 1] - p_left = pd.DataFrame(index=[1, 2, 3]) - p_left["a"] = np.array(data, dtype="int8") - p_left["b"] = np.array(data, dtype="int16") - if mismatch: - p_left["c"] = np.array([1, 2, 3], dtype="int64") - else: - p_left["c"] = np.array(data, 
dtype="int64") - - p_right = pd.DataFrame(index=index) - for dtype, name in zip(rdtype, rname): - p_right[name] = np.array(data, dtype=dtype) - - left = cudf.from_pandas(p_left) - right = cudf.from_pandas(p_right) - - kind = None - try: - pd.testing.assert_frame_equal( - p_left, - p_right, - check_exact=check_exact, - check_dtype=check_dtype, - check_names=check_names, - check_like=check_like, - ) - except BaseException as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_frame_equal( - left, - right, - check_exact=check_exact, - check_dtype=check_dtype, - check_names=check_names, - check_like=check_like, - ) - else: - assert_frame_equal( - left, - right, - check_exact=check_exact, - check_dtype=check_dtype, - check_names=check_names, - check_like=check_like, - ) - - -@pytest.mark.parametrize("rdata", [[0, 1, 2, 3], [0, 1, 2, 4]]) -@pytest.mark.parametrize("check_datetimelike_compat", [True, False]) -def test_datetime_like_compaibility(rdata, check_datetimelike_compat): - psr1 = pd.Series([0, 1, 2, 3], dtype="datetime64[ns]") - psr2 = pd.Series(rdata, dtype="datetime64[ns]").astype("str") - - sr1 = cudf.from_pandas(psr1) - sr2 = cudf.from_pandas(psr2) - - kind = None - try: - pd.testing.assert_series_equal( - psr1, psr2, check_datetimelike_compat=check_datetimelike_compat - ) - except BaseException as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_series_equal( - sr1, sr2, check_datetimelike_compat=check_datetimelike_compat - ) - else: - assert_series_equal( - sr1, sr2, check_datetimelike_compat=check_datetimelike_compat - ) - - -@pytest.mark.parametrize( - "rdata", - [ - [[0, 1, 2, 3], ["G", "O", "N", "E"]], - [[0, 1, 2, 4], ["G", "O", "N", "E"]], - ], -) -def test_multiindex_equal(rdata): - pidx1 = pd.MultiIndex.from_arrays( - [[0, 1, 2, 3], ["G", "O", "N", "E"]], names=("n", "id") - ) - pidx2 = pd.MultiIndex.from_arrays(rdata, names=("n", "id")) - - idx1 = cudf.from_pandas(pidx1) - idx2 = cudf.from_pandas(pidx2) - - kind = None - try: - pd.testing.assert_index_equal(pidx1, pidx2) - except BaseException as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_index_equal(idx1, idx2) - else: - assert_index_equal(idx1, idx2) - - -@pytest.mark.parametrize("dtype", ["int8", "uint8", "float32"]) -@pytest.mark.parametrize("check_exact", [True, False]) -@pytest.mark.parametrize("check_dtype", [True, False]) -def test_series_different_type_cases(dtype, check_exact, check_dtype): - data = [0, 1, 2, 3] - - psr1 = pd.Series(data, dtype="uint8") - psr2 = pd.Series(data, dtype=dtype) - - sr1 = cudf.from_pandas(psr1) - sr2 = cudf.from_pandas(psr2) - - kind = None - try: - pd.testing.assert_series_equal( - psr1, psr2, check_exact=check_exact, check_dtype=check_dtype - ) - except BaseException as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_series_equal( - sr1, sr2, check_exact=check_exact, check_dtype=check_dtype - ) - else: - assert_series_equal( - sr1, sr2, check_exact=check_exact, check_dtype=check_dtype - ) - - -@pytest.mark.parametrize( - "dtype", - ["int8", "int16", "int32", "int64"], -) -@pytest.mark.parametrize("exact", ["equiv", True, False]) -def test_range_index_and_int_index_eqaulity(dtype, exact): - pidx1 = pd.RangeIndex(0, stop=5, step=1) - pidx2 = pd.Index([0, 1, 2, 3, 4]) - idx1 = cudf.from_pandas(pidx1) - idx2 = cudf.Index([0, 1, 2, 3, 4], dtype=dtype) - - kind = None - try: - pd.testing.assert_index_equal(pidx1, pidx2, exact=exact) - except BaseException 
as e: - kind = type(e) - - if kind is not None: - with pytest.raises(kind): - assert_index_equal(idx1, idx2, exact=exact) - else: - assert_index_equal(idx1, idx2, exact=exact) - - -@pytest.mark.parametrize( - "left, right", - [ - (1493282, 1493282), - (1493282.0, 1493282.0 + 1e-8), - ("abc", "abc"), - (0, np.array(0)), - ( - np.datetime64(123456, "ns"), - pd.Timestamp(np.datetime64(123456, "ns")), - ), - ("int64", np.dtype("int64")), - (np.nan, np.nan), - ], -) -def test_basic_scalar_equality(left, right): - assert_eq(left, right) - - -@pytest.mark.parametrize( - "left, right", - [ - (1493282, 1493274), - (1493282.0, 1493282.0 + 1e-6), - ("abc", "abd"), - (0, np.array(1)), - ( - np.datetime64(123456, "ns"), - pd.Timestamp(np.datetime64(123457, "ns")), - ), - ("int64", np.dtype("int32")), - ], -) -def test_basic_scalar_inequality(left, right): - with pytest.raises(AssertionError, match=r".*not (almost )?equal.*"): - assert_eq(left, right) - - -def test_assert_column_memory_basic(arrow_arrays): - left = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) - right = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) - - with pytest.raises(AssertionError): - assert_column_memory_eq(left, right) - assert_column_memory_ne(left, right) - - -def test_assert_column_memory_slice(arrow_arrays): - col = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) - left = col.slice(0, 1) - right = col.slice(1, 2) - - with pytest.raises(AssertionError): - assert_column_memory_eq(left, right) - assert_column_memory_ne(left, right) - - with pytest.raises(AssertionError): - assert_column_memory_eq(left, col) - assert_column_memory_ne(left, col) - - with pytest.raises(AssertionError): - assert_column_memory_eq(right, col) - assert_column_memory_ne(right, col) - - -def test_assert_column_memory_basic_same(arrow_arrays): - data = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) - buf = cudf.core.buffer.as_buffer(data.base_data) - - left = cudf.core.column.build_column(buf, dtype=np.dtype(np.int8)) - right = cudf.core.column.build_column(buf, dtype=np.dtype(np.int8)) - - assert_column_memory_eq(left, right) - with pytest.raises(AssertionError): - assert_column_memory_ne(left, right) diff --git a/python/cudf/cudf/tests/general_utilities/__init__.py b/python/cudf/cudf/tests/testing/__init__.py similarity index 100% rename from python/cudf/cudf/tests/general_utilities/__init__.py rename to python/cudf/cudf/tests/testing/__init__.py diff --git a/python/cudf/cudf/tests/testing/conftest.py b/python/cudf/cudf/tests/testing/conftest.py new file mode 100644 index 00000000000..5798871dabc --- /dev/null +++ b/python/cudf/cudf/tests/testing/conftest.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pytest + + +@pytest.fixture(params=[True, False]) +def check_dtype(request): + """Argument for assert_series_equal, assert_frame_equal""" + return request.param + + +@pytest.fixture(params=[True, False]) +def check_exact(request): + """Argument for assert_series_equal, assert_frame_equal""" + return request.param + + +@pytest.fixture(params=[True, False]) +def check_datetimelike_compat(request): + """Argument for assert_series_equal, assert_frame_equal""" + return request.param + + +@pytest.fixture(params=[True, False]) +def check_names(request): + """Argument for assert_series_equal, assert_frame_equal, assert_index_equal""" + return request.param diff --git a/python/cudf/cudf/tests/testing/test_assert_column_equal.py b/python/cudf/cudf/tests/testing/test_assert_column_equal.py new file mode 100644 index 00000000000..7116c8d187e --- /dev/null +++ b/python/cudf/cudf/tests/testing/test_assert_column_equal.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pyarrow as pa +import pytest + +import cudf +from cudf.core.column.column import as_column +from cudf.testing._utils import ( + assert_column_memory_eq, + assert_column_memory_ne, +) +from cudf.testing.testing import assert_column_equal + + +@pytest.fixture( + params=[ + range(10), + ["hello", "world", "rapids", "AI"], + [[1, 2, 3], [4, 5], [6], [], [7]], + [{"f0": "hello", "f1": 42}, {"f0": "world", "f1": 3}], + ] +) +def arrow_arrays(request): + return pa.array(request.param) + + +def test_assert_column_memory_basic(arrow_arrays): + left = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + right = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, right) + assert_column_memory_ne(left, right) + + +def test_assert_column_memory_slice(arrow_arrays): + col = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + left = col.slice(0, 1) + right = col.slice(1, 2) + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, right) + assert_column_memory_ne(left, right) + + with pytest.raises(AssertionError): + assert_column_memory_eq(left, col) + assert_column_memory_ne(left, col) + + with pytest.raises(AssertionError): + assert_column_memory_eq(right, col) + assert_column_memory_ne(right, col) + + +def test_assert_column_memory_basic_same(arrow_arrays): + data = cudf.core.column.ColumnBase.from_arrow(arrow_arrays) + buf = cudf.core.buffer.as_buffer(data.base_data) + + left = cudf.core.column.build_column(buf, dtype=np.dtype(np.int8)) + right = cudf.core.column.build_column(buf, dtype=np.dtype(np.int8)) + + assert_column_memory_eq(left, right) + with pytest.raises(AssertionError): + assert_column_memory_ne(left, right) + + +@pytest.mark.parametrize( + "other_data", + [ + ["1", "2", "3"], + [[1], [2], [3]], + [{"a": 1}, {"a": 2}, {"a": 3}], + ], +) +def test_assert_column_equal_dtype_edge_cases(other_data): + # string series should be 100% different + # even when the elements are the same + base = as_column([1, 2, 3]) + other = as_column(other_data) + + # for these dtypes, the diff should always be 100% regardless of the values + with pytest.raises( + AssertionError, match=r".*values are different \(100.0 %\).*" + ): + assert_column_equal(base, other, check_dtype=False) + + # the exceptions are the empty and all null cases + assert_column_equal(base.slice(0, 0), other.slice(0, 0), check_dtype=False) + assert_column_equal(other.slice(0, 0), base.slice(0, 0), check_dtype=False) + + base = as_column(cudf.NA, 
length=len(base), dtype=base.dtype) + other = as_column(cudf.NA, length=len(other), dtype=other.dtype) + + assert_column_equal(base, other, check_dtype=False) + assert_column_equal(other, base, check_dtype=False) diff --git a/python/cudf/cudf/tests/testing/test_assert_eq.py b/python/cudf/cudf/tests/testing/test_assert_eq.py new file mode 100644 index 00000000000..c9c8a44796e --- /dev/null +++ b/python/cudf/cudf/tests/testing/test_assert_eq.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +from cudf.testing.testing import assert_eq + + +@pytest.mark.parametrize( + "left, right", + [ + (1493282, 1493282), + (1493282.0, 1493282.0 + 1e-8), + ("abc", "abc"), + (0, np.array(0)), + ( + np.datetime64(123456, "ns"), + pd.Timestamp(np.datetime64(123456, "ns")), + ), + ("int64", np.dtype("int64")), + (np.nan, np.nan), + ], +) +def test_basic_scalar_equality(left, right): + assert_eq(left, right) + + +@pytest.mark.parametrize( + "left, right", + [ + (1493282, 1493274), + (1493282.0, 1493282.0 + 1e-6), + ("abc", "abd"), + (0, np.array(1)), + ( + np.datetime64(123456, "ns"), + pd.Timestamp(np.datetime64(123457, "ns")), + ), + ("int64", np.dtype("int32")), + ], +) +def test_basic_scalar_inequality(left, right): + with pytest.raises(AssertionError, match=r".*not (almost )?equal.*"): + assert_eq(left, right) diff --git a/python/cudf/cudf/tests/testing/test_assert_frame_equal.py b/python/cudf/cudf/tests/testing/test_assert_frame_equal.py new file mode 100644 index 00000000000..719dc0b1e60 --- /dev/null +++ b/python/cudf/cudf/tests/testing/test_assert_frame_equal.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_frame_equal +from cudf.testing._utils import assert_asserters_equal + + +@pytest.fixture(params=[True, False]) +def check_like(request): + """Argument for assert_frame_equal""" + return request.param + + +@pytest.mark.parametrize( + "rdtype", [["int8", "int16", "int64"], ["int64", "int16", "int8"]] +) +@pytest.mark.parametrize("rname", [["a", "b", "c"], ["b", "c", "a"]]) +@pytest.mark.parametrize("index", [[1, 2, 3], [3, 2, 1]]) +@pytest.mark.parametrize("mismatch", [True, False]) +def test_basic_assert_frame_equal( + rdtype, + rname, + index, + check_exact, + check_dtype, + check_names, + check_like, + mismatch, +): + data = [1, 2, 1] + p_left = pd.DataFrame(index=[1, 2, 3]) + p_left["a"] = np.array(data, dtype="int8") + p_left["b"] = np.array(data, dtype="int16") + if mismatch: + p_left["c"] = np.array([1, 2, 3], dtype="int64") + else: + p_left["c"] = np.array(data, dtype="int64") + + p_right = pd.DataFrame(index=index) + for dtype, name in zip(rdtype, rname): + p_right[name] = np.array(data, dtype=dtype) + + left = cudf.from_pandas(p_left) + right = cudf.from_pandas(p_right) + + assert_asserters_equal( + pd.testing.assert_frame_equal, + assert_frame_equal, + p_left, + p_right, + left, + right, + check_exact=check_exact, + check_dtype=check_dtype, + check_names=check_names, + check_like=check_like, + ) diff --git a/python/cudf/cudf/tests/testing/test_assert_index_equal.py b/python/cudf/cudf/tests/testing/test_assert_index_equal.py new file mode 100644 index 00000000000..1b238b5df83 --- /dev/null +++ b/python/cudf/cudf/tests/testing/test_assert_index_equal.py @@ -0,0 +1,85 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_index_equal +from cudf.testing._utils import assert_asserters_equal + + +@pytest.fixture(params=["equiv", True, False]) +def exact(request): + """Argument for assert_index_equal""" + return request.param + + +def test_range_index_and_int_index_equality( + signed_integer_types_as_str, exact +): + pidx1 = pd.RangeIndex(0, stop=5, step=1) + pidx2 = pd.Index([0, 1, 2, 3, 4]) + idx1 = cudf.from_pandas(pidx1) + idx2 = cudf.Index([0, 1, 2, 3, 4], dtype=signed_integer_types_as_str) + + assert_asserters_equal( + pd.testing.assert_index_equal, + assert_index_equal, + pidx1, + pidx2, + idx1, + idx2, + exact=exact, + ) + + +@pytest.mark.parametrize("rdata", [3, 4], ids=["same", "different"]) +def test_multiindex_equal(rdata): + pidx1 = pd.MultiIndex.from_arrays( + [[0, 1, 2, 3], ["G", "O", "N", "E"]], names=("n", "id") + ) + pidx2 = pd.MultiIndex.from_arrays( + [[0, 1, 2, rdata], ["G", "O", "N", "E"]], names=("n", "id") + ) + + idx1 = cudf.from_pandas(pidx1) + idx2 = cudf.from_pandas(pidx2) + + assert_asserters_equal( + pd.testing.assert_index_equal, + assert_index_equal, + pidx1, + pidx2, + idx1, + idx2, + ) + + +@pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]]) +@pytest.mark.parametrize("rname", ["a", "b"]) +@pytest.mark.parametrize("check_categorical", [True, False]) +def test_basic_assert_index_equal( + rdata, + exact, + check_names, + rname, + check_categorical, + all_supported_types_as_str, +): + p_left = pd.Index([1, 2, 3], name="a", dtype=all_supported_types_as_str) + p_right = pd.Index(rdata, name=rname, dtype=all_supported_types_as_str) + + left = cudf.from_pandas(p_left) + right = cudf.from_pandas(p_right) + + assert_asserters_equal( + pd.testing.assert_index_equal, + assert_index_equal, + p_left, + p_right, + left, + right, + exact=exact, + check_names=check_names, + check_categorical=check_categorical, + ) diff --git a/python/cudf/cudf/tests/testing/test_assert_series_equal.py b/python/cudf/cudf/tests/testing/test_assert_series_equal.py new file mode 100644 index 00000000000..9d6f517c88b --- /dev/null +++ b/python/cudf/cudf/tests/testing/test_assert_series_equal.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_series_equal
+from cudf.testing._utils import assert_asserters_equal
+
+
+def test_series_different_type_cases(
+    numeric_types_as_str, check_exact, check_dtype
+):
+    data = [0, 1, 2, 3]
+
+    psr1 = pd.Series(data, dtype="uint8")
+    psr2 = pd.Series(data, dtype=numeric_types_as_str)
+
+    sr1 = cudf.from_pandas(psr1)
+    sr2 = cudf.from_pandas(psr2)
+
+    assert_asserters_equal(
+        pd.testing.assert_series_equal,
+        assert_series_equal,
+        psr1,
+        psr2,
+        sr1,
+        sr2,
+        check_exact=check_exact,
+        check_dtype=check_dtype,
+    )
+
+
+@pytest.mark.parametrize("rdata", [3, 4], ids=["same", "different"])
+def test_datetime_like_compatibility(rdata, check_datetimelike_compat):
+    psr1 = pd.Series([0, 1, 2, 3], dtype="datetime64[ns]")
+    psr2 = pd.Series([0, 1, 2, rdata], dtype="datetime64[ns]").astype("str")
+
+    sr1 = cudf.from_pandas(psr1)
+    sr2 = cudf.from_pandas(psr2)
+
+    assert_asserters_equal(
+        pd.testing.assert_series_equal,
+        assert_series_equal,
+        psr1,
+        psr2,
+        sr1,
+        sr2,
+        check_datetimelike_compat=check_datetimelike_compat,
+    )
+
+
+@pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]])
+@pytest.mark.parametrize("rname", ["a", "b"])
+@pytest.mark.parametrize("check_category_order", [True, False])
+@pytest.mark.parametrize("check_categorical", [True, False])
+def test_basic_assert_series_equal(
+    rdata,
+    rname,
+    check_names,
+    check_category_order,
+    check_categorical,
+    all_supported_types_as_str,
+):
+    p_left = pd.Series([1, 2, 3], name="a", dtype=all_supported_types_as_str)
+    p_right = pd.Series(rdata, name=rname, dtype=all_supported_types_as_str)
+
+    left = cudf.from_pandas(p_left)
+    right = cudf.from_pandas(p_right)
+
+    assert_asserters_equal(
+        pd.testing.assert_series_equal,
+        assert_series_equal,
+        p_left,
+        p_right,
+        left,
+        right,
+        check_names=check_names,
+        check_categorical=check_categorical,
+        check_category_order=check_category_order,
+    )

From 4cc4fd59e34b8560ceb21bf2e411a95f39e1d0b9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Fri, 25 Jul 2025 08:34:37 -0700
Subject: [PATCH 012/366] Use GCC 14 in conda builds. (#19192)

conda-forge is migrating to gcc 14, so this PR updates cudf to stay aligned.
See https://github.com/rapidsai/build-planning/issues/188 Authors: - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/19192 --- .../all_cuda-129_arch-aarch64.yaml | 2 +- .../all_cuda-129_arch-x86_64.yaml | 2 +- conda/recipes/cudf/conda_build_config.yaml | 4 +- .../cudf_kafka/conda_build_config.yaml | 4 +- conda/recipes/libcudf/conda_build_config.yaml | 4 +- .../recipes/pylibcudf/conda_build_config.yaml | 4 +- cpp/CMakeLists.txt | 5 ++ cpp/examples/basic/CMakeLists.txt | 5 ++ cpp/examples/billion_rows/CMakeLists.txt | 5 ++ cpp/examples/interop/CMakeLists.txt | 5 ++ cpp/examples/nested_types/CMakeLists.txt | 5 ++ cpp/examples/parquet_io/CMakeLists.txt | 5 ++ cpp/examples/string_transforms/CMakeLists.txt | 5 ++ cpp/examples/strings/CMakeLists.txt | 5 ++ cpp/libcudf_kafka/CMakeLists.txt | 5 ++ cpp/src/io/orc/writer_impl.cu | 56 ++++++++++++++++--- cpp/src/io/parquet/writer_impl.cu | 36 ++++++++++-- cpp/src/io/utilities/output_builder.cuh | 7 +++ cpp/tests/lists/extract_tests.cpp | 18 +++++- cpp/tests/partitioning/partition_test.cpp | 28 ++++++---- .../transform/integration/assert_unary.h | 34 +++++++---- dependencies.yaml | 4 +- python/cudf/CMakeLists.txt | 5 ++ python/pylibcudf/CMakeLists.txt | 5 ++ 24 files changed, 213 insertions(+), 45 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 8be5f80c743..c536a7d662c 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - fastavro>=0.22.9 - flatbuffers==24.3.25 - fsspec>=0.6.0 -- gcc_linux-aarch64=13.* +- gcc_linux-aarch64=14.* - hypothesis>=6.131.7 - identify>=2.5.20 - include-what-you-use==0.24.0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 0e6f4994f83..bf9e857f6bc 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - fastavro>=0.22.9 - flatbuffers==24.3.25 - fsspec>=0.6.0 -- gcc_linux-64=13.* +- gcc_linux-64=14.* - hypothesis>=6.131.7 - identify>=2.5.20 - include-what-you-use==0.24.0 diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index c35ea54a784..5c68a5fefcb 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 c_stdlib: - sysroot diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index c35ea54a784..5c68a5fefcb 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 c_stdlib: - sysroot diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 5fc7c9eae1b..7ca20165585 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/pylibcudf/conda_build_config.yaml 
b/conda/recipes/pylibcudf/conda_build_config.yaml index c35ea54a784..5c68a5fefcb 100644 --- a/conda/recipes/pylibcudf/conda_build_config.yaml +++ b/conda/recipes/pylibcudf/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 c_stdlib: - sysroot diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1a158ef4e79..cb277bf22cd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -133,6 +133,11 @@ set(CUDF_CUDA_FLAGS "") set(CUDF_CXX_DEFINITIONS "") set(CUDF_CUDA_DEFINITIONS "") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # Set logging level set(LIBCUDF_LOGGING_LEVEL "INFO" diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 3ca878f1497..447c8709297 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # Configure your project here add_executable(basic_example src/process_csv.cpp) target_link_libraries(basic_example PRIVATE cudf::cudf) diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt index c0de82ac85a..962ff79c537 100644 --- a/cpp/examples/billion_rows/CMakeLists.txt +++ b/cpp/examples/billion_rows/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) add_library(groupby_results OBJECT groupby_results.cpp) diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt index 1ea9779d4cc..f974735b979 100644 --- a/cpp/examples/interop/CMakeLists.txt +++ b/cpp/examples/interop/CMakeLists.txt @@ -17,6 +17,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # The Arrow CMake is currently broken if the build type is not set set(CMAKE_BUILD_TYPE Release) # No need to install Arrow libs when only the final example executable is shipped. 
diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt index b5d71d3262d..e91a85d4ee0 100644 --- a/cpp/examples/nested_types/CMakeLists.txt +++ b/cpp/examples/nested_types/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # Configure your project here add_executable(deduplication deduplication.cpp) target_link_libraries(deduplication PRIVATE cudf::cudf) diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt index 7bcd22445dd..4f520b2bdad 100644 --- a/cpp/examples/parquet_io/CMakeLists.txt +++ b/cpp/examples/parquet_io/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + add_library(parquet_io_utils OBJECT common_utils.cpp io_source.cpp) target_compile_features(parquet_io_utils PRIVATE cxx_std_20) target_link_libraries(parquet_io_utils PRIVATE cudf::cudf) diff --git a/cpp/examples/string_transforms/CMakeLists.txt b/cpp/examples/string_transforms/CMakeLists.txt index c1f3aff2e5c..90830eb2820 100644 --- a/cpp/examples/string_transforms/CMakeLists.txt +++ b/cpp/examples/string_transforms/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) add_executable(compute_checksum_jit compute_checksum_jit.cpp) diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt index 7d55d60c062..4e890118dcb 100644 --- a/cpp/examples/strings/CMakeLists.txt +++ b/cpp/examples/strings/CMakeLists.txt @@ -18,6 +18,11 @@ include(../fetch_dependencies.cmake) include(rapids-cmake) rapids_cmake_build_type("Release") +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. 
+set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) add_executable(libcudf_apis libcudf_apis.cpp) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 2a62e79f484..06e03bb84d4 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -29,6 +29,11 @@ project( # Set a default build type if none was specified rapids_cmake_build_type(Release) +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # ################################################################################################## # * conda environment ----------------------------------------------------------------------------- rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 5a8744f8afd..612f0b69737 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -910,7 +910,14 @@ encoded_data encode_columns(orc_table_view const& orc_table, std::vector indices; for (auto const& stripe : segmentation.stripes) { for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend() - 1; ++rg_idx_it) { +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif auto const& chunk = chunks[col_idx][*rg_idx_it]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif indices.push_back(chunk.start_row); indices.push_back(chunk.start_row + chunk.num_rows); } @@ -959,8 +966,15 @@ encoded_data encode_columns(orc_table_view const& orc_table, if (strm_id >= 0) { size_t stripe_size = 0; std::for_each(stripe.cbegin(), stripe.cend(), [&](auto rg_idx) { +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif auto const& ck = chunks[col_idx][rg_idx]; - auto& strm = col_streams[rg_idx]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif + auto& strm = col_streams[rg_idx]; if ((strm_type == CI_DICTIONARY) || (strm_type == CI_DATA2 && ck.encoding_kind == DICTIONARY_V2)) { @@ -995,8 +1009,15 @@ encoded_data encode_columns(orc_table_view const& orc_table, // Set offsets for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend(); ++rg_idx_it) { auto const rg_idx = *rg_idx_it; - auto const& ck = chunks[col_idx][rg_idx]; - auto& strm = col_streams[rg_idx]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + auto const& ck = chunks[col_idx][rg_idx]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif + auto& strm = col_streams[rg_idx]; if (strm_id < 0 or (strm_type == CI_DATA && streams[strm_id].length == 0 && (ck.type_kind == DOUBLE || ck.type_kind == FLOAT))) { @@ -1495,7 +1516,14 @@ void write_index_stream(int32_t stripe_id, if (stream.ids[type] > 0) { record.pos = 0; if (compression != compression_type::NONE) { - auto const& ss = strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + auto const& ss = 
strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif record.blk_pos = ss.first_block; record.comp_pos = 0; record.comp_size = ss.stream_size; @@ -1522,10 +1550,17 @@ void write_index_stream(int32_t stripe_id, auto kind = TypeKind::STRUCT; // TBD: Not sure we need an empty index stream for column 0 if (stream_id != 0) { +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif auto const& strm = enc_streams[column_id][0]; - present = find_record(strm, CI_PRESENT); - data = find_record(strm, CI_DATA); - data2 = find_record(strm, CI_DATA2); +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif + present = find_record(strm, CI_PRESENT); + data = find_record(strm, CI_DATA); + data2 = find_record(strm, CI_DATA2); // Change string dictionary to int from index point of view kind = columns[column_id].orc_kind(); @@ -1551,7 +1586,14 @@ void write_index_stream(int32_t stripe_id, : (&rg_stats[column_id * segmentation.num_rowgroups() + rowgroup])); if (stream_id != 0) { +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif const auto& strm = enc_streams[column_id][rowgroup]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif scan_record(strm, CI_PRESENT, present); scan_record(strm, CI_DATA, data); scan_record(strm, CI_DATA2, data2); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 20236c63344..41fdbe7e6fc 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -2160,7 +2160,14 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto& row_group = agg_meta->file(p).row_groups[global_r]; for (auto i = 0; i < num_columns; i++) { - auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif auto const dev_bfr = ck.is_compressed ? ck.compressed_bfr : ck.uncompressed_bfr; auto& column_chunk_meta = row_group.columns[i].meta_data; @@ -2203,7 +2210,14 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto& row_group = agg_meta->file(p).row_groups[global_r]; for (auto i = 0; i < num_columns; i++) { - auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif auto const& col = col_desc[ck.col_desc_id]; auto& column_chunk_meta = row_group.columns[i].meta_data; @@ -2446,7 +2460,14 @@ void writer::impl::write_parquet_data_to_sink( auto& row_group = _agg_meta->file(p).row_groups[global_r]; for (std::size_t i = 0; i < num_columns; i++) { - auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + auto const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif auto const dev_bfr = ck.is_compressed ? 
ck.compressed_bfr : ck.uncompressed_bfr; // Skip the range [0, ck.ck_stat_size) since it has already been copied to host @@ -2493,7 +2514,14 @@ void writer::impl::write_parquet_data_to_sink( int const global_r = global_rowgroup_base[p] + r - first_rg_in_part[p]; auto const& row_group = _agg_meta->file(p).row_groups[global_r]; for (std::size_t i = 0; i < num_columns; i++) { - EncColumnChunk const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif + EncColumnChunk const& ck = chunks[r][i]; +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif auto const& column_chunk_meta = row_group.columns[i].meta_data; // start transfer of the column index diff --git a/cpp/src/io/utilities/output_builder.cuh b/cpp/src/io/utilities/output_builder.cuh index 3455ba8436a..d8ce108f09d 100644 --- a/cpp/src/io/utilities/output_builder.cuh +++ b/cpp/src/io/utilities/output_builder.cuh @@ -294,8 +294,15 @@ class output_builder { */ [[nodiscard]] T back_element(rmm::cuda_stream_view stream) const { +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdangling-reference" +#endif auto const& last_nonempty_chunk = _chunks.size() > 1 and _chunks.back().is_empty() ? _chunks.rbegin()[1] : _chunks.back(); +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif return last_nonempty_chunk.back_element(stream); } diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp index 2c24f695c29..d727e2ee459 100644 --- a/cpp/tests/lists/extract_tests.cpp +++ b/cpp/tests/lists/extract_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,19 @@ * limitations under the License. */ +// The usage of the lists_column_wrapper `LCW{LCW...` syntax in this file +// causes gcc14 to throw a maybe-uninitialized warning in the copy constructor +// of column_view_base. The same usage in every other test file causes no +// issues, so it seems highly likely to be an incorrect diagnostic. +// Unfortunately, because the warning is in an included file, neither inserting +// ignore pragmas around just the includes nor just around the calling code +// below that uses that syntax (of which there is a decent amount) seems to be +// sufficient to make the compiler happy, so for now the easiest option is to +// ignore the warning for the entire file. +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif #include #include #include @@ -423,3 +436,6 @@ TEST_F(ListsExtractColumnIndicesTest, ExtractStrings) } CUDF_TEST_PROGRAM_MAIN() +#if defined(__GNUC__) && (__GNUC__ >= 14) +#pragma GCC diagnostic pop +#endif diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index 8ea224eb9fc..da54f2233a0 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,6 +25,11 @@ #include #include +#include +#include + +#include + template class PartitionTest : public cudf::test::BaseFixture { using value_type = cudf::test::GetType; @@ -100,15 +105,18 @@ void expect_equal_partitions(cudf::table_view expected, // Split the partitions, sort each partition, then compare for equality auto actual_split = cudf::split(actual, split_points); auto expected_split = cudf::split(expected, split_points); - std::equal(expected_split.begin(), - expected_split.end(), - actual_split.begin(), - [](cudf::table_view expected, cudf::table_view actual) { - auto sorted_expected = cudf::sort(expected); - auto sorted_actual = cudf::sort(actual); - CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_expected, *sorted_actual); - return true; - }); + + auto begin = + thrust::make_zip_iterator(cuda::std::make_tuple(expected_split.begin(), actual_split.begin())); + auto end = + thrust::make_zip_iterator(cuda::std::make_tuple(expected_split.end(), actual_split.end())); + + std::for_each(begin, end, [](auto const& zipped) { + auto [expected, actual] = zipped; + auto sorted_expected = cudf::sort(expected); + auto sorted_actual = cudf::sort(actual); + CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_expected, *sorted_actual); + }); } void run_partition_test(cudf::table_view table_to_partition, diff --git a/cpp/tests/transform/integration/assert_unary.h b/cpp/tests/transform/integration/assert_unary.h index 98dc5d1a240..8aa23a03055 100644 --- a/cpp/tests/transform/integration/assert_unary.h +++ b/cpp/tests/transform/integration/assert_unary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,11 @@ #include #include +#include +#include + +#include + namespace transformation { template void ASSERT_UNARY(cudf::column_view const& out, cudf::column_view const& in, TypeOpe&& ope) @@ -31,21 +36,28 @@ void ASSERT_UNARY(cudf::column_view const& out, cudf::column_view const& in, Typ ASSERT_TRUE(out_data.size() == in_data.size()); - auto data_comparator = [ope](TypeIn const& in, TypeOut const& out) { - EXPECT_EQ(out, static_cast(ope(in))); - return true; - }; - std::equal(in_data.begin(), in_data.end(), out_data.begin(), data_comparator); + auto begin = thrust::make_zip_iterator(cuda::std::make_tuple(in_data.begin(), out_data.begin())); + auto end = thrust::make_zip_iterator(cuda::std::make_tuple(in_data.end(), out_data.end())); + + std::for_each(begin, end, [ope](auto const& zipped) { + auto [in_val, out_val] = zipped; + EXPECT_EQ(out_val, static_cast(ope(in_val))); + }); auto in_valid = in_h.second; auto out_valid = out_h.second; ASSERT_TRUE(out_valid.size() == in_valid.size()); - auto valid_comparator = [](bool const& in, bool const& out) { - EXPECT_EQ(out, in); - return true; - }; - std::equal(in_valid.begin(), in_valid.end(), out_valid.begin(), valid_comparator); + + auto valid_begin = + thrust::make_zip_iterator(cuda::std::make_tuple(in_valid.begin(), out_valid.begin())); + auto valid_end = + thrust::make_zip_iterator(cuda::std::make_tuple(in_valid.end(), out_valid.end())); + + std::for_each(valid_begin, valid_end, [](auto const& zipped) { + auto [in_flag, out_flag] = zipped; + EXPECT_EQ(out_flag, in_flag); + }); } } // namespace transformation diff --git a/dependencies.yaml b/dependencies.yaml index 8246be146f3..dfcfb40d7a9 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -434,13 +434,13 @@ dependencies: arch: x86_64 cuda: "12.*" packages: - - gcc_linux-64=13.* + - gcc_linux-64=14.* - sysroot_linux-64==2.28 - matrix: arch: aarch64 cuda: "12.*" packages: - - gcc_linux-aarch64=13.* + - gcc_linux-aarch64=14.* - sysroot_linux-aarch64==2.28 - output_types: conda matrices: diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index b3f3f0bf56c..530e50d2075 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -18,6 +18,11 @@ include(../../cmake/rapids_config.cmake) include(rapids-cuda) rapids_cuda_init_architectures(cudf-python) +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + project( cudf-python VERSION "${RAPIDS_VERSION}" diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index ac0bbb82df5..aad128cb695 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -18,6 +18,11 @@ include(../../cmake/rapids_config.cmake) include(rapids-cuda) rapids_cuda_init_architectures(pylibcudf) +# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the +# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with +# gcc>=14. We can remove this once we upgrade to a newer sccache version. 
+set(CMAKE_CXX_SCAN_FOR_MODULES OFF)
+
 project(
   pylibcudf
   VERSION "${RAPIDS_VERSION}"

From 84149b11ac65516a6fd9cf05b2592fc95ad441f8 Mon Sep 17 00:00:00 2001
From: Robert Maynard
Date: Fri, 25 Jul 2025 13:58:10 -0400
Subject: [PATCH 013/366] Update build infra to support new branching strategy
 (#19445)

rapids_config now reads the contents of `RAPIDS_BRANCH` to determine which
rapids-cmake branch to check out.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19445
---
 RAPIDS_BRANCH             |  1 +
 cmake/RAPIDS.cmake        |  6 +++---
 cmake/rapids_config.cmake | 10 ++++++++++
 3 files changed, 14 insertions(+), 3 deletions(-)
 create mode 100644 RAPIDS_BRANCH

diff --git a/RAPIDS_BRANCH b/RAPIDS_BRANCH
new file mode 100644
index 00000000000..9b1c52d9415
--- /dev/null
+++ b/RAPIDS_BRANCH
@@ -0,0 +1 @@
+branch-25.10
diff --git a/cmake/RAPIDS.cmake b/cmake/RAPIDS.cmake
index d112951d3c1..40de7cefcd2 100644
--- a/cmake/RAPIDS.cmake
+++ b/cmake/RAPIDS.cmake
@@ -18,9 +18,9 @@ cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 # Allow users to control which version is used
-if(NOT rapids-cmake-version OR NOT rapids-cmake-version MATCHES [[^([0-9][0-9])\.([0-9][0-9])$]])
+if(NOT rapids-cmake-branch OR NOT rapids-cmake-version)
   message(
-    FATAL_ERROR "The CMake variable rapids-cmake-version must be defined in the format MAJOR.MINOR."
+    FATAL_ERROR "The CMake variable `rapids-cmake-branch` or `rapids-cmake-version` must be defined"
   )
 endif()
@@ -33,7 +33,7 @@ endif()
 # Allow users to control which branch is fetched
 if(NOT rapids-cmake-branch)
   # Define a default branch if the user doesn't set one
-  set(rapids-cmake-branch "branch-${rapids-cmake-version}")
+  set(rapids-cmake-branch "release/${rapids-cmake-version}")
 endif()
 
 # Allow users to control the exact URL passed to FetchContent
diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake
index abe468dce80..b706c926e7a 100644
--- a/cmake/rapids_config.cmake
+++ b/cmake/rapids_config.cmake
@@ -26,5 +26,15 @@ else()
   )
 endif()
 
+# Use STRINGS to trim whitespace/newlines
+file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH" _rapids_branch)
+if(NOT _rapids_branch)
+  message(
+    FATAL_ERROR
+      "Could not determine branch name to use for checking out rapids-cmake. The file \"${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH\" is missing."
+  )
+endif()
+
 set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}")
+set(rapids-cmake-branch "${_rapids_branch}")
 include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake")

From fb80addb3a84d6a811d3169855852062e3e8a326 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon, 28 Jul 2025 09:53:06 -0400
Subject: [PATCH 014/366] Implement top k expression in cudf-polars using
 `cudf::top_k` (#19431)

Now that we have #19303, we can implement top k directly as a unary
expression instead of translating the polars IR into a `Sort` + `Slice`.
This also has the benefit of getting a free performance improvement once
`cudf::top_k` uses CUB's implementation.
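Concretely, the expression now bottoms out in a single pylibcudf call, where
the translator previously rewrote the node into a full `Sort` wrapped in a
`Slice`. A minimal sketch against the new binding (building the input column
via `plc.interop.from_arrow` is just one convenient option, not part of this
patch):

    import pyarrow as pa

    import pylibcudf as plc

    # Build a device column from host data.
    col = plc.interop.from_arrow(pa.array([5, 1, 9, 3, 7]))

    # Three largest values. Order.ASCENDING selects the three smallest
    # instead, which the expression uses when reverse=True.
    top3 = plc.sorting.top_k(col, 3, plc.types.Order.DESCENDING)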
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19431 --- .../cudf_polars/dsl/expressions/unary.py | 19 ++++- .../cudf_polars/cudf_polars/dsl/translate.py | 11 --- .../pylibcudf/pylibcudf/libcudf/sorting.pxd | 65 ++++++++++------- python/pylibcudf/pylibcudf/sorting.pxd | 6 +- python/pylibcudf/pylibcudf/sorting.pyi | 8 +++ python/pylibcudf/pylibcudf/sorting.pyx | 69 ++++++++++++++++++- 6 files changed, 140 insertions(+), 38 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index f55e0b9aeed..9147e4a6dbe 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -5,7 +5,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar, cast import pylibcudf as plc @@ -111,6 +111,7 @@ class UnaryFunction(Expr): "unique", "value_counts", "null_count", + "top_k", } ) _supported_cum_aggs = frozenset( @@ -140,6 +141,7 @@ def __init__( "cum_sum", "drop_nulls", "unique", + "top_k", ) if self.name not in UnaryFunction._supported_fns: @@ -339,6 +341,21 @@ def do_evaluate( ), dtype=self.dtype, ) + elif self.name == "top_k": + (column, k) = ( + child.evaluate(df, context=context) for child in self.children + ) + (reverse,) = self.options + return Column( + plc.sorting.top_k( + column.obj, + cast(Literal, self.children[1]).value, + plc.types.Order.ASCENDING + if reverse + else plc.types.Order.DESCENDING, + ), + dtype=self.dtype, + ) elif self.name in self._OP_MAPPING: column = self.children[0].evaluate(df, context=context) if column.obj.type().id() != self.dtype.id(): diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index ab29752b8b9..50e8dd8690f 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -675,17 +675,6 @@ def _( ) elif name == "pow": return expr.BinOp(dtype, plc.binaryop.BinaryOperator.POW, *children) - elif name in "top_k": - (col, k) = children - assert isinstance(k, expr.Literal) - (descending,) = options - return expr.Slice( - dtype, - 0, - k.value, - expr.Sort(dtype, (False, True, not descending), col), - ) - return expr.UnaryFunction(dtype, name, options, *children) raise NotImplementedError( f"No handler for Expr function node with {name=}" diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd index 342545a0eec..a5fc13dc90e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -1,5 +1,5 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -cimport pylibcudf.libcudf.types as libcudf_types +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
+ from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -9,73 +9,92 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport ( + order, + null_order, + null_policy, + null_order, + size_type +) cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: cdef unique_ptr[column] sorted_order( table_view source_table, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[column] stable_sorted_order( table_view source_table, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[column] rank( column_view input_view, rank_method method, - libcudf_types.order column_order, - libcudf_types.null_policy null_handling, - libcudf_types.null_order null_precedence, + order column_order, + null_policy null_handling, + null_order null_precedence, bool percentage) except +libcudf_exception_handler cdef bool is_sorted( const table_view& table, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] segmented_sort_by_key( const table_view& values, const table_view& keys, const column_view& segment_offsets, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] stable_segmented_sort_by_key( const table_view& values, const table_view& keys, const column_view& segment_offsets, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] sort_by_key( const table_view& values, const table_view& keys, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort_by_key( const table_view& values, const table_view& keys, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] sort( table_view source_table, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort( table_view source_table, - vector[libcudf_types.order] column_order, - vector[libcudf_types.null_order] null_precedence + vector[order] column_order, + vector[null_order] null_precedence + ) except +libcudf_exception_handler + + cdef unique_ptr[column] top_k( + const column_view& col, + size_type k, + order sort_order, + ) except +libcudf_exception_handler + + cdef unique_ptr[column] top_k_order( + const 
column_view& col, + size_type k, + order sort_order, ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd index 8127ab21ad1..91f8354f965 100644 --- a/python/pylibcudf/pylibcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/sorting.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from pylibcudf.libcudf.aggregation cimport rank_method @@ -60,3 +60,7 @@ cpdef Table stable_sort_by_key( cpdef Table sort(Table source_table, list column_order, list null_precedence) cpdef Table stable_sort(Table source_table, list column_order, list null_precedence) + +cpdef Column top_k(Column col, size_type k, order sort_order = *) + +cpdef Column top_k_order(Column col, size_type k, order sort_order = *) diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 5255d869a4d..07ad962d0ce 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -62,3 +62,11 @@ def stable_sort( column_order: list[Order], null_precedence: list[NullOrder], ) -> Table: ... +def top_k( + col: Column, + k: int, + sort_order: Order = Order.DESCENDING, +) -> Column: ... +def top_k_order( + col: Column, k: int, sort_order: Order = Order.DESCENDING +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index fb29ef8c571..31efc018d6d 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -7,7 +7,7 @@ from pylibcudf.libcudf cimport sorting as cpp_sorting from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.types cimport null_order, null_policy, order +from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type from .column cimport Column from .table cimport Table @@ -393,3 +393,68 @@ cpdef Table stable_sort(Table source_table, list column_order, list null_precede c_null_precedence, ) return Table.from_libcudf(move(c_result)) + + +cpdef Column top_k(Column col, size_type k, order sort_order = order.DESCENDING): + """ + Computes the top-k values of a column. + + For details, see :cpp:func:`top_k`. + + Parameters + ---------- + col : Column + The input column. + k : int + The number of top values to retrieve. + sort_order : Order, default DESCENDING + The desired order of the top values. If ASCENDING, the smallest `k` values + are returned. If DESCENDING, the largest `k` values are returned. + + Returns + ------- + Column + A column of the top ``k`` elements from the input. + """ + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_sorting.top_k( + col.view(), + k, + sort_order, + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column top_k_order(Column col, size_type k, order sort_order = order.DESCENDING): + """ + Computes the indices of the top-k values of a column. + + This returns the row indices of the top-k elements. + + For details, see :cpp:func:`top_k_order`. + + Parameters + ---------- + col : Column + The input column. + k : int + The number of top values to retrieve. + sort_order : Order, default DESCENDING + The desired order of the top values. 
If ASCENDING, the indices of the smallest
+        `k` values are returned. If DESCENDING, the indices of the largest `k` values
+        are returned.
+
+    Returns
+    -------
+    Column
+        A column of the indices of the top ``k`` elements.
+    """
+    cdef unique_ptr[column] c_result
+    with nogil:
+        c_result = cpp_sorting.top_k_order(
+            col.view(),
+            k,
+            sort_order,
+        )
+    return Column.from_libcudf(move(c_result))

From 62ab698383b83b4fe56a27063d2bdd43b8447d63 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 28 Jul 2025 10:27:30 -0700
Subject: [PATCH 015/366] Don't run serial cudf_pandas tests when testing
 multiple pandas versions (#19507)

`test_rmm_option_on_import`, which was marked as `serial`
(https://github.com/rapidsai/cudf/pull/19345), still appears to fail flakily,
e.g. https://github.com/rapidsai/cudf/actions/runs/16531500545/job/46759205034?pr=19506.
It appears that when testing `cudf_pandas` tests with different pandas
versions, we also need to add `-m "not serial"`.

Additionally, this changes `--numprocesses=1` to `--numprocesses=0` so we
don't incur the overhead of running these tests in a separate, single process.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19507
---
 ci/cudf_pandas_scripts/run_tests.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index e953f7be090..439645a0add 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -116,7 +116,7 @@ python -m pytest -p cudf.pandas \
 # More details: https://github.com/rapidsai/cudf/pull/16930#issuecomment-2707873968
 python -m pytest -p cudf.pandas \
     --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
-    --numprocesses=1 \
+    --numprocesses=0 \
     -k "profiler" \
     ./python/cudf/cudf_pandas_tests/

@@ -133,15 +133,19 @@ for version in "${versions[@]}"; do
         --numprocesses=8 \
         --dist=worksteal \
         -k "not profiler" \
+        -m "not serial" \
         --cov-config=./python/cudf/.coveragerc \
         --cov=cudf \
         --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
         --cov-report=term \
         ./python/cudf/cudf_pandas_tests/

+    # NOTE: We don't currently run serial tests (only 1 as of 2025-07-25)
+    # with multiple versions of pandas.
+
     python -m pytest -p cudf.pandas \
         --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
-        --numprocesses=1 \
+        --numprocesses=0 \
         -k "profiler" \
         ./python/cudf/cudf_pandas_tests/
 done

From a6de6709336ecf199170bb47404240faf0cda78f Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon, 28 Jul 2025 17:31:33 -0400
Subject: [PATCH 016/366] Improve readability when printing pylibcudf enums
 (#19451)

This PR improves readability when printing public enums in pylibcudf by
setting `__str__ = __repr__`.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/19451
---
 docs/cudf/source/pylibcudf/developer_docs.md       | 16 ++++++++++++++++
 python/pylibcudf/pylibcudf/aggregation.pyx         |  8 ++++++++
 python/pylibcudf/pylibcudf/binaryop.pyx            |  4 +++-
 python/pylibcudf/pylibcudf/copying.pyx             |  9 ++++++++-
 python/pylibcudf/pylibcudf/datetime.pyx            |  3 +++
 python/pylibcudf/pylibcudf/expressions.pyx         |  4 ++++
 python/pylibcudf/pylibcudf/io/json.pyx             |  4 ++++
 python/pylibcudf/pylibcudf/io/types.pyx            |  6 ++++++
 python/pylibcudf/pylibcudf/labeling.pyx            |  4 +++-
 python/pylibcudf/pylibcudf/lists.pyx               |  3 +++
 python/pylibcudf/pylibcudf/reduce.pyx              |  4 +++-
 python/pylibcudf/pylibcudf/replace.pyx             |  4 +++-
 python/pylibcudf/pylibcudf/round.pyx               |  4 +++-
 python/pylibcudf/pylibcudf/stream_compaction.pyx   |  4 +++-
 python/pylibcudf/pylibcudf/strings/char_types.pyx  |  4 +++-
 python/pylibcudf/pylibcudf/strings/combine.pyx     |  5 ++++-
 python/pylibcudf/pylibcudf/strings/regex_flags.pyx |  4 +++-
 python/pylibcudf/pylibcudf/strings/side_type.pyx   |  4 +++-
 python/pylibcudf/pylibcudf/strings/translate.pyx   |  4 +++-
 python/pylibcudf/pylibcudf/types.pyx               | 11 +++++++++++
 python/pylibcudf/pylibcudf/unary.pyx               |  4 +++-
 21 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/docs/cudf/source/pylibcudf/developer_docs.md b/docs/cudf/source/pylibcudf/developer_docs.md
index e33c0b9834c..105dbb61a1b 100644
--- a/docs/cudf/source/pylibcudf/developer_docs.md
+++ b/docs/cudf/source/pylibcudf/developer_docs.md
@@ -225,6 +225,22 @@ from pylibcudf.libcudf.copying import \
     out_of_bounds_policy as OutOfBoundsPolicy  # no-cython-lint
 ```

+### Enum string representations
+
+By default, Cython's `cpdef enum class` generates a valid Python `Enum` type for
+each C++ enum. However, the default `__str__` implementation for these enums is not
+very informative. It returns the underlying value (e.g., `11` instead of `<TypeId.BOOL8: 11>`).
+
+To improve the developer experience, we manually set `__str__ = __repr__` for all public
+enums. This ensures that printing an enum from Python returns a meaningful name like:
+
+```python
+>>> from pylibcudf.types import TypeId
+>>> print(TypeId.INT32)
+<TypeId.INT32: 3>
+```
+
+
 ### Handling overloaded functions in libcudf

 As a C++ library, libcudf makes extensive use of function overloading. For
 example, both of the following functions exist in libcudf:

diff --git a/python/pylibcudf/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx
index 123754fd1ce..9bef36e5c06 100644
--- a/python/pylibcudf/pylibcudf/aggregation.pyx
+++ b/python/pylibcudf/pylibcudf/aggregation.pyx
@@ -917,3 +917,11 @@ cpdef bool is_valid_aggregation(DataType source, Aggregation agg):
         True if the aggregation is supported.
""" return cpp_is_valid_aggregation(source.c_obj, agg.kind()) + +Kind.__str__ = Kind.__repr__ +BitwiseOp.__str__ = BitwiseOp.__repr__ +CorrelationType.__str__ = CorrelationType.__repr__ +EWMHistory.__str__ = EWMHistory.__repr__ +RankMethod.__str__ = RankMethod.__repr__ +RankPercentage.__str__ = RankPercentage.__repr__ +UdfType.__str__ = UdfType.__repr__ diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index b7b4ecc6e83..c6827431646 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator import dereference @@ -114,3 +114,5 @@ cpdef bool is_supported_operation( rhs.c_obj, op ) + +BinaryOperator.__str__ = BinaryOperator.__repr__ diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx index fb8b6f9890e..3b0ba0d9555 100644 --- a/python/pylibcudf/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from cython.operator import dereference @@ -29,6 +29,8 @@ from pylibcudf.libcudf.copying import \ mask_allocation_policy as MaskAllocationPolicy # no-cython-lint from pylibcudf.libcudf.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint +from pylibcudf.libcudf.copying import \ + sample_with_replacement as SampleWithReplacement # no-cython-lint from .column cimport Column from .scalar cimport Scalar @@ -39,6 +41,7 @@ from .utils cimport _as_vector __all__ = [ "MaskAllocationPolicy", "OutOfBoundsPolicy", + "SampleWithReplacement", "allocate_like", "boolean_mask_scatter", "copy_if_else", @@ -589,3 +592,7 @@ cpdef Scalar get_element(Column input_column, size_type index): c_output = cpp_copying.get_element(input_column.view(), index) return Scalar.from_libcudf(move(c_output)) + +OutOfBoundsPolicy.__str__ = OutOfBoundsPolicy.__repr__ +MaskAllocationPolicy.__str__ = MaskAllocationPolicy.__repr__ +SampleWithReplacement.__str__ = SampleWithReplacement.__repr__ diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 15aee4c3e9e..da736755848 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -299,3 +299,6 @@ cpdef Column days_in_month(Column input): with nogil: result = cpp_days_in_month(input.view()) return Column.from_libcudf(move(result)) + +DatetimeComponent.__str__ = DatetimeComponent.__repr__ +RoundingFrequency.__str__ = RoundingFrequency.__repr__ diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index 22586eee6d1..ec4db948126 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -480,3 +480,7 @@ def to_expression(str expr, tuple column_names): {name: ColumnReference(i) for i, name in enumerate(column_names)} ) return visitor.visit(ast.parse(expr)) + + +ASTOperator.__str__ = ASTOperator.__repr__ +TableReference.__str__ = TableReference.__repr__ diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx index b91d011adcd..97ab5faf9a0 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyx +++ b/python/pylibcudf/pylibcudf/io/json.pyx @@ -34,6 +34,8 @@ from pylibcudf.libcudf.io.types cimport ( table_with_metadata, ) +from pylibcudf.libcudf.io.json import json_recovery_mode_t as 
JsonRecoveryModeType # no-cython-lint + from pylibcudf.libcudf.types cimport data_type, size_type from pylibcudf.libcudf.column.column cimport column, column_contents @@ -1087,3 +1089,5 @@ cpdef bool is_supported_write_json(DataType type): For details, see :cpp:func:`is_supported_write_json`. """ return cpp_is_supported_write_json(type.c_obj) + +JsonRecoveryModeType.__str__ = JsonRecoveryModeType.__repr__ diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 659f3a948ab..af57af6e694 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -661,3 +661,9 @@ cdef class SinkInfo: self.c_obj = sink_info(paths) __hash__ = None + +ColumnEncoding.__str__ = ColumnEncoding.__repr__ +CompressionType.__str__ = CompressionType.__repr__ +DictionaryPolicy.__str__ = DictionaryPolicy.__repr__ +QuoteStyle.__str__ = QuoteStyle.__repr__ +StatisticsFreq.__str__ = StatisticsFreq.__repr__ diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index cae1830f6b9..0d93463cc7e 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -53,3 +53,5 @@ cpdef Column label_bins( ) return Column.from_libcudf(move(c_result)) + +Inclusive.__str__ = Inclusive.__repr__ diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index d7e237ac474..eee5a43f6a8 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -677,3 +677,6 @@ cpdef Column distinct(Column input, null_equality nulls_equal, nan_equality nans nans_equal, ) return Column.from_libcudf(move(c_result)) + +ConcatenateNullPolicy.__str__ = ConcatenateNullPolicy.__repr__ +DuplicateFindOption.__str__ = DuplicateFindOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx index 1d6ffd9de10..1fa10dcd376 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -102,3 +102,5 @@ cpdef tuple minmax(Column col): Scalar.from_libcudf(move(result.first)), Scalar.from_libcudf(move(result.second)), ) + +ScanType.__str__ = ScanType.__repr__ diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx index 51be2b29277..d84f814a5ee 100644 --- a/python/pylibcudf/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from cython.operator import dereference @@ -200,3 +200,5 @@ cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=False): if not inplace: return Column.from_libcudf(move(c_result)) + +ReplacePolicy.__str__ = ReplacePolicy.__repr__ diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx index 09e5a9cc3bc..024cf47a224 100644 --- a/python/pylibcudf/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -47,3 +47,5 @@ cpdef Column round( ) return Column.from_libcudf(move(c_result)) + +RoundingMethod.__str__ = RoundingMethod.__repr__ diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index 6e403ca1b07..8f308e3b29e 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -327,3 +327,5 @@ cpdef size_type distinct_count( return cpp_stream_compaction.distinct_count( source.view(), null_handling, nan_handling ) + +DuplicateKeepOption.__str__ = DuplicateKeepOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx index 0af4a1f9c37..119daa911f6 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyx +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -92,3 +92,5 @@ cpdef Column filter_characters_of_type( ) return Column.from_libcudf(move(c_result)) + +StringCharacterTypes.__str__ = StringCharacterTypes.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx index dc1e72c799b..da78c81c0c0 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyx +++ b/python/pylibcudf/pylibcudf/strings/combine.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.column cimport Column @@ -228,3 +228,6 @@ cpdef Column join_list_elements( else: raise ValueError("separator must be a Column or a Scalar") return Column.from_libcudf(move(c_result)) + +OutputIfEmptyList.__str__ = OutputIfEmptyList.__repr__ +SeparatorOnNulls.__str__ = SeparatorOnNulls.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx index 65b504e0dc7..4395c9595bb 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx @@ -1,6 +1,8 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.strings.regex_flags import \ regex_flags as RegexFlags # no-cython-lint __all__ = ["RegexFlags"] + +RegexFlags.__str__ = RegexFlags.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyx b/python/pylibcudf/pylibcudf/strings/side_type.pyx index 87db4206a9c..1b3c7336ef0 100644 --- a/python/pylibcudf/pylibcudf/strings/side_type.pyx +++ b/python/pylibcudf/pylibcudf/strings/side_type.pyx @@ -1,5 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.libcudf.strings.side_type import \ side_type as SideType # no-cython-lint __all__ = ["SideType"] + +SideType.__str__ = SideType.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx index ba1e8dc5d27..9edb6a3a76f 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyx +++ b/python/pylibcudf/pylibcudf/strings/translate.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.utility cimport move @@ -117,3 +117,5 @@ cpdef Column filter_characters( dereference(c_replacement), ) return Column.from_libcudf(move(c_result)) + +FilterType.__str__ = FilterType.__repr__ diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 5e269f73f8b..6e3eb19be1a 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -305,3 +305,14 @@ def _from_arrow(obj: pa.DataType) -> DataType: SIZE_TYPE = DataType(type_to_id[size_type]()) SIZE_TYPE_ID = SIZE_TYPE.id() + +TypeId.__str__ = TypeId.__repr__ +NanPolicy.__str__ = NanPolicy.__repr__ +NullPolicy.__str__ = NullPolicy.__repr__ +Interpolation.__str__ = Interpolation.__repr__ +MaskState.__str__ = MaskState.__repr__ +NanEquality.__str__ = NanEquality.__repr__ +NullEquality.__str__ = NullEquality.__repr__ +NullOrder.__str__ = NullOrder.__repr__ +Order.__str__ = Order.__repr__ +Sorted.__str__ = Sorted.__repr__ diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx index b738ab53d1b..3915ed8274a 100644 --- a/python/pylibcudf/pylibcudf/unary.pyx +++ b/python/pylibcudf/pylibcudf/unary.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -184,3 +184,5 @@ cpdef bool is_supported_cast(DataType from_, DataType to): """ with nogil: return cpp_unary.is_supported_cast(from_.c_obj, to.c_obj) + +UnaryOperator.__str__ = UnaryOperator.__repr__ From 1eba62d3e3749fdc165dcd9c8d270a54e77f3458 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:08:33 -0400 Subject: [PATCH 017/366] Prefer `Column.astype` over `plc.unary.cast` in the fill null unary function expression (#19479) Contributes to https://github.com/rapidsai/cudf/issues/19476 by replacing the direct call to `plc.unary.cast` with `Column.astype` in the `fill_null` unary function expression. 
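For context, a minimal sketch of a query that exercises this code path
(assuming the Polars GPU engine is available): the integer fill value `0` has
a different dtype than the Float64 column, so it must be cast before
`replace_nulls` runs, and that cast now goes through `Column.astype` rather
than a raw `plc.unary.cast`:

```python
import polars as pl

# Fill value dtype (int) differs from the column dtype (Float64), so the
# engine casts the fill value to the column dtype before replacing nulls.
q = pl.LazyFrame({"a": [None, 1.5, 2.5]}).select(pl.col("a").fill_null(0))
print(q.collect(engine="gpu"))
```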
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19479 --- .../cudf_polars/dsl/expressions/unary.py | 46 ++++++++----------- .../cudf_polars/dsl/utils/aggregations.py | 8 ++++ .../cudf_polars/cudf_polars/testing/plugin.py | 2 - .../cudf_polars/cudf_polars/utils/dtypes.py | 10 ++-- python/cudf_polars/tests/test_drop_nulls.py | 7 ++- python/cudf_polars/tests/test_groupby.py | 8 ++++ 6 files changed, 48 insertions(+), 33 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 9147e4a6dbe..64c4cae2378 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -9,7 +9,7 @@ import pylibcudf as plc -from cudf_polars.containers import Column, DataType +from cudf_polars.containers import Column from cudf_polars.dsl.expressions.base import ExecutionContext, Expr from cudf_polars.dsl.expressions.literal import Literal from cudf_polars.utils import dtypes @@ -311,19 +311,20 @@ def do_evaluate( column = self.children[0].evaluate(df, context=context) if column.null_count == 0: return column - if isinstance(self.children[1], Literal): - arg = plc.Scalar.from_py( - self.children[1].value, self.children[1].dtype.plc - ) + fill_value = self.children[1] + if isinstance(fill_value, Literal): + arg = plc.Scalar.from_py(fill_value.value, fill_value.dtype.plc) else: - evaluated = self.children[1].evaluate(df, context=context) + evaluated = fill_value.evaluate(df, context=context) arg = evaluated.obj_scalar if evaluated.is_scalar else evaluated.obj if isinstance(arg, plc.Scalar) and dtypes.can_cast( - column.obj.type(), arg.type() + column.dtype.plc, arg.type() ): # pragma: no cover - arg = plc.unary.cast( - plc.Column.from_scalar(arg, 1), column.obj.type() - ).to_scalar() + arg = ( + Column(plc.Column.from_scalar(arg, 1), dtype=fill_value.dtype) + .astype(column.dtype) + .obj.to_scalar() + ) return Column(plc.replace.replace_nulls(column.obj, arg), dtype=self.dtype) elif self.name == "as_struct": children = [ @@ -358,7 +359,7 @@ def do_evaluate( ) elif self.name in self._OP_MAPPING: column = self.children[0].evaluate(df, context=context) - if column.obj.type().id() != self.dtype.id(): + if column.dtype.plc.id() != self.dtype.id(): arg = plc.unary.cast(column.obj, self.dtype.plc) else: arg = column.obj @@ -369,7 +370,7 @@ def do_evaluate( elif self.name in UnaryFunction._supported_cum_aggs: column = self.children[0].evaluate(df, context=context) plc_col = column.obj - col_type = column.obj.type() + col_type = column.dtype.plc # cum_sum casts # Int8, UInt8, Int16, UInt16 -> Int64 for overflow prevention # Bool -> UInt32 @@ -380,26 +381,19 @@ def do_evaluate( self.name == "cum_sum" and col_type.id() in { - plc.types.TypeId.INT8, - plc.types.TypeId.UINT8, - plc.types.TypeId.INT16, - plc.types.TypeId.UINT16, + plc.TypeId.INT8, + plc.TypeId.UINT8, + plc.TypeId.INT16, + plc.TypeId.UINT16, } ) or ( self.name == "cum_prod" and plc.traits.is_integral(col_type) and plc.types.size_of(col_type) <= 4 ): - plc_col = plc.unary.cast( - plc_col, plc.types.DataType(plc.types.TypeId.INT64) - ) - elif ( - self.name == "cum_sum" - and column.obj.type().id() == plc.types.TypeId.BOOL8 - ): - plc_col = plc.unary.cast( - plc_col, plc.types.DataType(plc.types.TypeId.UINT32) - ) + plc_col = plc.unary.cast(plc_col, 
plc.DataType(plc.TypeId.INT64)) + elif self.name == "cum_sum" and column.dtype.plc.id() == plc.TypeId.BOOL8: + plc_col = plc.unary.cast(plc_col, plc.DataType(plc.TypeId.UINT32)) if self.name == "cum_sum": agg = plc.aggregation.sum() elif self.name == "cum_prod": diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index fef9ddfb428..a8280c4c3bc 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -151,6 +151,14 @@ def decompose_single_agg( # agg. replace_nulls(col, 0, is_top=is_top), ) + elif agg.name == "mean": + post_agg_col: expr.Expr = expr.Col( + DataType(pl.Float64), name + ) # libcudf promotes to float64 + if agg.dtype.plc.id() == plc.TypeId.FLOAT32: + # Cast back to float32 to match Polars + post_agg_col = expr.Cast(agg.dtype, post_agg_col) + return [(named_expr, True)], named_expr.reconstruct(post_agg_col) else: return [(named_expr, True)], named_expr.reconstruct( expr.Col(agg.dtype, name) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index e5aca6766e2..f1d7820c809 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -137,7 +137,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "https://github.com/rapidsai/cudf/issues/19408", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_shorthand_quantile": "libcudf quantiles are round to nearest ties to even, polars quantiles are round to nearest ties away from zero", - "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input11-expected11-input_dtype11-output_dtype11]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input12-expected12-input_dtype12-output_dtype12]": "Unsupported groupby-agg for a particular dtype", @@ -162,7 +161,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception", "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match", "tests/unit/test_cse.py::test_nested_cache_no_panic_16553": "Needs https://github.com/rapidsai/cudf/issues/18630", - "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852", "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised", "tests/unit/test_predicates.py::test_predicate_pushdown_split_pushable": "Casting that raises not supported on GPU", "tests/unit/io/test_scan.py::test_async_read_21945[scan_type0]": "Debug output on stderr doesn't match", diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 350aae54238..a38652f84d9 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ 
b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -21,18 +21,20 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool: """ - Can we cast (via :func:`~.pylibcudf.unary.cast`) between two datatypes. + Determine whether a cast between two datatypes is supported by cudf-polars. Parameters ---------- - from_ + from_ : pylibcudf.DataType Source datatype - to + + to : pylibcudf.DataType Target datatype Returns ------- - True if casting is supported, False otherwise + bool + True if the cast is supported, False otherwise. """ to_is_empty = to.id() == plc.TypeId.EMPTY from_is_empty = from_.id() == plc.TypeId.EMPTY diff --git a/python/cudf_polars/tests/test_drop_nulls.py b/python/cudf_polars/tests/test_drop_nulls.py index 5dfe9f66a97..46a8007a805 100644 --- a/python/cudf_polars/tests/test_drop_nulls.py +++ b/python/cudf_polars/tests/test_drop_nulls.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -46,6 +46,11 @@ def test_fill_null(null_data, value): assert_gpu_result_equal(q) +def test_fill_null_with_string(): + q = pl.LazyFrame({"a": [None, "a"]}).select(pl.col("a").fill_null("b")) + assert_gpu_result_equal(q) + + @pytest.mark.parametrize( "strategy", ["forward", "backward", "min", "max", "mean", "zero", "one"] ) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index ab66700f2f6..f60a46b52d6 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -303,3 +303,11 @@ def test_groupby_null_count_raises(df: pl.LazyFrame): def test_groupby_unsupported_non_pointwise_boolean_function(df: pl.LazyFrame, expr): q = df.group_by("key1").agg(expr) assert_ir_translation_raises(q, NotImplementedError) + + +def test_groupby_mean_type_promotion(df: pl.LazyFrame) -> None: + df = df.with_columns(pl.col("float").cast(pl.Float32)) + + q = df.group_by("key1").agg(pl.col("float").mean()) + + assert_gpu_result_equal(q, check_row_order=False) From 6ed64e88bef407fda6dbb92212eee6759dcb1e5a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 28 Jul 2025 20:25:15 -0700 Subject: [PATCH 018/366] Add primitive row dispatch support for semi/anti join and cudf::contains (#19518) This PR is a follow-up to #19361, which was reverted due to a NaN handling bug and incorrect CG size used in explicit instantiations. This revised PR addresses those issues and retargets the work for the 25.10 release. 
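For reference, a small sketch of an operation that routes through
`cudf::contains` / the semi-join path with primitive keys, where the new
dispatch applies (assuming the cudf Python API; floating-point keys
deliberately keep the generic path because the primitive row operators do not
handle NaN equality):

```python
import cudf

# Integer keys are primitive-row-op compatible and contain no floating-point
# columns, so the specialized (CG size 1) probing path can be taken.
left = cudf.DataFrame({"k": [1, 2, 3, 4]})
right = cudf.DataFrame({"k": [2, 4, 6]})
print(left.merge(right, on="k", how="leftsemi"))  # rows of left with k present in right
```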
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19518 --- cpp/CMakeLists.txt | 3 + .../cudf/table/primitive_row_operators.cuh | 13 + cpp/src/search/contains_table.cu | 307 ++++-------------- cpp/src/search/contains_table_impl.cu | 126 +++++++ cpp/src/search/contains_table_impl.cuh | 268 +++++++++++++++ cpp/src/search/contains_table_impl_nested.cu | 94 ++++++ .../search/contains_table_impl_primitive.cu | 42 +++ cpp/tests/search/search_test.cpp | 27 ++ 8 files changed, 641 insertions(+), 239 deletions(-) create mode 100644 cpp/src/search/contains_table_impl.cu create mode 100644 cpp/src/search/contains_table_impl.cuh create mode 100644 cpp/src/search/contains_table_impl_nested.cu create mode 100644 cpp/src/search/contains_table_impl_primitive.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2b42e243033..653c61fcb96 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -686,6 +686,9 @@ add_library( src/search/contains_column.cu src/search/contains_scalar.cu src/search/contains_table.cu + src/search/contains_table_impl.cu + src/search/contains_table_impl_nested.cu + src/search/contains_table_impl_primitive.cu src/search/search_ordered.cu src/sort/is_sorted.cu src/sort/rank.cu diff --git a/cpp/include/cudf/table/primitive_row_operators.cuh b/cpp/include/cudf/table/primitive_row_operators.cuh index 1659101cb2b..3016422938e 100644 --- a/cpp/include/cudf/table/primitive_row_operators.cuh +++ b/cpp/include/cudf/table/primitive_row_operators.cuh @@ -143,6 +143,19 @@ class row_equality_comparator { rhs_row_index); } + /** + * @brief Compares the specified rows for equality. + * + * @param lhs_index The index of the first row to compare (in the lhs table) + * @param rhs_index The index of the second row to compare (in the rhs table) + * @return Boolean indicating if both rows are equal + */ + __device__ bool operator()(cudf::experimental::row::lhs_index_type lhs_index, + cudf::experimental::row::rhs_index_type rhs_index) const + { + return (*this)(static_cast(lhs_index), static_cast(rhs_index)); + } + private: cudf::nullate::DYNAMIC _has_nulls; table_device_view _lhs; diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 245d9471550..b5d364a2590 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -14,177 +14,25 @@ * limitations under the License. 
*/ -#include "join/join_common_utils.cuh" +#include "contains_table_impl.cuh" -#include -#include #include -#include #include +#include #include #include -#include +#include #include #include #include #include -#include -#include -#include +#include namespace cudf::detail { -namespace { - -using cudf::experimental::row::lhs_index_type; -using cudf::experimental::row::rhs_index_type; - -/** - * @brief An hasher adapter wrapping both haystack hasher and needles hasher - */ -template -struct hasher_adapter { - hasher_adapter(HaystackHasher const& haystack_hasher, NeedleHasher const& needle_hasher) - : _haystack_hasher{haystack_hasher}, _needle_hasher{needle_hasher} - { - } - - __device__ constexpr auto operator()(lhs_index_type idx) const noexcept - { - return _needle_hasher(static_cast(idx)); - } - - __device__ constexpr auto operator()(rhs_index_type idx) const noexcept - { - return _haystack_hasher(static_cast(idx)); - } - - private: - HaystackHasher const _haystack_hasher; - NeedleHasher const _needle_hasher; -}; - -/** - * @brief An comparator adapter wrapping both self comparator and two table comparator - */ -template -struct comparator_adapter { - comparator_adapter(SelfEqual const& self_equal, TwoTableEqual const& two_table_equal) - : _self_equal{self_equal}, _two_table_equal{two_table_equal} - { - } - - __device__ constexpr auto operator()(rhs_index_type lhs_index, - rhs_index_type rhs_index) const noexcept - { - auto const lhs = static_cast(lhs_index); - auto const rhs = static_cast(rhs_index); - - return _self_equal(lhs, rhs); - } - - __device__ constexpr auto operator()(lhs_index_type lhs_index, - rhs_index_type rhs_index) const noexcept - { - return _two_table_equal(lhs_index, rhs_index); - } - - private: - SelfEqual const _self_equal; - TwoTableEqual const _two_table_equal; -}; - -/** - * @brief Build a row bitmask for the input table. - * - * The output bitmask will have invalid bits corresponding to the input rows having nulls (at - * any nested level) and vice versa. - * - * @param input The input table - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A pair of pointer to the output bitmask and the buffer containing the bitmask - */ -std::pair build_row_bitmask(table_view const& input, - rmm::cuda_stream_view stream) -{ - auto const nullable_columns = get_nullable_columns(input); - CUDF_EXPECTS(nullable_columns.size() > 0, - "The input table has nulls thus it should have nullable columns."); - - // If there are more than one nullable column, we compute `bitmask_and` of their null masks. - // Otherwise, we have only one nullable column and can use its null mask directly. 
- if (nullable_columns.size() > 1) { - auto row_bitmask = - cudf::detail::bitmask_and( - table_view{nullable_columns}, stream, cudf::get_current_device_resource_ref()) - .first; - auto const row_bitmask_ptr = static_cast(row_bitmask.data()); - return std::pair(std::move(row_bitmask), row_bitmask_ptr); - } - - return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); -} - -/** - * @brief Invokes the given `func` with desired comparators based on the specified `compare_nans` - * parameter - * - * @tparam HasNested Flag indicating whether there are nested columns in haystack or needles - * @tparam Hasher Type of device hash function - * @tparam Func Type of the helper function doing `contains` check - * - * @param compare_nulls Control whether nulls should be compared as equal or not - * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not - * @param haystack_has_nulls Flag indicating whether haystack has nulls or not - * @param has_any_nulls Flag indicating whether there are nested nulls is either haystack or needles - * @param self_equal Self table comparator - * @param two_table_equal Two table comparator - * @param d_hasher Device hash functor - * @param func The input functor to invoke - */ -template -void dispatch_nan_comparator( - null_equality compare_nulls, - nan_equality compare_nans, - bool haystack_has_nulls, - bool has_any_nulls, - cudf::experimental::row::equality::self_comparator self_equal, - cudf::experimental::row::equality::two_table_comparator two_table_equal, - Hasher const& d_hasher, - Func&& func) -{ - // Distinguish probing scheme CG sizes between nested and flat types for better performance - auto const probing_scheme = [&]() { - if constexpr (HasNested) { - return cuco::linear_probing<4, Hasher>{d_hasher}; - } else { - return cuco::linear_probing<1, Hasher>{d_hasher}; - } - }(); - - if (compare_nans == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - auto const d_self_equal = self_equal.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); - auto const d_two_table_equal = two_table_equal.equal_to( - nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); - func(d_self_equal, d_two_table_equal, probing_scheme); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - auto const d_self_equal = self_equal.equal_to( - nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); - auto const d_two_table_equal = two_table_equal.equal_to( - nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); - func(d_self_equal, d_two_table_equal, probing_scheme); - } -} - -} // namespace - rmm::device_uvector contains(table_view const& haystack, table_view const& needles, null_equality compare_nulls, @@ -203,94 +51,75 @@ rmm::device_uvector contains(table_view const& haystack, auto const preprocessed_haystack = cudf::experimental::row::equality::preprocessed_table::create(haystack, stream); - auto const haystack_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); - auto const d_haystack_hasher = haystack_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); - auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); - auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); - auto const 
d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; - - auto const self_equal = cudf::experimental::row::equality::self_comparator(preprocessed_haystack); - auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( - preprocessed_needles, preprocessed_haystack); - // The output vector. auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); - auto const haystack_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, cuda::proclaim_return_type([] __device__(auto idx) { - return rhs_index_type{idx}; - })); - auto const needles_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, cuda::proclaim_return_type([] __device__(auto idx) { - return lhs_index_type{idx}; - })); - - auto const helper_func = - [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { - auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - - auto set = cuco::static_set{ - cuco::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{rhs_index_type{-1}}, - d_equal, - probing_scheme, - {}, - {}, - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, - stream.value()}; - - if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - - // If the haystack table has nulls but they are compared unequal, don't insert them. - // Otherwise, it was known to cause performance issue: - // - https://github.com/rapidsai/cudf/pull/6943 - // - https://github.com/rapidsai/cudf/pull/8277 - set.insert_if_async(haystack_iter, - haystack_iter + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - stream.value()); - } else { - set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); - } - - if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { - auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); - auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - set.contains_if_async(needles_iter, - needles_iter + needles.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - contained.begin(), - stream.value()); - } else { - set.contains_async( - needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); - } - }; - - if (cudf::detail::has_nested_columns(haystack)) { - dispatch_nan_comparator(compare_nulls, - compare_nans, - haystack_has_nulls, - has_any_nulls, - self_equal, - two_table_equal, - d_hasher, - helper_func); + // Only use primitive row operators for non-floating-point types since they don't handle NaN + // equality + auto const has_floating_point = + std::any_of(haystack.begin(), haystack.end(), [](auto const& col) { + return cudf::is_floating_point(col.type()); + }); + if (cudf::is_primitive_row_op_compatible(haystack) && !has_floating_point) { + auto const d_haystack_hasher = + cudf::row::primitive::row_hasher{nullate::DYNAMIC{has_any_nulls}, preprocessed_haystack}; + auto const d_needle_hasher = + cudf::row::primitive::row_hasher{nullate::DYNAMIC{has_any_nulls}, preprocessed_needles}; + auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; + auto const d_self_equal = cudf::row::primitive::row_equality_comparator{ + nullate::DYNAMIC{has_any_nulls}, preprocessed_haystack, preprocessed_haystack, compare_nulls}; + auto const 
d_two_table_equal = cudf::row::primitive::row_equality_comparator{ + nullate::DYNAMIC{has_any_nulls}, preprocessed_needles, preprocessed_haystack, compare_nulls}; + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + perform_contains(haystack, + needles, + haystack_has_nulls, + needles_has_nulls, + compare_nulls, + d_equal, + cuco::linear_probing<1, decltype(d_hasher)>{d_hasher}, + contained, + stream); } else { - dispatch_nan_comparator(compare_nulls, - compare_nans, - haystack_has_nulls, - has_any_nulls, - self_equal, - two_table_equal, - d_hasher, - helper_func); + auto const haystack_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_haystack); + auto const d_haystack_hasher = haystack_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const needle_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_needles); + auto const d_needle_hasher = needle_hasher.device_hasher(nullate::DYNAMIC{has_any_nulls}); + auto const d_hasher = hasher_adapter{d_haystack_hasher, d_needle_hasher}; + + auto const self_equal = + cudf::experimental::row::equality::self_comparator(preprocessed_haystack); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_needles, preprocessed_haystack); + + if (cudf::detail::has_nested_columns(haystack)) { + dispatch_nan_comparator(haystack, + needles, + compare_nulls, + compare_nans, + haystack_has_nulls, + needles_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + contained, + stream); + } else { + dispatch_nan_comparator(haystack, + needles, + compare_nulls, + compare_nans, + haystack_has_nulls, + needles_has_nulls, + has_any_nulls, + self_equal, + two_table_equal, + d_hasher, + contained, + stream); + } } - return contained; } diff --git a/cpp/src/search/contains_table_impl.cu b/cpp/src/search/contains_table_impl.cu new file mode 100644 index 00000000000..dcbd4855b73 --- /dev/null +++ b/cpp/src/search/contains_table_impl.cu @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "contains_table_impl.cuh" + +#include +#include +#include + +#include + +namespace cudf::detail { + +using cudf::experimental::row::lhs_index_type; +using cudf::experimental::row::rhs_index_type; + +/** + * @brief Build a row bitmask for the input table. + * + * The output bitmask will have invalid bits corresponding to the input rows having nulls (at + * any nested level) and vice versa. 
+ * + * @param input The input table + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A pair of pointer to the output bitmask and the buffer containing the bitmask + */ +std::pair build_row_bitmask(table_view const& input, + rmm::cuda_stream_view stream) +{ + auto const nullable_columns = get_nullable_columns(input); + CUDF_EXPECTS(nullable_columns.size() > 0, + "The input table has nulls thus it should have nullable columns."); + + // If there are more than one nullable column, we compute `bitmask_and` of their null masks. + // Otherwise, we have only one nullable column and can use its null mask directly. + if (nullable_columns.size() > 1) { + auto row_bitmask = + cudf::detail::bitmask_and( + table_view{nullable_columns}, stream, cudf::get_current_device_resource_ref()) + .first; + auto const row_bitmask_ptr = static_cast(row_bitmask.data()); + return std::pair(std::move(row_bitmask), row_bitmask_ptr); + } + + return std::pair(rmm::device_buffer{0, stream}, nullable_columns.front().null_mask()); +} + +// Explicit instantiations for non-nested types (HasNested=false) +using hasher_adapter_t = hasher_adapter< + cudf::experimental::row::hash::device_row_hasher, + cudf::experimental::row::hash::device_row_hasher>; + +template void dispatch_nan_comparator( + table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool needles_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + hasher_adapter_t const& d_hasher, + rmm::device_uvector& contained, + rmm::cuda_stream_view stream); + +// For HasNested=false (non-nested columns) with nan_equal_comparator +using nan_equal_self_comparator = cudf::experimental::row::equality::device_row_comparator< + false, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using nan_equal_two_table_comparator = + cudf::experimental::row::equality::strong_index_comparator_adapter; + +using nan_equal_comparator_adapter = + comparator_adapter; + +template void perform_contains(table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + nan_equal_comparator_adapter const& d_equal, + cuco::linear_probing<1, hasher_adapter_t> const& probing_scheme, + rmm::device_uvector& contained, + rmm::cuda_stream_view stream); + +// For HasNested=false (non-nested columns) with nan_unequal_comparator +using nan_unequal_self_comparator = cudf::experimental::row::equality::device_row_comparator< + false, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::physical_equality_comparator>; + +using nan_unequal_two_table_comparator = + cudf::experimental::row::equality::strong_index_comparator_adapter; + +using nan_unequal_comparator_adapter = + comparator_adapter; + +template void perform_contains(table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + nan_unequal_comparator_adapter const& d_equal, + cuco::linear_probing<1, hasher_adapter_t> const& probing_scheme, + rmm::device_uvector& contained, + rmm::cuda_stream_view stream); + +} // namespace cudf::detail diff --git a/cpp/src/search/contains_table_impl.cuh b/cpp/src/search/contains_table_impl.cuh new file mode 100644 index 00000000000..dedf79fc0f7 
--- /dev/null +++ b/cpp/src/search/contains_table_impl.cuh @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "join/join_common_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf::detail { + +using cudf::experimental::row::lhs_index_type; +using cudf::experimental::row::rhs_index_type; + +/** + * @brief A hasher adapter wrapping both the haystack hasher and the needles hasher + */ +template <typename HaystackHasher, typename NeedleHasher> +struct hasher_adapter { + hasher_adapter(HaystackHasher const& haystack_hasher, NeedleHasher const& needle_hasher) + : _haystack_hasher{haystack_hasher}, _needle_hasher{needle_hasher} + { + } + + __device__ constexpr auto operator()(lhs_index_type idx) const noexcept + { + return _needle_hasher(static_cast<size_type>(idx)); + } + + __device__ constexpr auto operator()(rhs_index_type idx) const noexcept + { + return _haystack_hasher(static_cast<size_type>(idx)); + } + + private: + HaystackHasher const _haystack_hasher; + NeedleHasher const _needle_hasher; +}; + +/** + * @brief A comparator adapter wrapping both the self comparator and the two-table comparator + */ +template <typename SelfEqual, typename TwoTableEqual> +struct comparator_adapter { + comparator_adapter(SelfEqual const& self_equal, TwoTableEqual const& two_table_equal) + : _self_equal{self_equal}, _two_table_equal{two_table_equal} + { + } + + __device__ constexpr auto operator()(rhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + auto const lhs = static_cast<size_type>(lhs_index); + auto const rhs = static_cast<size_type>(rhs_index); + + return _self_equal(lhs, rhs); + } + + __device__ constexpr auto operator()(lhs_index_type lhs_index, + rhs_index_type rhs_index) const noexcept + { + return _two_table_equal(lhs_index, rhs_index); + } + + private: + SelfEqual const _self_equal; + TwoTableEqual const _two_table_equal; +}; + +/** + * @brief Build a row bitmask for the input table. + * + * The output bitmask will have invalid bits corresponding to the input rows having nulls (at + * any nested level) and vice versa.
+ * + * @param input The input table + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A pair of the buffer containing the output bitmask and a pointer to that bitmask + */ +std::pair<rmm::device_buffer, bitmask_type const*> build_row_bitmask(table_view const& input, + rmm::cuda_stream_view stream); + +/** + * @brief Helper function to perform the contains operation using a hash set + * + * @tparam Comparator Type of the equality comparator + * @tparam ProbingScheme Type of the probing scheme + * + * @param haystack The haystack table view + * @param needles The needles table view + * @param haystack_has_nulls Flag indicating whether haystack has nulls + * @param needles_has_nulls Flag indicating whether needles has nulls + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param d_equal The equality comparator + * @param probing_scheme The probing scheme for the hash set + * @param contained The output vector to store results + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template <typename Comparator, typename ProbingScheme> +void perform_contains(table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + Comparator const& d_equal, + ProbingScheme const& probing_scheme, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream) +{ + auto const haystack_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, cuda::proclaim_return_type<rhs_index_type>([] __device__(auto idx) { + return rhs_index_type{idx}; + })); + + auto const needles_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, cuda::proclaim_return_type<lhs_index_type>([] __device__(auto idx) { + return lhs_index_type{idx}; + })); + + auto set = cuco::static_set{ + cuco::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{rhs_index_type{-1}}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, + stream.value()}; + + if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + + // If the haystack table has nulls but they are compared unequal, don't insert them.
+ // Otherwise, this was known to cause performance issues: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 + set.insert_if_async(haystack_iter, + haystack_iter + haystack.num_rows(), + thrust::counting_iterator<size_type>(0), // stencil + row_is_valid{row_bitmask_ptr}, + stream.value()); + } else { + set.insert_async(haystack_iter, haystack_iter + haystack.num_rows(), stream.value()); + } + + if (needles_has_nulls && compare_nulls == null_equality::UNEQUAL) { + auto const bitmask_buffer_and_ptr = build_row_bitmask(needles, stream); + auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; + set.contains_if_async(needles_iter, + needles_iter + needles.num_rows(), + thrust::counting_iterator<size_type>(0), // stencil + row_is_valid{row_bitmask_ptr}, + contained.begin(), + stream.value()); + } else { + set.contains_async( + needles_iter, needles_iter + needles.num_rows(), contained.begin(), stream.value()); + } +} + +/** + * @brief Invokes perform_contains with the desired comparators based on the specified + * `compare_nans` parameter + * + * @tparam HasNested Flag indicating whether there are nested columns in haystack or needles + * @tparam Hasher Type of the device hash function + * + * @param haystack The haystack table view + * @param needles The needles table view + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaN values should be compared as equal or not + * @param haystack_has_nulls Flag indicating whether haystack has nulls or not + * @param needles_has_nulls Flag indicating whether needles has nulls or not + * @param has_any_nulls Flag indicating whether there are nested nulls in either haystack or needles + * @param self_equal Self table comparator + * @param two_table_equal Two-table comparator + * @param d_hasher Device hash functor + * @param contained The output vector to store results + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template <bool HasNested, typename Hasher> +void dispatch_nan_comparator( + table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool needles_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + Hasher const& d_hasher, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream) +{ + // Distinguish probing scheme CG sizes between nested and flat types for better performance + auto const probing_scheme = [&]() { + if constexpr (HasNested) { + return cuco::linear_probing<4, Hasher>{d_hasher}; + } else { + return cuco::linear_probing<1, Hasher>{d_hasher}; + } + }(); + + if (compare_nans == nan_equality::ALL_EQUAL) { + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_equal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_equal_comparator{}); + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + perform_contains(haystack, + needles, + haystack_has_nulls, + needles_has_nulls, + compare_nulls, + d_equal, + probing_scheme, + contained, + stream); + } else { + using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; +
auto const d_self_equal = self_equal.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, nan_unequal_comparator{}); + auto const d_two_table_equal = two_table_equal.equal_to( + nullate::DYNAMIC{has_any_nulls}, compare_nulls, nan_unequal_comparator{}); + auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; + perform_contains(haystack, + needles, + haystack_has_nulls, + needles_has_nulls, + compare_nulls, + d_equal, + probing_scheme, + contained, + stream); + } +} + +} // namespace cudf::detail diff --git a/cpp/src/search/contains_table_impl_nested.cu b/cpp/src/search/contains_table_impl_nested.cu new file mode 100644 index 00000000000..e90870508af --- /dev/null +++ b/cpp/src/search/contains_table_impl_nested.cu @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "contains_table_impl.cuh" + +#include +#include +#include + +namespace cudf::detail { + +// Explicit instantiations to reduce build time +using hasher_adapter_t = hasher_adapter< + cudf::experimental::row::hash::device_row_hasher, + cudf::experimental::row::hash::device_row_hasher>; + +template void dispatch_nan_comparator<true>( + table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + bool haystack_has_nulls, + bool needles_has_nulls, + bool has_any_nulls, + cudf::experimental::row::equality::self_comparator self_equal, + cudf::experimental::row::equality::two_table_comparator two_table_equal, + hasher_adapter_t const& d_hasher, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream); + +// Explicit instantiations for perform_contains with nested types (experimental row operations) + +// For HasNested=true (nested columns) with nan_equal_comparator +using nan_equal_self_comparator_nested = cudf::experimental::row::equality::device_row_comparator< + true, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator>; + +using nan_equal_two_table_comparator_nested = + cudf::experimental::row::equality::strong_index_comparator_adapter< + nan_equal_self_comparator_nested>; + +using nan_equal_comparator_adapter_nested = + comparator_adapter<nan_equal_self_comparator_nested, nan_equal_two_table_comparator_nested>; + +template void perform_contains(table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + nan_equal_comparator_adapter_nested const& d_equal, + cuco::linear_probing<4, hasher_adapter_t> const& probing_scheme, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream); + +// For HasNested=true (nested columns) with nan_unequal_comparator +using nan_unequal_self_comparator_nested = cudf::experimental::row::equality::device_row_comparator< + true, + cudf::nullate::DYNAMIC, + cudf::experimental::row::equality::physical_equality_comparator>; + +using nan_unequal_two_table_comparator_nested = + cudf::experimental::row::equality::strong_index_comparator_adapter< +
nan_unequal_self_comparator_nested>; + +using nan_unequal_comparator_adapter_nested = + comparator_adapter<nan_unequal_self_comparator_nested, nan_unequal_two_table_comparator_nested>; + +template void perform_contains(table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + nan_unequal_comparator_adapter_nested const& d_equal, + cuco::linear_probing<4, hasher_adapter_t> const& probing_scheme, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream); + +} // namespace cudf::detail diff --git a/cpp/src/search/contains_table_impl_primitive.cu b/cpp/src/search/contains_table_impl_primitive.cu new file mode 100644 index 00000000000..9d925321509 --- /dev/null +++ b/cpp/src/search/contains_table_impl_primitive.cu @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "contains_table_impl.cuh" + +#include + +namespace cudf::detail { + +// Explicit instantiation for perform_contains with primitive row operations +using primitive_hasher_adapter_type = + hasher_adapter<cudf::row::primitive::row_hasher<>, cudf::row::primitive::row_hasher<>>; + +using primitive_comparator_adapter_type = + comparator_adapter; + +template void perform_contains( + table_view const& haystack, + table_view const& needles, + bool haystack_has_nulls, + bool needles_has_nulls, + null_equality compare_nulls, + primitive_comparator_adapter_type const& d_equal, + cuco::linear_probing<1, primitive_hasher_adapter_type> const& probing_scheme, + rmm::device_uvector<bool>& contained, + rmm::cuda_stream_view stream); + +} // namespace cudf::detail diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index ea566fcbd38..5788fd1d5d6 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -20,10 +20,15 @@ #include #include +#include #include +#include + +#include #include +#include #include struct SearchTest : public cudf::test::BaseFixture {}; @@ -1819,6 +1824,28 @@ TEST_F(SearchTest, multi_contains_empty_input_set_string) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expect); } +TEST_F(SearchTest, multi_contains_primitive_nan_unequal_bug) +{ + auto nan_val = std::numeric_limits<float>::quiet_NaN(); + + fixed_width_column_wrapper<float> haystack{1.0f, nan_val, 3.0f}; + fixed_width_column_wrapper<float> needles{nan_val}; + + auto result = cudf::detail::contains(cudf::table_view{{haystack}}, + cudf::table_view{{needles}}, + cudf::null_equality::EQUAL, + cudf::nan_equality::UNEQUAL, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + thrust::host_vector<bool> result_host(result.size()); + CUDF_CUDA_TRY(cudaMemcpy( + result_host.data(), result.data(), result.size() * sizeof(bool), cudaMemcpyDeviceToHost)); + + // With nan_equality::UNEQUAL, NaN should not match NaN + EXPECT_FALSE(result_host[0]); +} + template <typename T> struct FixedPointTestAllReps : public cudf::test::BaseFixture {}; From 6b06d2003dd3facd4495d01c563d96a694ef6422 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 29 Jul 2025
06:11:31 -0400 Subject: [PATCH 019/366] Fix value counts expression when the column has nulls (#19524) Closes #19523. This is really a one-line fix. We need to pass `null_handling=plc.types.NullPolicy.INCLUDE` to `GroupBy` in the value counts expression implementation. This ensures that we group by null keys too. This PR also replaces `query` with `q` to match the convention used in other cudf-polars tests. Authors: - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19524 --- .../cudf_polars/dsl/expressions/unary.py | 6 +- .../tests/expressions/test_struct.py | 70 ++++++++++--------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 64c4cae2378..440a2efa9cc 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -257,9 +257,9 @@ def do_evaluate( ) for child in self.children ] - (keys_table, (counts_table,)) = plc.groupby.GroupBy(df.table).aggregate( - gb_requests - ) + (keys_table, (counts_table,)) = plc.groupby.GroupBy( + df.table, null_handling=plc.types.NullPolicy.INCLUDE + ).aggregate(gb_requests) if sort: sort_indices = plc.sorting.stable_sorted_order( counts_table, diff --git a/python/cudf_polars/tests/expressions/test_struct.py b/python/cudf_polars/tests/expressions/test_struct.py index ea82019ecc9..2e4686b9079 100644 --- a/python/cudf_polars/tests/expressions/test_struct.py +++ b/python/cudf_polars/tests/expressions/test_struct.py @@ -27,8 +27,8 @@ def test_field_getitem(request, ldf): reason="not supported until polars 1.31", ) ) - query = ldf.select(pl.col("a").struct[0]) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").struct[0]) + assert_gpu_result_equal(q) @pytest.mark.parametrize("fields", [("b",), ("b", "d"), ("^b.*|f.*$",)]) def test_field(request, ldf, fields): reason="not supported until polars 1.31", ) ) - query = ldf.select(pl.col("a").struct.field(*fields)) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").struct.field(*fields)) + assert_gpu_result_equal(q) def test_unnest(request, ldf): reason="not supported until polars 1.31", ) ) - query = ldf.select(pl.col("a").struct.unnest()) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").struct.unnest()) + assert_gpu_result_equal(q) def test_json_encode(request, ldf): reason="not supported until polars 1.31", ) ) - query = ldf.select(pl.col("a").struct.json_encode()) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").struct.json_encode()) + assert_gpu_result_equal(q) ldf_newlines = pl.LazyFrame({"a": [{"b": "c\nd", "d": "\r\nz"}]}) - query = ldf_newlines.select(pl.col("a").struct.json_encode()) - assert_gpu_result_equal(query) + q = ldf_newlines.select(pl.col("a").struct.json_encode()) + assert_gpu_result_equal(q) def test_rename_fields(request, ldf): reason="not supported until polars 1.31", ) ) - query = ldf.select( - pl.col("a").struct.rename_fields(["1", "2", "3"]).struct.unnest() - ) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").struct.rename_fields(["1", "2", "3"]).struct.unnest()) + assert_gpu_result_equal(q) def 
test_with_fields(ldf): - query = ldf.select( + q = ldf.select( pl.col("a").struct.with_fields(pl.field("b").str.len_chars()).struct.unnest() ) - assert_ir_translation_raises(query, NotImplementedError) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize( @@ -101,47 +99,51 @@ def test_prefix_suffix_fields(request, ldf, expr): reason="not supported until polars 1.31", ) ) - query = ldf.select(expr("foo").struct.unnest()) - assert_gpu_result_equal(query) + q = ldf.select(expr("foo").struct.unnest()) + assert_gpu_result_equal(q) def test_map_field_names(ldf): - query = ldf.select(pl.col("a").name.map_fields(lambda x: x.upper()).struct.unnest()) - assert_ir_translation_raises(query, NotImplementedError) + q = ldf.select(pl.col("a").name.map_fields(lambda x: x.upper()).struct.unnest()) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize("name", [None, "my_count"]) @pytest.mark.parametrize("normalize", [True, False]) def test_value_counts(ldf, name, normalize): # sort=True since order is non-deterministic - query = ldf.select( - pl.col("a").value_counts(sort=True, name=name, normalize=normalize) - ) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").value_counts(sort=True, name=name, normalize=normalize)) + assert_gpu_result_equal(q) def test_value_counts_normalize_div_by_zero(): ldf = pl.LazyFrame({"a": []}, schema={"a": pl.Int64()}) - query = ldf.select(pl.col("a").value_counts(normalize=True)) - assert_gpu_result_equal(query) + q = ldf.select(pl.col("a").value_counts(normalize=True)) + assert_gpu_result_equal(q) def test_groupby_value_counts_notimplemented(): lgb = pl.LazyFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}).group_by("a") value_counts_expr = pl.col("b").value_counts() - query = lgb.agg(value_counts_expr) - assert_ir_translation_raises(query, NotImplementedError) + q = lgb.agg(value_counts_expr) + assert_ir_translation_raises(q, NotImplementedError) - query = lgb.agg(value_counts_expr.first()) - assert_ir_translation_raises(query, NotImplementedError) + q = lgb.agg(value_counts_expr.first()) + assert_ir_translation_raises(q, NotImplementedError) def test_struct(ldf): - query = ldf.select(pl.struct(pl.all())) - assert_gpu_result_equal(query) + q = ldf.select(pl.struct(pl.all())) + assert_gpu_result_equal(q) def test_nested_struct(): ldf = pl.LazyFrame({"a": [{"x": {"i": 0, "j": 0}, "y": {"i": 0, "k": 1}}]}) - query = ldf.select(pl.struct(pl.all())) - assert_gpu_result_equal(query) + q = ldf.select(pl.struct(pl.all())) + assert_gpu_result_equal(q) + + +def test_value_counts_with_nulls(ldf): + ldf_with_nulls = ldf.select(c=pl.Series(["x", None, "y", "x", None, "x"])) + q = ldf_with_nulls.select(pl.col("c").value_counts(sort=True)) + assert_gpu_result_equal(q) From 4d79b06a058a954e49d2326211790bf5e00cf6b6 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 29 Jul 2025 14:53:46 +0100 Subject: [PATCH 020/366] Fix clang-tools version pinning (#19529) We specify this in two places, use the exact pin rather than fuzzy pin in both. 
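As background (an illustration, not part of the patch): in conda match specs a single `=` is a fuzzy pin that accepts any version starting with the given string, while `==` requires the exact version. A minimal Python sketch of that distinction, using made-up version strings:

```python
# Sketch of conda version-pin semantics, not cudf code: "clang-tools=20.1.4"
# behaves like the pattern "20.1.4*" (prefix match), whereas
# "clang-tools==20.1.4" matches only the exact version.
def fuzzy_pin_matches(version: str, pinned: str) -> bool:
    # "=X" is shorthand for "X*" in conda match specs
    return version.startswith(pinned)

def exact_pin_matches(version: str, pinned: str) -> bool:
    return version == pinned

assert fuzzy_pin_matches("20.1.4", "20.1.4")
assert fuzzy_pin_matches("20.1.4.post1", "20.1.4")      # fuzzy also accepts this
assert not exact_pin_matches("20.1.4.post1", "20.1.4")  # exact does not
```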
Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19529 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 1 - conda/environments/all_cuda-129_arch-x86_64.yaml | 1 - dependencies.yaml | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index c536a7d662c..15af26470a4 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -12,7 +12,6 @@ dependencies: - c-compiler - cachetools - certifi -- clang-tools=20.1.4 - clang-tools==20.1.4 - clang==20.1.4 - cmake>=3.30.4 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index bf9e857f6bc..e606a1cd4c3 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -12,7 +12,6 @@ dependencies: - c-compiler - cachetools - certifi -- clang-tools=20.1.4 - clang-tools==20.1.4 - clang==20.1.4 - cmake>=3.30.4 diff --git a/dependencies.yaml b/dependencies.yaml index dfcfb40d7a9..a03f5ac2c3f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -563,7 +563,7 @@ dependencies: - output_types: conda packages: - clang==20.1.4 - - clang-tools=20.1.4 + - clang-tools==20.1.4 clang_tidy: common: - output_types: conda From 67bc0734626d73b6bf950dbe2296f28c2300c578 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 29 Jul 2025 07:58:58 -0700 Subject: [PATCH 021/366] Add cudf_polars unit test for `is_in([])` expr (#19525) closes https://github.com/rapidsai/cudf/issues/18853 Appears it has been fixed in the meantime Authors: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19525 --- python/cudf_polars/tests/expressions/test_booleanfunction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index 94a6f50fcfe..200b9571c7f 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -191,6 +191,7 @@ def test_boolean_horizontal(expr, has_nulls, wide): marks=pytest.mark.xfail(reason="Need to support implode agg"), ), pl.col("a").is_in([1, 2, 3]), + pl.col("a").is_in([]), pl.col("a").is_in([3, 4, 2]), pl.col("c").is_in([10, None, 11]), ], From c6efdfc1ea36fbbb8bebf01bd8c6d6132b294408 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 29 Jul 2025 13:17:25 -0400 Subject: [PATCH 022/366] [FEA] Add chunked Parquet sink support using the libcudf writer (#19015) Closes https://github.com/rapidsai/cudf/issues/18969. This PR adds support for chunked writing to cudf-polars using the libcudf writer. Chunked writing is not turned on by default in cudf-polars and is controlled via the `n_output_chunks` in the GPU engine config. 
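For example (mirroring the tests added in this PR), the chunked writer is enabled through the engine's Parquet options; the output file path below is just a placeholder:

```python
import polars as pl

lf = pl.LazyFrame({"a": [1, 2, 3, 4], "b": ["w", "x", "y", "z"]})

# Opt in to libcudf's ChunkedParquetWriter and split the output table into
# 4 chunks; with chunked=False (or n_output_chunks=1) the regular
# write_parquet path is used instead.
engine = pl.GPUEngine(
    raise_on_fail=True,
    parquet_options={"chunked": True, "n_output_chunks": 4},
)
lf.sink_parquet("out.parquet", engine=engine)
```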
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19015 --- python/cudf_polars/cudf_polars/dsl/ir.py | 124 ++++++++++++++---- .../cudf_polars/cudf_polars/dsl/translate.py | 1 + .../cudf_polars/experimental/io.py | 24 +--- .../cudf_polars/testing/asserts.py | 6 +- .../cudf_polars/cudf_polars/utils/config.py | 15 ++- python/cudf_polars/tests/test_config.py | 7 +- python/cudf_polars/tests/test_sink.py | 21 ++- python/pylibcudf/pylibcudf/io/parquet.pxd | 4 + python/pylibcudf/pylibcudf/io/parquet.pyi | 2 + python/pylibcudf/pylibcudf/io/parquet.pyx | 44 ++++++- 10 files changed, 195 insertions(+), 53 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 16fef477ab2..28b20c835a0 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -668,18 +668,33 @@ def read_csv_header( class Sink(IR): """Sink a dataframe to a file.""" - __slots__ = ("cloud_options", "kind", "options", "path") - _non_child = ("schema", "kind", "path", "options", "cloud_options") + __slots__ = ("cloud_options", "kind", "options", "parquet_options", "path") + _non_child = ( + "schema", + "kind", + "path", + "parquet_options", + "options", + "cloud_options", + ) kind: str + """The type of file to write to. Eg. Parquet, CSV, etc.""" path: str + """The path to write to""" + parquet_options: ParquetOptions + """GPU-specific configuration options""" + cloud_options: dict[str, Any] | None + """Cloud-related authentication options, currently ignored.""" options: dict[str, Any] + """Sink options from Polars""" def __init__( self, schema: Schema, kind: str, path: str, + parquet_options: ParquetOptions, options: dict[str, Any], cloud_options: dict[str, Any], df: IR, @@ -687,10 +702,11 @@ def __init__( self.schema = schema self.kind = kind self.path = path + self.parquet_options = parquet_options self.options = options self.cloud_options = cloud_options self.children = (df,) - self._non_child_args = (schema, kind, path, options) + self._non_child_args = (schema, kind, path, parquet_options, options) if self.cloud_options is not None and any( self.cloud_options.get(k) is not None for k in ("config", "credential_provider") @@ -783,6 +799,7 @@ def get_hashable(self) -> Hashable: schema_hash, self.kind, self.path, + self.parquet_options, json.dumps(self.options), json.dumps(self.cloud_options), ) # pragma: no cover @@ -821,6 +838,85 @@ def _write_json(cls, target: plc.io.SinkInfo, df: DataFrame) -> None: ) plc.io.json.write_json(options) + @staticmethod + def _make_parquet_metadata(df: DataFrame) -> plc.io.types.TableInputMetadata: + """Create TableInputMetadata and set column names.""" + metadata = plc.io.types.TableInputMetadata(df.table) + for i, name in enumerate(df.column_names): + metadata.column_metadata[i].set_name(name) + return metadata + + @staticmethod + def _apply_parquet_writer_options( + builder: plc.io.parquet.ChunkedParquetWriterOptionsBuilder + | plc.io.parquet.ParquetWriterOptionsBuilder, + options: dict[str, Any], + ) -> ( + plc.io.parquet.ChunkedParquetWriterOptionsBuilder + | plc.io.parquet.ParquetWriterOptionsBuilder + ): + """Apply writer options to the builder.""" + compression = options.get("compression") + if compression and compression != "Uncompressed": + compression_type = getattr( + plc.io.types.CompressionType, compression.upper() + ) + builder = builder.compression(compression_type) + + if 
(data_page_size := options.get("data_page_size")) is not None: + builder = builder.max_page_size_bytes(data_page_size) + + if (row_group_size := options.get("row_group_size")) is not None: + builder = builder.row_group_size_rows(row_group_size) + + return builder + + @classmethod + def _write_parquet( + cls, + target: plc.io.SinkInfo, + parquet_options: ParquetOptions, + options: dict[str, Any], + df: DataFrame, + ) -> None: + metadata: plc.io.types.TableInputMetadata = cls._make_parquet_metadata(df) + + builder: ( + plc.io.parquet.ChunkedParquetWriterOptionsBuilder + | plc.io.parquet.ParquetWriterOptionsBuilder + ) + + if ( + parquet_options.chunked + and parquet_options.n_output_chunks != 1 + and df.table.num_rows() != 0 + ): + builder = plc.io.parquet.ChunkedParquetWriterOptions.builder( + target + ).metadata(metadata) + builder = cls._apply_parquet_writer_options(builder, options) + writer_options = builder.build() + writer = plc.io.parquet.ChunkedParquetWriter.from_options(writer_options) + + # TODO: Can be based on a heuristic that estimates chunk size + # from the input table size and available GPU memory. + num_chunks = parquet_options.n_output_chunks + table_chunks = plc.copying.split( + df.table, + [i * df.table.num_rows() // num_chunks for i in range(1, num_chunks)], + ) + for chunk in table_chunks: + writer.write(chunk) + writer.close([]) + + else: + builder = plc.io.parquet.ParquetWriterOptions.builder( + target, df.table + ).metadata(metadata) + builder = cls._apply_parquet_writer_options(builder, options) + writer_options = builder.build() + plc.io.parquet.write_parquet(writer_options) + @classmethod @nvtx_annotate_cudf_polars(message="Sink") def do_evaluate( @@ -828,6 +924,7 @@ def do_evaluate( schema: Schema, kind: str, path: str, + parquet_options: ParquetOptions, options: dict[str, Any], df: DataFrame, ) -> DataFrame: @@ -838,27 +935,8 @@ def do_evaluate( Path(path).parent.mkdir(parents=True, exist_ok=True) if kind == "Csv": cls._write_csv(target, options, df) - elif kind == "Parquet": - metadata = plc.io.types.TableInputMetadata(df.table) - for i, name in enumerate(df.column_names): - metadata.column_metadata[i].set_name(name) - - builder = plc.io.parquet.ParquetWriterOptions.builder(target, df.table) - compression = options["compression"] - if compression != "Uncompressed": - builder.compression( - getattr(plc.io.types.CompressionType, compression.upper()) - ) - - writer_options = builder.metadata(metadata).build() - if options["data_page_size"] is not None: - writer_options.set_max_page_size_bytes(options["data_page_size"]) - if options["row_group_size"] is not None: - writer_options.set_row_group_size_rows(options["row_group_size"]) - - plc.io.parquet.write_parquet(writer_options) - + cls._write_parquet(target, parquet_options, options, df) elif kind == "Json": cls._write_json(target, df) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 50e8dd8690f..6db8085fab4 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -523,6 +523,7 @@ def _(node: pl_ir.Sink, translator: Translator, schema: Schema) -> ir.IR: schema=schema, kind=sink_kind, path=file["target"], + parquet_options=translator.config_options.parquet_options, options=options, cloud_options=cloud_options, df=translator.translate_ir(n=node.input), diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index 
92d0e5b9cf1..e45b6aae470 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -416,12 +416,13 @@ def _sink_to_directory( schema: Schema, kind: str, path: str, + parquet_options: ParquetOptions, options: dict[str, Any], df: DataFrame, ready: None, ) -> DataFrame: """Sink a partition to a new file.""" - return Sink.do_evaluate(schema, kind, path, options, df) + return Sink.do_evaluate(schema, kind, path, parquet_options, options, df) def _sink_to_parquet_file( @@ -434,23 +435,11 @@ def _sink_to_parquet_file( """Sink a partition to an open Parquet file.""" # Set up a new chunked Parquet writer if necessary. if writer is None: - metadata = plc.io.types.TableInputMetadata(df.table) - for i, name in enumerate(df.column_names): - metadata.column_metadata[i].set_name(name) - + metadata = Sink._make_parquet_metadata(df) sink = plc.io.types.SinkInfo([path]) - builder = plc.io.parquet.ChunkedParquetWriterOptions.builder(sink) - compression = options["compression"] - if compression != "Uncompressed": - builder.compression( - getattr(plc.io.types.CompressionType, compression.upper()) - ) - - if options["data_page_size"] is not None: - builder.max_page_size_bytes(options["data_page_size"]) - if options["row_group_size"] is not None: - builder.row_group_size_rows(options["row_group_size"]) - + builder = Sink._apply_parquet_writer_options( + plc.io.parquet.ChunkedParquetWriterOptions.builder(sink), options + ) writer_options = builder.metadata(metadata).build() writer = plc.io.parquet.ChunkedParquetWriter.from_options(writer_options) @@ -565,6 +554,7 @@ def _directory_sink_graph( sink.schema, sink.kind, f"{sink.path}/part.{str(i).zfill(width)}.{suffix}", + sink.parquet_options, sink.options, (child_name, i), setup_name, diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 19b4f2e16b9..256e0a6267f 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -13,7 +13,7 @@ from polars.testing.asserts import assert_frame_equal from cudf_polars.dsl.translate import Translator -from cudf_polars.utils.config import StreamingFallbackMode +from cudf_polars.utils.config import ConfigOptions, StreamingFallbackMode if TYPE_CHECKING: from cudf_polars.typing import OptimizationArgs @@ -381,9 +381,9 @@ def assert_sink_result_equal( # the multi-partition executor might produce multiple files, one per partition. if ( isinstance(engine, GPUEngine) - and engine.config["executor"] == "streaming" + and ConfigOptions.from_polars_engine(engine).executor.name == "streaming" and gpu_path.is_dir() - ): + ): # pragma: no cover result = read_fn(gpu_path.joinpath("*"), **read_kwargs) else: result = read_fn(gpu_path, **read_kwargs) diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py index 0aa97fcd650..578dfd0694e 100644 --- a/python/cudf_polars/cudf_polars/utils/config.py +++ b/python/cudf_polars/cudf_polars/utils/config.py @@ -176,9 +176,11 @@ class ParquetOptions: Parameters ---------- chunked - Whether to use libcudf's ``ChunkedParquetReader`` to read the parquet - dataset in chunks. This is useful when reading very large parquet - files. + Whether to use libcudf's ``ChunkedParquetReader`` or ``ChunkedParquetWriter`` + to read/write the parquet dataset in chunks. This is useful when reading/writing + very large parquet files. 
+ n_output_chunks + Split the dataframe in ``n_output_chunks`` when using libcudf's ``ChunkedParquetWriter``. chunk_read_limit Limit on total number of bytes to be returned per read, or 0 if there is no limit. @@ -206,6 +208,11 @@ class ParquetOptions: f"{_env_prefix}__CHUNKED", _bool_converter, default=True ) ) + n_output_chunks: int = dataclasses.field( + default_factory=_make_default_factory( + f"{_env_prefix}__N_OUTPUT_CHUNKS", int, default=1 + ) + ) chunk_read_limit: int = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CHUNK_READ_LIMIT", int, default=0 @@ -230,6 +237,8 @@ class ParquetOptions: def __post_init__(self) -> None: # noqa: D105 if not isinstance(self.chunked, bool): raise TypeError("chunked must be a bool") + if not isinstance(self.n_output_chunks, int): + raise TypeError("n_output_chunks must be an int") if not isinstance(self.chunk_read_limit, int): raise TypeError("chunk_read_limit must be an int") if not isinstance(self.pass_read_limit, int): diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index c7ef911f239..52651fbe5c8 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -134,14 +134,16 @@ def test_parquet_options(executor: str) -> None: ) ) assert config.parquet_options.chunked is True + assert config.parquet_options.n_output_chunks == 1 config = ConfigOptions.from_polars_engine( pl.GPUEngine( executor=executor, - parquet_options={"chunked": False}, + parquet_options={"chunked": False, "n_output_chunks": 16}, ) ) assert config.parquet_options.chunked is False + assert config.parquet_options.n_output_chunks == 16 def test_validate_streaming_executor_shuffle_method(rapidsmpf_available) -> None: @@ -299,6 +301,7 @@ def test_executor_from_env(monkeypatch: pytest.MonkeyPatch) -> None: def test_parquet_options_from_env(monkeypatch: pytest.MonkeyPatch) -> None: with monkeypatch.context() as m: m.setenv("CUDF_POLARS__PARQUET_OPTIONS__CHUNKED", "0") + m.setenv("CUDF_POLARS__PARQUET_OPTIONS__N_OUTPUT_CHUNKS", "2") m.setenv("CUDF_POLARS__PARQUET_OPTIONS__CHUNK_READ_LIMIT", "100") m.setenv("CUDF_POLARS__PARQUET_OPTIONS__PASS_READ_LIMIT", "200") m.setenv("CUDF_POLARS__PARQUET_OPTIONS__MAX_FOOTER_SAMPLES", "0") @@ -308,6 +311,7 @@ def test_parquet_options_from_env(monkeypatch: pytest.MonkeyPatch) -> None: engine = pl.GPUEngine() config = ConfigOptions.from_polars_engine(engine) assert config.parquet_options.chunked is False + assert config.parquet_options.n_output_chunks == 2 assert config.parquet_options.chunk_read_limit == 100 assert config.parquet_options.pass_read_limit == 200 assert config.parquet_options.max_footer_samples == 0 @@ -401,6 +405,7 @@ def test_cardinality_factor_compat() -> None: "option", [ "chunked", + "n_output_chunks", "chunk_read_limit", "pass_read_limit", "max_footer_samples", diff --git a/python/cudf_polars/tests/test_sink.py b/python/cudf_polars/tests/test_sink.py index a21adbda833..81267ea2ade 100644 --- a/python/cudf_polars/tests/test_sink.py +++ b/python/cudf_polars/tests/test_sink.py @@ -78,7 +78,11 @@ def test_sink_ndjson(df, tmp_path): @pytest.mark.parametrize("mkdir", [True, False]) @pytest.mark.parametrize("data_page_size", [None, 256_000]) @pytest.mark.parametrize("row_group_size", [None, 1_000]) -def test_sink_parquet(df, tmp_path, mkdir, data_page_size, row_group_size): +@pytest.mark.parametrize("is_chunked", [False, True]) +@pytest.mark.parametrize("n_output_chunks", [1, 4, 8]) +def test_sink_parquet( + df, tmp_path, 
mkdir, data_page_size, row_group_size, is_chunked, n_output_chunks +): assert_sink_result_equal( df, tmp_path / "out.parquet", @@ -87,6 +91,10 @@ def test_sink_parquet(df, tmp_path, mkdir, data_page_size, row_group_size): "data_page_size": data_page_size, "row_group_size": row_group_size, }, + engine=pl.GPUEngine( + raise_on_fail=True, + parquet_options={"chunked": is_chunked, "n_output_chunks": n_output_chunks}, + ), ) @@ -131,3 +139,14 @@ def test_sink_csv_nested_data(tmp_path): pl.exceptions.ComputeError, match="CSV format does not support nested data" ): lf.sink_csv(path, engine=pl.GPUEngine()) + + +def test_chunked_sink_empty_table_to_parquet(tmp_path): + assert_sink_result_equal( + pl.LazyFrame(), + tmp_path / "out.parquet", + engine=pl.GPUEngine( + raise_on_fail=True, + parquet_options={"chunked": True, "n_output_chunks": 2}, + ), + ) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd index b9710689ac6..2a925b23f6e 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/io/parquet.pxd @@ -152,6 +152,10 @@ cdef class ParquetWriterOptionsBuilder: cpdef ParquetWriterOptionsBuilder write_arrow_schema(self, bool enabled) + cpdef ParquetWriterOptionsBuilder row_group_size_rows(self, size_type val) + + cpdef ParquetWriterOptionsBuilder max_page_size_bytes(self, size_t val) + cpdef ParquetWriterOptions build(self) cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = *) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi index 1543ae875a7..ab7e82ce1c7 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyi +++ b/python/pylibcudf/pylibcudf/io/parquet.pyi @@ -80,6 +80,8 @@ class ParquetWriterOptionsBuilder: def dictionary_policy(self, val: DictionaryPolicy) -> Self: ... def utc_timestamps(self, enabled: bool) -> Self: ... def write_arrow_schema(self, enabled: bool) -> Self: ... + def row_group_size_rows(self, val: int) -> Self: ... + def max_page_size_bytes(self, val: int) -> Self: ... def build(self) -> ParquetWriterOptions: ... def write_parquet( diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index 1774370523c..42803d8d8fb 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -45,15 +45,17 @@ from pylibcudf.utils cimport _get_stream __all__ = [ "ChunkedParquetReader", + "ChunkedParquetWriterOptions", + "ChunkedParquetWriterOptionsBuilder", + "ParquetChunkedWriter", + "ParquetReaderOptions", + "ParquetReaderOptionsBuilder", "ParquetWriterOptions", "ParquetWriterOptionsBuilder", + "is_supported_write_parquet", + "merge_row_group_metadata", "read_parquet", "write_parquet", - "ParquetReaderOptions", - "ParquetReaderOptionsBuilder", - "ChunkedParquetWriterOptions", - "ChunkedParquetWriterOptionsBuilder" - "merge_row_group_metadata", ] @@ -917,6 +919,38 @@ cdef class ParquetWriterOptionsBuilder: self.c_obj.write_arrow_schema(enabled) return self + cpdef ParquetWriterOptionsBuilder row_group_size_rows(self, size_type val): + """ + Sets the maximum row group size, in rows. + + Parameters + ---------- + val : size_type + Maximum row group size, in rows to set + + Returns + ------- + Self + """ + self.c_obj.row_group_size_rows(val) + return self + + cpdef ParquetWriterOptionsBuilder max_page_size_bytes(self, size_t val): + """ + Sets the maximum uncompressed page size, in bytes. 
+ + Parameters + ---------- + val : size_t + Maximum uncompressed page size, in bytes to set + + Returns + ------- + Self + """ + self.c_obj.max_page_size_bytes(val) + return self + cpdef ParquetWriterOptions build(self): """ Create a ParquetWriterOptions from the set options. From ca872a38539292b73130d4d3432da5b40842d9ad Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 29 Jul 2025 12:26:04 -0500 Subject: [PATCH 023/366] Add missing import of pyarrow.parquet when reading specified row_groups. (#19509) Closes #19508. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19509 --- python/cudf/cudf/io/parquet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 352e0e6bfe8..5e14065c08f 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -16,7 +16,6 @@ import numpy as np import pandas as pd -import pyarrow as pa import pylibcudf as plc @@ -2291,8 +2290,10 @@ def _process_metadata( range_index_meta = index_col[0] if row_groups is not None: + import pyarrow.parquet as pq + per_file_metadata = [ - pa.parquet.read_metadata( + pq.read_metadata( # Pyarrow cannot read directly from bytes io.BytesIO(s) if isinstance(s, bytes) else s ) From 67dae5d252e37f689a8a4e9506ec25db39d3b410 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:27:21 -0400 Subject: [PATCH 024/366] Expose `nvtext::letter_type` to python (#19520) This PR exposes `nvtext::letter_type` enum and improves readability when printing it. See also #19451 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/19520 --- .../pylibcudf/libcudf/CMakeLists.txt | 2 ++ .../pylibcudf/libcudf/nvtext/CMakeLists.txt | 23 +++++++++++++++++++ .../pylibcudf/libcudf/nvtext/stemmer.pyx | 0 python/pylibcudf/pylibcudf/nvtext/stemmer.pyx | 8 +++++-- 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/libcudf/nvtext/CMakeLists.txt create mode 100644 python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pyx diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt index 0182cf89ca5..7a6f3d0f7da 100644 --- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt @@ -23,6 +23,8 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp ) + +add_subdirectory(nvtext) add_subdirectory(io) add_subdirectory(lists) add_subdirectory(strings) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/nvtext/CMakeLists.txt new file mode 100644 index 00000000000..ac447646c4d --- /dev/null +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/CMakeLists.txt @@ -0,0 +1,23 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources stemmer.pyx) + +set(linked_libraries cudf::cudf) + +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp_nvtext_ +) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx index c9e4f1274e4..a64e414b850 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -12,7 +12,9 @@ from pylibcudf.libcudf.nvtext.stemmer cimport ( ) from pylibcudf.libcudf.types cimport size_type -__all__ = ["is_letter", "porter_stemmer_measure"] +from pylibcudf.libcudf.nvtext.stemmer import letter_type as LetterType # no-cython-lint + +__all__ = ["is_letter", "porter_stemmer_measure", "LetterType"] cpdef Column is_letter( Column input, @@ -75,3 +77,5 @@ cpdef Column porter_stemmer_measure(Column input): c_result = cpp_porter_stemmer_measure(input.view()) return Column.from_libcudf(move(c_result)) + +LetterType.__str__ = LetterType.__repr__ From 595c27ff4c66d60f7d6b8d9e881b3df5da6483c9 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 30 Jul 2025 09:42:11 -0700 Subject: [PATCH 025/366] Make nvCOMP ZLIB (de)compression available by default (#19528) closes https://github.com/rapidsai/cudf/issues/19489 nvCOMP ZLIB (de)compression has now been well-tested. Marking this as stable, which means that it's available by default (without the env var). Also expanded test coverage for the ZLIB decompression/compression and updated the user guide. 
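As a sketch of the user-facing effect (assuming the cudf Python ORC API and its `"ZLIB"` compression option; the file path is a placeholder), a ZLIB-compressed ORC round trip now goes through nvCOMP without setting any environment variable:

```python
import cudf

df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# With this change, ZLIB (de)compression uses nvCOMP by default; previously
# the nvCOMP DEFLATE path had to be enabled via LIBCUDF_NVCOMP_POLICY.
df.to_orc("data.orc", compression="ZLIB")
assert cudf.read_orc("data.orc").equals(df)
```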
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/19528 --- cpp/src/io/comp/nvcomp_adapter.cpp | 12 +++------- cpp/tests/io/comp/comp_test.cpp | 10 +++++---- cpp/tests/io/orc_chunked_reader_test.cu | 1 + cpp/tests/io/orc_test.cpp | 2 ++ docs/cudf/source/user_guide/io/io.md | 30 ++++++++++++------------- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 50f486d2f12..2887072fce6 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -522,13 +522,7 @@ std::optional is_compression_disabled_impl(compression_type compres feature_status_parameters params) { switch (compression) { - case compression_type::DEFLATE: { - if (not params.are_all_integrations_enabled) { - return "DEFLATE compression is experimental, you can enable it through " - "`LIBCUDF_NVCOMP_POLICY` environment variable."; - } - return std::nullopt; - } + case compression_type::DEFLATE: case compression_type::LZ4: case compression_type::SNAPPY: case compression_type::ZSTD: @@ -544,14 +538,14 @@ std::optional is_decompression_disabled_impl(compression_type compr feature_status_parameters params) { switch (compression) { - case compression_type::DEFLATE: case compression_type::GZIP: { if (not params.are_all_integrations_enabled) { - return "DEFLATE decompression is experimental, you can enable it through " + return "GZIP decompression is experimental, you can enable it through " "`LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; } + case compression_type::DEFLATE: case compression_type::LZ4: case compression_type::SNAPPY: case compression_type::ZSTD: { diff --git a/cpp/tests/io/comp/comp_test.cpp b/cpp/tests/io/comp/comp_test.cpp index 3dd1bcf1b56..70b6e78364b 100644 --- a/cpp/tests/io/comp/comp_test.cpp +++ b/cpp/tests/io/comp/comp_test.cpp @@ -340,8 +340,9 @@ TEST_F(NvcompConfigTest, Compression) auto const& comp_disabled = nvcomp::is_compression_disabled; EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {true, true})); - // all integrations enabled required - EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {false, true})); + EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {false, true})); + // stable integrations enabled required + EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {false, false})); EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {true, true})); EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {false, true})); @@ -360,8 +361,9 @@ TEST_F(NvcompConfigTest, Decompression) auto const& decomp_disabled = nvcomp::is_decompression_disabled; EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {true, true})); - // all integrations enabled required - EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {false, true})); + EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {false, true})); + // stable integrations enabled required + EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {false, false})); EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {true, true})); EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {false, true})); diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index d8d1d48551f..a0deb6c80e9 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -1536,6 +1536,7 @@ 
INSTANTIATE_TEST_CASE_P(Nvcomp, ::testing::Values(cudf::io::compression_type::AUTO, cudf::io::compression_type::SNAPPY, cudf::io::compression_type::LZ4, + cudf::io::compression_type::ZLIB, cudf::io::compression_type::ZSTD))); INSTANTIATE_TEST_CASE_P(DeviceInternal, diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index dbec125d22a..b0b83f7f419 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -2311,6 +2311,7 @@ INSTANTIATE_TEST_CASE_P(Nvcomp, ::testing::Values(cudf::io::compression_type::AUTO, cudf::io::compression_type::SNAPPY, cudf::io::compression_type::LZ4, + cudf::io::compression_type::ZLIB, cudf::io::compression_type::ZSTD))); INSTANTIATE_TEST_CASE_P(DeviceInternal, @@ -2332,6 +2333,7 @@ INSTANTIATE_TEST_CASE_P(Nvcomp, ::testing::Values(cudf::io::compression_type::AUTO, cudf::io::compression_type::SNAPPY, cudf::io::compression_type::LZ4, + cudf::io::compression_type::ZLIB, cudf::io::compression_type::ZSTD))); INSTANTIATE_TEST_CASE_P(DeviceInternal, diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md index 600a97d6e16..b38c563b87e 100644 --- a/docs/cudf/source/user_guide/io/io.md +++ b/docs/cudf/source/user_guide/io/io.md @@ -153,21 +153,21 @@ If no value is set, behavior will be the same as the "STABLE" option. .. table:: Current policy for nvCOMP use for different types :widths: 20 20 20 20 20 20 20 20 20 20 - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | | CSV | Parquet | JSON | ORC | AVRO | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | Compression Type | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | - +=======================+========+========+==============+==============+=========+========+==============+==============+========+ - | Snappy | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | ZSTD | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | | CSV | Parquet | JSON | ORC | AVRO | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | Compression Type | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | + +=======================+========+========+==============+==============+=========+========+==============+==============+==============+ + | 
Snappy | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | Stable | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | ZSTD | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ + | GZIP | ❌ | ❌ | ❌ | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------------+ ``` From 34e01b2ee68c65f851e5abe509b14d1c44302fa9 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 30 Jul 2025 11:39:41 -0700 Subject: [PATCH 026/366] Add nvtx ranges and minor fix for `lists` types in the next-gen parquet reader (#19493) Contributes to #19469 This PR introduces fixes to multiple minor bugs and edge cases in the next-gen parquet reader. These bugs were discovered while working on a new example called `hybrid_scan` in PR #19469. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/19493 --- .../experimental/hybrid_scan_helpers.cpp | 19 ++- .../parquet/experimental/hybrid_scan_impl.cpp | 71 +++++++++--- .../parquet/experimental/hybrid_scan_impl.hpp | 4 +- .../experimental/hybrid_scan_preprocess.cu | 2 + .../parquet/experimental/page_index_filter.cu | 109 ++++++++---------- .../io/experimental/hybrid_scan_common.cpp | 2 +- .../io/experimental/hybrid_scan_test.cpp | 75 ++++++++++++ cpp/tests/io/parquet_common.cpp | 6 +- 8 files changed, 198 insertions(+), 90 deletions(-) diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp index ccf1e60823b..3932e9bc15c 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp @@ -90,10 +90,13 @@ aggregate_reader_metadata::aggregate_reader_metadata(cudf::host_span #include +#include #include #include #include @@ -167,6 +169,8 @@ std::vector> hybrid_scan_reader_impl::filter_row_groups_w parquet_reader_options const& options, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); @@ -191,6 +195,8 @@ hybrid_scan_reader_impl::secondary_filters_byte_ranges( cudf::host_span const> row_group_indices, parquet_reader_options const& options) { + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Filter expression must not be empty"); @@ -224,6 +230,8 @@ hybrid_scan_reader_impl::filter_row_groups_with_dictionary_pages( parquet_reader_options const& options, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); + 
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); @@ -294,6 +302,8 @@ std::vector> hybrid_scan_reader_impl::filter_row_groups_w CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); + CUDF_FUNC_RANGE(); + select_columns(read_columns_mode::FILTER_COLUMNS, options); table_metadata metadata; @@ -322,6 +332,8 @@ hybrid_scan_reader_impl::filter_data_pages_with_stats( CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); + CUDF_FUNC_RANGE(); + select_columns(read_columns_mode::FILTER_COLUMNS, options); table_metadata metadata; @@ -349,6 +361,8 @@ std::pair, std::vector> hybrid_scan_reader_impl::get_input_column_chunk_byte_ranges( cudf::host_span const> row_group_indices) const { + CUDF_FUNC_RANGE(); + // Descriptors for all the chunks that make up the selected columns auto const num_input_columns = _input_columns.size(); auto const num_row_groups = @@ -430,6 +444,8 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); + CUDF_FUNC_RANGE(); + reset_internal_state(); table_metadata metadata; @@ -450,7 +466,7 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( prepare_data(row_group_indices, std::move(column_chunk_buffers), data_page_mask, options); - return read_chunk_internal(read_columns_mode::FILTER_COLUMNS, row_mask); + return read_chunk_internal(read_mode::READ_ALL, read_columns_mode::FILTER_COLUMNS, row_mask); } table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( @@ -464,6 +480,8 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( CUDF_EXPECTS(row_mask.null_count() == 0, "Row mask must not have any nulls when materializing payload column"); + CUDF_FUNC_RANGE(); + reset_internal_state(); initialize_options(row_group_indices, options, stream); @@ -477,7 +495,7 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( prepare_data(row_group_indices, std::move(column_chunk_buffers), data_page_mask, options); - return read_chunk_internal(read_columns_mode::PAYLOAD_COLUMNS, row_mask); + return read_chunk_internal(read_mode::READ_ALL, read_columns_mode::PAYLOAD_COLUMNS, row_mask); } void hybrid_scan_reader_impl::reset_internal_state() @@ -495,6 +513,8 @@ void hybrid_scan_reader_impl::reset_internal_state() _input_pass_read_limit = 0; _output_chunk_read_limit = 0; _strings_to_categorical = false; + _reader_column_schema.reset(); + _expr_conv = named_to_reference_converter(std::nullopt, table_metadata{}); } void hybrid_scan_reader_impl::initialize_options( @@ -542,7 +562,7 @@ void hybrid_scan_reader_impl::prepare_data( template table_with_metadata hybrid_scan_reader_impl::read_chunk_internal( - read_columns_mode read_columns_mode, RowMaskView row_mask) + read_mode mode, read_columns_mode read_columns_mode, RowMaskView row_mask) { // If `_output_metadata` has been constructed, just copy it over. auto out_metadata = _output_metadata ? 
table_metadata{*_output_metadata} : table_metadata{}; @@ -555,8 +575,7 @@ table_with_metadata hybrid_scan_reader_impl::read_chunk_internal( // Copy number of total input row groups and number of surviving row groups from predicate // pushdown. out_metadata.num_input_row_groups = _file_itm_data.num_input_row_groups; - // Copy the number surviving row groups from each predicate pushdown only if the filter has - // value. + // Copy the number surviving row groups from each predicate pushdown only if the filter has value if (_expr_conv.get_converted_expr().has_value()) { out_metadata.num_row_groups_after_stats_filter = _file_itm_data.surviving_row_groups.after_stats_filter; @@ -566,6 +585,13 @@ table_with_metadata hybrid_scan_reader_impl::read_chunk_internal( // no work to do (this can happen on the first pass if we have no rows to read) if (!has_more_work()) { + // Check if number of rows per source should be included in output metadata. + if (include_output_num_rows_per_source()) { + // Empty dataframe case: Simply initialize to a list of zeros + out_metadata.num_rows_per_source = + std::vector(_file_itm_data.num_rows_per_source.size(), 0); + } + // Finalize output return finalize_output(read_columns_mode, out_metadata, out_columns, row_mask); } @@ -575,10 +601,10 @@ table_with_metadata hybrid_scan_reader_impl::read_chunk_internal( auto const& read_info = subpass.output_chunk_read_info[subpass.current_output_chunk]; // Allocate memory buffers for the output columns. - allocate_columns(read_mode::READ_ALL, read_info.skip_rows, read_info.num_rows); + allocate_columns(mode, read_info.skip_rows, read_info.num_rows); // Parse data into the output buffers. - decode_page_data(read_mode::READ_ALL, read_info.skip_rows, read_info.num_rows); + decode_page_data(mode, read_info.skip_rows, read_info.num_rows); // Create the final output cudf columns. for (size_t i = 0; i < _output_buffers.size(); ++i) { @@ -597,13 +623,30 @@ table_with_metadata hybrid_scan_reader_impl::read_chunk_internal( } // Only construct `out_metadata` if `_output_metadata` has not been cached. if (!_output_metadata) { - cudf::io::column_name_info& col_name = out_metadata.schema_info[i]; + column_name_info& col_name = out_metadata.schema_info[i]; out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream)); } else { out_columns.emplace_back(make_column(_output_buffers[i], nullptr, metadata, _stream)); } } + out_columns = + cudf::structs::detail::enforce_null_consistency(std::move(out_columns), _stream, _mr); + + // Check if number of rows per source should be included in output metadata. + if (include_output_num_rows_per_source()) { + // For chunked reading, compute the output number of rows per source + if (mode == read_mode::CHUNKED_READ) { + out_metadata.num_rows_per_source = + calculate_output_num_rows_per_source(read_info.skip_rows, read_info.num_rows); + } + // Simply move the number of rows per file if reading all at once + else { + // Move is okay here as we are reading in one go. + out_metadata.num_rows_per_source = std::move(_file_itm_data.num_rows_per_source); + } + } + // Add empty columns if needed. Filter output columns based on filter. 
return finalize_output(read_columns_mode, out_metadata, out_columns, row_mask); } @@ -619,19 +662,17 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output( // read) for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) { if (!_output_metadata) { - cudf::io::column_name_info& col_name = out_metadata.schema_info[i]; - out_columns.emplace_back( - cudf::io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr)); + column_name_info& col_name = out_metadata.schema_info[i]; + out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr)); } else { - out_columns.emplace_back( - cudf::io::detail::empty_like(_output_buffers[i], nullptr, _stream, _mr)); + out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], nullptr, _stream, _mr)); } } if (!_output_metadata) { populate_metadata(out_metadata); // Finally, save the output table metadata into `_output_metadata` for reuse next time. - _output_metadata = std::make_unique(out_metadata); + _output_metadata = std::make_unique(out_metadata); } // advance output chunk/subpass/pass info for non-empty tables if and only if we are in bounds @@ -685,6 +726,8 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output( void hybrid_scan_reader_impl::set_page_mask(cudf::host_span const> data_page_mask) { + CUDF_FUNC_RANGE(); + auto const& pass = _pass_itm_data; auto const& chunks = pass->chunks; diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp index 7ebe4df865d..c6519200252 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp @@ -365,13 +365,15 @@ class hybrid_scan_reader_impl : public parquet::detail::reader_impl { * This function is called internally and expects all preprocessing steps have already been done. 
* * @tparam RowMaskView View type of the row mask column + * @param[in] mode Read mode indicating if we are reading all at once or chunk by chunk * @param[in] read_columns_mode Read mode indicating if we are reading filter or payload columns * @param[in,out] row_mask Boolean column indicating which rows need to be read after page-pruning * for filter columns, or after materialize step for payload columns * @return The output table along with columns' metadata */ template - table_with_metadata read_chunk_internal(read_columns_mode read_columns_mode, + table_with_metadata read_chunk_internal(read_mode mode, + read_columns_mode read_columns_mode, RowMaskView row_mask); private: diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_preprocess.cu b/cpp/src/io/parquet/experimental/hybrid_scan_preprocess.cu index 94b58f89f92..9a707e6577d 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_preprocess.cu +++ b/cpp/src/io/parquet/experimental/hybrid_scan_preprocess.cu @@ -314,6 +314,8 @@ void hybrid_scan_reader_impl::update_row_mask(cudf::column_view in_row_mask, cudf::mutable_column_view out_row_mask, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); + auto const total_rows = static_cast(in_row_mask.size()); CUDF_EXPECTS(total_rows == out_row_mask.size(), diff --git a/cpp/src/io/parquet/experimental/page_index_filter.cu b/cpp/src/io/parquet/experimental/page_index_filter.cu index e3f1f043047..42a27e3af48 100644 --- a/cpp/src/io/parquet/experimental/page_index_filter.cu +++ b/cpp/src/io/parquet/experimental/page_index_filter.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -702,8 +703,12 @@ std::vector> aggregate_reader_metadata::compute_data_page_mask auto const has_page_index = compute_has_page_index(per_file_metadata, row_group_indices, output_column_schemas); - CUDF_EXPECTS(has_page_index, - "Data page mask computation requires the Parquet page index for all output columns"); + + // TODO: Don't use page pruning in case of lists and structs until we support them + if (not has_page_index) { + CUDF_LOG_WARN("Encountered missing Parquet page index for one or more output columns"); + return {}; // An empty data page mask indicates all pages are required + } // Compute page row counts, offsets, and column chunk page offsets for each column std::vector> page_row_counts; @@ -768,12 +773,7 @@ std::vector> aggregate_reader_metadata::compute_data_page_mask auto data_page_mask = std::vector>(); data_page_mask.reserve(num_columns); - std::atomic total_surviving_pages{0}; - - // Tasks to compute data page mask for each column - std::vector>> data_page_mask_tasks; - data_page_mask_tasks.reserve(num_columns); - auto streams = cudf::detail::fork_streams(stream, num_columns); + auto total_surviving_pages = size_t{0}; // For all columns, look up which pages contain at least one required row. i.e. 
// !validity_it[row_idx] or is_row_required[row_idx] satisfies, and add its byte range to the @@ -782,60 +782,49 @@ std::vector> aggregate_reader_metadata::compute_data_page_mask thrust::counting_iterator(0), thrust::counting_iterator(num_columns), [&](auto const col_idx) { - data_page_mask_tasks.emplace_back( - cudf::detail::host_worker_pool().submit_task([&, col_idx = col_idx] { - // Construct a row indices mapping based on page row counts and offsets - auto const total_pages_in_this_column = page_row_counts[col_idx].size(); - - auto const page_indices = make_page_indices_async( - page_row_counts[col_idx], page_row_offsets[col_idx], total_rows, streams[col_idx]); - - // Device vector to hold page indices with at least one required row - rmm::device_uvector select_page_indices(total_rows, streams[col_idx], mr); - - // Copy page indices with at least one required row - auto const filtered_pages_end_iter = thrust::copy_if( - rmm::exec_policy_nosync(streams[col_idx]), - page_indices.begin(), - page_indices.end(), - thrust::counting_iterator(0), - select_page_indices.begin(), - is_row_required_fn{row_mask.nullable(), row_mask.null_mask(), row_mask.data()}); - - // Remove duplicate page indices across (presorted) rows - auto const filtered_uniq_page_end_iter = - thrust::unique(rmm::exec_policy_nosync(streams[col_idx]), - select_page_indices.begin(), - filtered_pages_end_iter); - - // Number of final filtered pages for this column - size_t const num_surviving_pages_this_column = - thrust::distance(select_page_indices.begin(), filtered_uniq_page_end_iter); - - total_surviving_pages.fetch_add(num_surviving_pages_this_column); - - // Copy the filtered page indices for this column to host - auto host_select_page_indices = cudf::detail::make_host_vector( - cudf::device_span{select_page_indices.data(), - num_surviving_pages_this_column}, - streams[col_idx]); - - // Vector to data page mask the this column - auto valid_pages = std::vector(total_pages_in_this_column, false); - std::for_each(host_select_page_indices.begin(), - host_select_page_indices.end(), - [&](auto const page_idx) { valid_pages[page_idx] = true; }); - - return valid_pages; - })); + // Construct a row indices mapping based on page row counts and offsets + auto const total_pages_in_this_column = page_row_counts[col_idx].size(); + + auto const page_indices = make_page_indices_async( + page_row_counts[col_idx], page_row_offsets[col_idx], total_rows, stream); + + // Device vector to hold page indices with at least one required row + rmm::device_uvector select_page_indices(total_rows, stream, mr); + + // Copy page indices with at least one required row + auto const filtered_pages_end_iter = thrust::copy_if( + rmm::exec_policy_nosync(stream), + page_indices.begin(), + page_indices.end(), + thrust::counting_iterator(0), + select_page_indices.begin(), + is_row_required_fn{row_mask.nullable(), row_mask.null_mask(), row_mask.data()}); + + // Remove duplicate page indices across (presorted) rows + auto const filtered_uniq_page_end_iter = thrust::unique( + rmm::exec_policy_nosync(stream), select_page_indices.begin(), filtered_pages_end_iter); + + // Number of final filtered pages for this column + size_t const num_surviving_pages_this_column = + thrust::distance(select_page_indices.begin(), filtered_uniq_page_end_iter); + + total_surviving_pages += num_surviving_pages_this_column; + + // Copy the filtered page indices for this column to host + auto host_select_page_indices = cudf::detail::make_host_vector( + 
cudf::device_span{select_page_indices.data(),
+                            num_surviving_pages_this_column},
+        stream);
+
+      // Vector for the data page mask of this column
+      auto valid_pages = std::vector(total_pages_in_this_column, false);
+      std::for_each(host_select_page_indices.begin(),
+                    host_select_page_indices.end(),
+                    [&](auto const page_idx) { valid_pages[page_idx] = true; });
+
+      data_page_mask.push_back(std::move(valid_pages));
     });

-  // Collect results from all tasks
-  std::transform(data_page_mask_tasks.begin(),
-                 data_page_mask_tasks.end(),
-                 std::back_inserter(data_page_mask),
-                 [](auto& task) { return std::move(task).get(); });
-
   // Total number of input pages across all columns
   auto const total_pages = std::accumulate(
     page_row_counts.cbegin(),
diff --git a/cpp/tests/io/experimental/hybrid_scan_common.cpp b/cpp/tests/io/experimental/hybrid_scan_common.cpp
index c2bdca7c0e2..1224d9d4988 100644
--- a/cpp/tests/io/experimental/hybrid_scan_common.cpp
+++ b/cpp/tests/io/experimental/hybrid_scan_common.cpp
@@ -89,7 +89,7 @@ cudf::test::strings_column_wrapper constant_strings(cudf::size_type value)
   auto elements =
     thrust::make_transform_iterator(thrust::make_constant_iterator(value), [](auto i) {
-      std::array buf;
+      std::array buf{};
       snprintf(buf.data(), buf.size(), "%04d", i);
       return std::string(buf.data());
     });
diff --git a/cpp/tests/io/experimental/hybrid_scan_test.cpp b/cpp/tests/io/experimental/hybrid_scan_test.cpp
index 7b62a2efa6f..316e55a02eb 100644
--- a/cpp/tests/io/experimental/hybrid_scan_test.cpp
+++ b/cpp/tests/io/experimental/hybrid_scan_test.cpp
@@ -362,3 +362,78 @@ TEST_F(HybridScanTest, PruneDataPagesOnlyAndScanAllColumns)
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected->select({1, 2}), read_payload_table->view());
   }
 }
+
+TEST_F(HybridScanTest, MaterializeListPayloadColumn)
+{
+  srand(0xc0ffee);
+  using T = uint32_t;
+
+  // Parquet buffer
+  std::vector parquet_buffer;
+  {
+    auto constexpr num_rows = num_ordered_rows;
+    // int32_t column
+    auto col0 = testdata::ascending();
+    // string column
+    auto col1 = testdata::ascending();
+    // list column
+    auto bools_iter =
+      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; });
+    auto bools_col =
+      cudf::test::fixed_width_column_wrapper(bools_iter, bools_iter + num_rows);
+    auto offsets_iter = thrust::counting_iterator(0);
+    auto offsets_col =
+      cudf::test::fixed_width_column_wrapper(offsets_iter, offsets_iter + num_rows + 1);
+    auto col2 = cudf::make_lists_column(
+      num_rows, offsets_col.release(), bools_col.release(), 0, rmm::device_buffer{});
+
+    // Input table
+    auto table = cudf::table_view{{col0, col1, *col2}};
+    cudf::io::table_input_metadata expected_metadata(table);
+    expected_metadata.column_metadata[0].set_name("col0");
+    expected_metadata.column_metadata[1].set_name("col1");
+    expected_metadata.column_metadata[2].set_name("col2");
+
+    // Write to parquet buffer
+    cudf::io::parquet_writer_options out_opts =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&parquet_buffer}, table)
+        .metadata(std::move(expected_metadata))
+        .row_group_size_rows(page_size_for_ordered_tests)
+        .max_page_size_rows(page_size_for_ordered_tests / 5)
+        .compression(cudf::io::compression_type::AUTO)
+        .dictionary_policy(cudf::io::dictionary_policy::ALWAYS)
+        .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN);
+    cudf::io::write_parquet(out_opts);
+  }
+
+  // Filtering AST - table[0] < 100
+  auto constexpr num_filter_columns = 1;
+  auto literal_value = cudf::numeric_scalar(100);
+  auto literal = cudf::ast::literal(literal_value);
+  auto col_ref_0 = cudf::ast::column_name_reference("col0");
+  auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref_0, literal);
+
+  auto stream = cudf::get_default_stream();
+  auto mr = cudf::get_current_device_resource_ref();
+  auto aligned_mr = rmm::mr::aligned_resource_adaptor(
+    cudf::get_current_device_resource(), bloom_filter_alignment);
+
+  // Read parquet using the hybrid scan reader
+  auto [read_filter_table, read_payload_table, read_filter_meta, read_payload_meta, row_mask] =
+    hybrid_scan(parquet_buffer, filter_expression, num_filter_columns, {}, stream, mr, aligned_mr);
+
+  CUDF_EXPECTS(read_filter_table->num_rows() == read_payload_table->num_rows(),
+               "Filter and payload tables should have the same number of rows");
+
+  // Check equivalence (equal without checking nullability) with the parquet file read with the
+  // original reader
+  {
+    cudf::io::parquet_reader_options const options =
+      cudf::io::parquet_reader_options::builder(
+        cudf::io::source_info(cudf::host_span(parquet_buffer.data(), parquet_buffer.size())))
+        .filter(filter_expression);
+    auto [expected_tbl, expected_meta] = cudf::io::read_parquet(options, stream);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({0}), read_filter_table->view());
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({1, 2}), read_payload_table->view());
+  }
+}
diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp
index 4a6a79c8f2a..aa0f5fafe4e 100644
--- a/cpp/tests/io/parquet_common.cpp
+++ b/cpp/tests/io/parquet_common.cpp
@@ -479,7 +479,7 @@ std::enable_if_t, cudf::test::strings_colum
 ascending()
 {
   auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-    std::array buf;
+    std::array buf{};
     snprintf(buf.data(), buf.size(), "%09d", i);
     return std::string(buf.data());
   });
@@ -491,7 +491,7 @@ std::enable_if_t, cudf::test::strings_colum
 descending()
 {
   auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-    std::array buf;
+    std::array buf{};
     snprintf(buf.data(), buf.size(), "%09d", static_cast(num_ordered_rows - i));
     return std::string(buf.data());
   });
@@ -503,7 +503,7 @@ std::enable_if_t, cudf::test::strings_colum
 unordered()
 {
   auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-    std::array buf;
+    std::array buf{};
     snprintf(buf.data(), buf.size(), "%09d", (i % 2 == 0) ? i : (num_ordered_rows - i));
     return std::string(buf.data());
   });

From aafc3f87f0415112970fb66155ef7574cefb3d44 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed, 30 Jul 2025 12:11:20 -0700
Subject: [PATCH 027/366] Derive and use page mask at subpass level for
 chunked reads (#19515)

Contributes to #19526

This PR implements deriving and using a page mask at the subpass level in the Parquet reader. This is to enable chunked reading in the next-gen reader.

Note: PR #19526 implements chunking in the next-gen parquet reader and tests this feature, so no new tests here.
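For reference, here is a minimal standalone sketch of the gather step this patch performs in `set_subpass_page_mask` (simplified host-side code, not the exact cudf implementation; per the diff, `page_src_index` maps each subpass page to its position in the pass):

```cpp
#include <cstddef>
#include <vector>

// Gather the subpass page mask from the pass-level page mask using the
// subpass -> pass page index map (names are illustrative only).
std::vector<bool> gather_subpass_page_mask(std::vector<bool> const& pass_page_mask,
                                           std::vector<std::size_t> const& page_src_index)
{
  std::vector<bool> subpass_page_mask(page_src_index.size());
  for (std::size_t i = 0; i < page_src_index.size(); ++i) {
    subpass_page_mask[i] = pass_page_mask[page_src_index[i]];
  }
  return subpass_page_mask;
}
```

When a pass contains only a single subpass, the pass-level mask can simply be moved over instead of gathered; the `set_subpass_page_mask` implementation in the diff below takes exactly that fast path.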
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19515 --- .../experimental/hybrid_scan_chunking.cu | 6 +-- .../parquet/experimental/hybrid_scan_impl.cpp | 49 +++++++++++++------ .../parquet/experimental/hybrid_scan_impl.hpp | 4 +- cpp/src/io/parquet/reader_impl.cpp | 15 +++--- cpp/src/io/parquet/reader_impl.hpp | 12 ++++- cpp/src/io/parquet/reader_impl_chunking.cu | 35 ++++++++++++- 6 files changed, 91 insertions(+), 30 deletions(-) diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_chunking.cu b/cpp/src/io/parquet/experimental/hybrid_scan_chunking.cu index c54410de7fe..71f0533de31 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_chunking.cu +++ b/cpp/src/io/parquet/experimental/hybrid_scan_chunking.cu @@ -50,6 +50,9 @@ void hybrid_scan_reader_impl::handle_chunking( if (!_pass_itm_data) { // setup the next pass setup_next_pass(std::move(column_chunk_buffers), options); + + // Must be called as soon as we create the pass + set_pass_page_mask(data_page_mask); } auto& pass = *_pass_itm_data; @@ -81,9 +84,6 @@ void hybrid_scan_reader_impl::handle_chunking( } } - // Must be called before `setup_next_subpass()` to select pages to decompress - set_page_mask(data_page_mask); - // setup the next sub pass setup_next_subpass(read_mode::READ_ALL); } diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp index cbb794c0b90..48aa1d6c40c 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp @@ -504,7 +504,8 @@ void hybrid_scan_reader_impl::reset_internal_state() _file_preprocessed = false; _has_page_index = false; _pass_itm_data.reset(); - _page_mask.clear(); + _pass_page_mask.clear(); + _subpass_page_mask.clear(); _output_metadata.reset(); _options.timestamp_type = cudf::data_type{}; _options.num_rows = std::nullopt; @@ -724,19 +725,20 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output( } } -void hybrid_scan_reader_impl::set_page_mask(cudf::host_span const> data_page_mask) +void hybrid_scan_reader_impl::set_pass_page_mask( + cudf::host_span const> data_page_mask) { CUDF_FUNC_RANGE(); auto const& pass = _pass_itm_data; auto const& chunks = pass->chunks; - _page_mask = cudf::detail::make_empty_host_vector(pass->pages.size(), _stream); + _pass_page_mask = cudf::detail::make_empty_host_vector(pass->pages.size(), _stream); auto const num_columns = _input_columns.size(); // Handle the empty page mask case if (data_page_mask.empty()) { - std::fill(_page_mask.begin(), _page_mask.end(), true); + std::fill(_pass_page_mask.begin(), _pass_page_mask.end(), true); return; } @@ -744,23 +746,38 @@ void hybrid_scan_reader_impl::set_page_mask(cudf::host_span co thrust::counting_iterator(0), thrust::counting_iterator(_input_columns.size()), [&](auto col_idx) { - auto const& col_page_mask = data_page_mask[col_idx]; - size_t num_inserted_pages = 0; + auto const& col_page_mask = data_page_mask[col_idx]; + size_t num_inserted_data_pages = 0; for (size_t chunk_idx = col_idx; chunk_idx < chunks.size(); chunk_idx += num_columns) { - if (chunks[chunk_idx].num_dict_pages > 0) { _page_mask.push_back(true); } - // Sanitize the column's page mask and insert - CUDF_EXPECTS(col_page_mask.size() >= num_inserted_pages + chunks[chunk_idx].num_data_pages, - "Encountered invalid data page mask size"); - _page_mask.insert( - 
_page_mask.end(), - col_page_mask.begin() + num_inserted_pages, - col_page_mask.begin() + num_inserted_pages + chunks[chunk_idx].num_data_pages); - num_inserted_pages += chunks[chunk_idx].num_data_pages; + // Insert a true value for each dictionary page + if (chunks[chunk_idx].num_dict_pages > 0) { _pass_page_mask.push_back(true); } + + // Number of data pages in this column chunk + auto const num_data_pages_this_col_chunk = chunks[chunk_idx].num_data_pages; + + // Make sure we have enough page mask for this column chunk + CUDF_EXPECTS( + col_page_mask.size() >= num_inserted_data_pages + num_data_pages_this_col_chunk, + "Encountered invalid data page mask size"); + + // Insert page mask for this column chunk + _pass_page_mask.insert( + _pass_page_mask.end(), + col_page_mask.begin() + num_inserted_data_pages, + col_page_mask.begin() + num_inserted_data_pages + num_data_pages_this_col_chunk); + + // Update the number of inserted data pages + num_inserted_data_pages += num_data_pages_this_col_chunk; } - CUDF_EXPECTS(num_inserted_pages == col_page_mask.size(), + // Make sure we inserted exactly the number of data pages for this column + CUDF_EXPECTS(num_inserted_data_pages == col_page_mask.size(), "Encountered mismatch in number of data pages and page mask size"); }); + + // Make sure we inserted exactly the number of pages for this pass + CUDF_EXPECTS(_pass_page_mask.size() == pass->pages.size(), + "Encountered mismatch in number of pass pages and page mask size"); } } // namespace cudf::io::parquet::experimental::detail diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp index c6519200252..b2c0d40a7d1 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp @@ -217,11 +217,11 @@ class hybrid_scan_reader_impl : public parquet::detail::reader_impl { rmm::cuda_stream_view stream); /** - * @brief Set the mask for pages + * @brief Set the page mask for the pass pages * * @param data_page_mask Input data page mask from page-pruning step */ - void set_page_mask(cudf::host_span const> data_page_mask); + void set_pass_page_mask(cudf::host_span const> data_page_mask); /** * @brief Fill a BOOL8 row mask column with the specified value diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 854856a1b40..96aa416d27b 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -213,15 +213,15 @@ void reader_impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num_ } } - // Use the `_page_mask` from page pruning stage, if non empty, otherwise set all pages in this - // subpass to be decoded + // Use the `_subpass_page_mask` derived from `_page_mask` if non empty, otherwise set all pages in + // this subpass to be decoded auto host_page_mask = [&]() { - if (_page_mask.empty()) { + if (_subpass_page_mask.empty()) { auto page_mask = cudf::detail::make_host_vector(subpass.pages.size(), _stream); std::fill(page_mask.begin(), page_mask.end(), true); return page_mask; } else { - return _page_mask; + return _subpass_page_mask; } }(); @@ -527,7 +527,9 @@ void reader_impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num_ } reader_impl::reader_impl() - : _options{}, _page_mask{cudf::detail::make_host_vector(0, cudf::get_default_stream())} + : _options{}, + _pass_page_mask{cudf::detail::make_host_vector(0, cudf::get_default_stream())}, + _subpass_page_mask{cudf::detail::make_host_vector(0, 
cudf::get_default_stream())} { } @@ -557,7 +559,8 @@ reader_impl::reader_impl(std::size_t chunk_read_limit, options.get_num_rows(), options.get_row_groups()}, _sources{std::move(sources)}, - _page_mask{cudf::detail::make_host_vector(0, _stream)}, + _pass_page_mask{cudf::detail::make_host_vector(0, _stream)}, + _subpass_page_mask{cudf::detail::make_host_vector(0, _stream)}, _output_chunk_read_limit{chunk_read_limit}, _input_pass_read_limit{pass_read_limit} { diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 3848eac8f6d..9312257a7ec 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -177,6 +177,11 @@ class reader_impl { */ void setup_next_subpass(read_mode mode); + /** + * @brief Copies over the relevant page mask information for the subpass + */ + void set_subpass_page_mask(); + /** * @brief Read a chunk of data and return an output table. * @@ -417,8 +422,11 @@ class reader_impl { // _output_buffers associated schema indices std::vector _output_column_schemas; - // Page mask for filtering out data pages - cudf::detail::host_vector _page_mask; + // Page mask for filtering out pass data pages + cudf::detail::host_vector _pass_page_mask; + + // Page mask for filtering out subpass data pages + cudf::detail::host_vector _subpass_page_mask; // _output_buffers associated metadata std::unique_ptr _output_metadata; diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 0a46fb1877c..4b42ef36436 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -23,6 +23,7 @@ #include +#include #include #include @@ -337,6 +338,9 @@ void reader_impl::setup_next_subpass(read_mode mode) std::transform( h_spans.begin(), h_spans.end(), subpass.column_page_count.begin(), get_span_size{}); + // Set the page mask information for the subpass + set_subpass_page_mask(); + // decompress the data pages in this subpass; also decompress the dictionary pages in this pass, // if this is the first subpass in the pass if (pass.has_compressed_data) { @@ -344,7 +348,7 @@ void reader_impl::setup_next_subpass(read_mode mode) decompress_page_data(pass.chunks, is_first_subpass ? 
pass.pages : host_span{},
                         subpass.pages,
-                        _page_mask,
+                        _subpass_page_mask,
                         _stream,
                         _mr);
@@ -664,4 +668,33 @@ void reader_impl::compute_output_chunks_for_subpass()
     c_info, subpass.pages, subpass.skip_rows, subpass.num_rows, _output_chunk_read_limit, _stream);
 }

+void reader_impl::set_subpass_page_mask()
+{
+  auto const& pass    = _pass_itm_data;
+  auto const& subpass = pass->subpass;
+
+  // Create a host vector to store the subpass page mask
+  _subpass_page_mask = cudf::detail::make_host_vector(subpass->pages.size(), _stream);
+
+  // Fill with all true if no pass level page mask is available
+  if (_pass_page_mask.empty()) {
+    std::fill(_subpass_page_mask.begin(), _subpass_page_mask.end(), true);
+    return;
+  }
+
+  // If this is the only subpass, move the pass level page mask data as is
+  if (subpass->single_subpass) {
+    std::move(_pass_page_mask.begin(), _pass_page_mask.end(), _subpass_page_mask.begin());
+    return;
+  }
+
+  // Use the subpass-to-pass page index map to gather the subpass page mask from the pass level
+  // page mask
+  auto const host_page_src_index =
+    cudf::detail::make_host_vector(subpass->page_src_index, _stream);
+  thrust::gather(thrust::seq,
+                 host_page_src_index.begin(),
+                 host_page_src_index.end(),
+                 _pass_page_mask.begin(),
+                 _subpass_page_mask.begin());
+}
+
 } // namespace cudf::io::parquet::detail

From 5652d7e3cf1438f133cf28cfda0076a9d7d12205 Mon Sep 17 00:00:00 2001
From: Shruti Shivakumar
Date: Wed, 30 Jul 2025 12:16:46 -0700
Subject: [PATCH 028/366] Fix logic for number of unique values generated by
 data profile in benchmarks (#19540)

This PR clarifies and corrects the usage of cardinality in the data generator, and restores the previous distribution of the data generated for cardinality `c = 0`.

When `c` is a positive integer less than the total number of values to be generated, the number of unique elements in the output is at most `c`. Otherwise, if the cardinality is greater than or equal to the total number of values required, `n`, then all `n` generated values are distinct, i.e. the output contains exactly `n` unique elements.

Authors:
  - Shruti Shivakumar (https://github.com/shrshi)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/19540
---
 cpp/benchmarks/common/generate_input.cu       | 2 +-
 cpp/benchmarks/common/generate_input.hpp      | 9 ++++++---
 cpp/benchmarks/join/generate_input_tables.cuh | 7 ++++---
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 55c5133fa03..0488a59099d 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -549,7 +549,7 @@ struct create_rand_col_fn {
                                              thrust::minstd_rand& engine,
                                              cudf::size_type num_rows)
   {
-    if (profile.get_cardinality() == 0 || profile.get_cardinality() >= num_rows) {
+    if (profile.get_cardinality() >= num_rows) {
       return create_distinct_rows_column(profile, engine, num_rows);
     }
     return create_random_column(profile, engine, num_rows);
diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp
index 57834fd11d2..b900acc2f20 100644
--- a/cpp/benchmarks/common/generate_input.hpp
+++ b/cpp/benchmarks/common/generate_input.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -238,8 +238,11 @@ class data_profile {
   double bool_probability_true = 0.5;
   std::optional null_probability = 0.01;
-  cudf::size_type cardinality = 2000;
-  cudf::size_type avg_run_length = 4;
+  cudf::size_type cardinality =
+    2000;  /// Upper bound on the number of unique values generated if `0 <= cardinality < n`, where
+           /// `n` is the total number of values to be generated. If `cardinality >= n`, `n` unique
+           /// values of the requested data type are generated.
+  cudf::size_type avg_run_length = 4;

 public:
   template , std::unique_ptr> generate_i
   static_cast(build_table_numrows / multiplicity);
   double const null_probability = Nullable ? 0.3 : 0;
-  auto const profile =
-    data_profile{data_profile_builder().null_probability(null_probability).cardinality(0)};
+  auto const profile = data_profile{data_profile_builder()
+                                      .null_probability(null_probability)
+                                      .cardinality(unique_rows_build_table_numrows + 1)};
   auto unique_rows_build_table =
     create_random_table(key_types, row_count{unique_rows_build_table_numrows + 1}, profile, 1);
@@ -227,7 +228,7 @@
   auto probe_cols = probe_table->release();
   for (auto i = 0; i < num_payload_cols; i++) {
     build_cols.emplace_back(cudf::sequence(build_table_numrows, *init));
-    probe_cols.emplace_back(cudf::sequence(build_table_numrows, *init));
+    probe_cols.emplace_back(cudf::sequence(probe_table_numrows, *init));
   }

   return std::pair{std::make_unique(std::move(build_cols)),

From 5baa96b84657c01026c7c558bd55d13aacaa75e0 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 30 Jul 2025 15:50:11 -0700
Subject: [PATCH 029/366] Construct more cuDF classic Columns with pylibcudf
 instead of using Buffers (#19535)

Towards https://github.com/rapidsai/cudf/issues/18726

To eventually back a cuDF column with a pylibcudf Column, we should always be constructing a cuDF Column from a pylibcudf Column.
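For illustration, a hedged C++ sketch of the pattern this targets, using the public libcudf factory `cudf::make_column_from_scalar` that `plc.Column.from_scalar` presumably wraps (the helper name is hypothetical): it materializes an all-null column of a given size from an invalid scalar, which is what the new `plc.Column.from_scalar(plc.Scalar.from_py(None, ...), row_count)` calls express in Python.

```cpp
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/types.hpp>

#include <memory>

// Hypothetical helper (not from this PR): build an all-null INT32 column of
// `row_count` rows by replicating an invalid (null) scalar.
std::unique_ptr<cudf::column> make_all_null_int32(cudf::size_type row_count)
{
  cudf::numeric_scalar<int32_t> null_scalar{0, /*is_valid=*/false};
  return cudf::make_column_from_scalar(null_scalar, row_count);
}
```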
Currently there are places where we construct a cuDF column from the custom Buffer class via an RMM object, so converting this to just use pylibcudf instead Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19535 --- python/cudf/cudf/core/column/column.py | 26 ++++-- python/cudf/cudf/core/column/numerical.py | 4 +- python/cudf/cudf/core/multiindex.py | 4 +- .../cudf/pandas/scripts/conftest-patch.py | 91 +------------------ python/cudf/cudf/tests/test_multiindex.py | 9 ++ 5 files changed, 34 insertions(+), 100 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3eb2e5dbeb3..ac2d9d12752 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2573,19 +2573,25 @@ def column_empty( elif isinstance(dtype, CategoricalDtype): data = None children = ( - cudf.core.column.NumericalColumn( - data=as_buffer( - rmm.DeviceBuffer(size=row_count * SIZE_TYPE_DTYPE.itemsize) - ), - size=None, - dtype=SIZE_TYPE_DTYPE, + ColumnBase.from_pylibcudf( + plc.Column.from_scalar( + plc.Scalar.from_py( + None, dtype_to_pylibcudf_type(SIZE_TYPE_DTYPE) + ), + row_count, + ) ), ) - elif dtype.kind in "OU" and not isinstance(dtype, DecimalDtype): - data = as_buffer(rmm.DeviceBuffer(size=0)) - children = (as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE),) else: - data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize)) + col = ColumnBase.from_pylibcudf( + plc.Column.from_scalar( + plc.Scalar.from_py(None, dtype_to_pylibcudf_type(dtype)), + row_count, + ) + )._with_type_metadata(dtype) + if for_numba: + col = col.set_mask(None) + return col if row_count > 0 and not for_numba: mask = as_buffer( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 05120c6e687..2292b92e68e 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -15,7 +15,7 @@ import cudf from cudf.api.types import is_scalar from cudf.core._internals import binaryop -from cudf.core.buffer import acquire_spill_lock, as_buffer +from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import ColumnBase, as_column, column_empty from cudf.core.column.numerical_base import NumericalBaseColumn from cudf.core.dtypes import CategoricalDtype @@ -307,7 +307,7 @@ def nans_to_nulls(self: Self) -> Self: mask, _ = plc.transform.nans_to_nulls( self.to_pylibcudf(mode="read") ) - return self.set_mask(as_buffer(mask)) + return self.set_mask(mask) def _normalize_binop_operand(self, other: Any) -> pa.Scalar | ColumnBase: if isinstance(other, ColumnBase): diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index e69e45bf733..f3bdfce4321 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1654,7 +1654,9 @@ def droplevel(self, level=-1) -> Self | Index: new_data.pop(self._data.names[i]) if len(new_data) == 1: - return _index_from_data(new_data) + return Index._from_column( + next(iter(new_data.values())), name=new_names[0] + ) else: mi = type(self)._from_data(new_data) mi.names = new_names diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 3ef660fa614..9520f09945e 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -1551,7 
+1551,6 @@ def pytest_unconfigure(config): "tests/base/test_misc.py::test_memory_usage[datetime-tz]", "tests/base/test_misc.py::test_memory_usage[datetime]", "tests/base/test_misc.py::test_memory_usage[dict-series]", - "tests/base/test_misc.py::test_memory_usage[empty]", "tests/base/test_misc.py::test_memory_usage[float-series]", "tests/base/test_misc.py::test_memory_usage[float16-series]", "tests/base/test_misc.py::test_memory_usage[float32-series]", @@ -1582,7 +1581,6 @@ def pytest_unconfigure(config): "tests/base/test_misc.py::test_memory_usage[series-with-complex64-index]", "tests/base/test_misc.py::test_memory_usage[series-with-datetime-index]", "tests/base/test_misc.py::test_memory_usage[series-with-datetime-tz-index]", - "tests/base/test_misc.py::test_memory_usage[series-with-empty-index]", "tests/base/test_misc.py::test_memory_usage[series-with-float32-index]", "tests/base/test_misc.py::test_memory_usage[series-with-float64-index]", "tests/base/test_misc.py::test_memory_usage[series-with-int16-index]", @@ -3541,124 +3539,88 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[uint8-absolute]", "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[uint8-negative]", "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[uint8-positive]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[bool-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-frame-index1]", + "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-frame-index2]", + "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-frame-index3]", + "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-series-index2]", + "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-series-index3]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[double-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ms]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-frame-index2]", 
"tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[ns]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[s]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[duration[us]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[float-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int16-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int32-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int64-series-index3]", - 
"tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[int8-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ms]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[ns]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[s]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[timestamp[us]-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-series-index1]", 
"tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint16-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint32-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint64-series-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-frame-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-frame-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-frame-index3]", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-series-index1]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-series-index3]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and", @@ -4170,70 +4132,48 @@ def pytest_unconfigure(config): "tests/extension/test_masked.py::TestMaskedArrays::test_to_numpy[UInt32Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_to_numpy[UInt64Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_to_numpy[UInt8Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[BooleanDtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float32Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-frame-index3]", - 
"tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Float64Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int16Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int32Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int64Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[Int8Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt16Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-series-index2]", 
"tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt32Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt64Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-frame-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-frame-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-frame-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-series-index1]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-series-index3]", "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[BooleanDtype]", @@ -4383,10 +4323,8 @@ def pytest_unconfigure(config): "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unique[object--Series]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unique[object-unique-]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unique[object-unique-Series]", - "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-frame-index1]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-frame-index2]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-frame-index3]", - "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-series-index1]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-series-index2]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_unstack[float-series-index3]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_value_counts[float-data_missing-True]", @@ -4923,16 +4861,12 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_unstack[python-True-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[python-True-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[python-True-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-frame-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-False-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-frame-index2]", 
"tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[pyarrow]-True-series-index3]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-False-frame-index0]", @@ -4951,28 +4885,20 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index3]", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index1]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index2]", "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index3]", 
"tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[pyarrow-False]", @@ -6115,7 +6041,6 @@ def pytest_unconfigure(config): "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_stack_preserve_categorical_dtype[True-True]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_stack_unstack[False]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_stack_unstack[True]", - "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_bool", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_long_index", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_mixed_extension_types[0]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_mixed_extension_types[1]", @@ -6154,11 +6079,8 @@ def pytest_unconfigure(config): "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_unstack_wrong_level_name[True-unstack]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_categorical_columns", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_preserve_types", - "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_unobserved_keys[False]", - "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_unobserved_keys[True]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_with_level_has_nan", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_with_missing_int_cast_to_float", - "tests/frame/test_stack_unstack.py::test_unstack_non_slice_like_blocks", "tests/frame/test_subclass.py::TestDataFrameSubclassing::test_asof", "tests/frame/test_subclass.py::TestDataFrameSubclassing::test_equals_subclass", "tests/frame/test_subclass.py::TestDataFrameSubclassing::test_frame_subclassing_and_slicing", @@ -7030,7 +6952,6 @@ def pytest_unconfigure(config): "tests/groupby/aggregate/test_aggregate.py::test_groupby_aggregate_empty_key_empty_return", "tests/groupby/aggregate/test_aggregate.py::test_groupby_aggregation_multi_level_column", "tests/groupby/aggregate/test_aggregate.py::test_multiindex_custom_func[0]", - "tests/groupby/aggregate/test_aggregate.py::test_nonagg_agg", "tests/groupby/aggregate/test_aggregate.py::test_order_aggregate_multiple_funcs", "tests/groupby/aggregate/test_cython.py::test_cython_agg_EA_known_dtypes[data1-prod-large_int-False]", "tests/groupby/aggregate/test_cython.py::test_cython_agg_EA_known_dtypes[data1-prod-large_int-True]", @@ -8728,7 +8649,6 @@ def pytest_unconfigure(config): "tests/indexes/test_old_base.py::TestBase::test_getitem_2d_deprecated[simple_index4]", "tests/indexes/test_old_base.py::TestBase::test_memory_usage[bool-dtype]", "tests/indexes/test_old_base.py::TestBase::test_memory_usage[categorical]", - "tests/indexes/test_old_base.py::TestBase::test_memory_usage[empty]", "tests/indexes/test_old_base.py::TestBase::test_memory_usage[float32]", "tests/indexes/test_old_base.py::TestBase::test_memory_usage[float64]", "tests/indexes/test_old_base.py::TestBase::test_memory_usage[int16]", @@ -8942,7 +8862,6 @@ def pytest_unconfigure(config): "tests/indexing/multiindex/test_loc.py::test_loc_getitem_drops_levels_for_one_row_dataframe", "tests/indexing/multiindex/test_loc.py::test_loc_getitem_duplicates_multiindex_missing_indexers[indexer6-pos6]", "tests/indexing/multiindex/test_loc.py::test_loc_getitem_index_differently_ordered_slice_none_duplicates[indexer0]", - 
"tests/indexing/multiindex/test_loc.py::test_loc_getitem_int", "tests/indexing/multiindex/test_loc.py::test_loc_getitem_int_raises_exception", "tests/indexing/multiindex/test_loc.py::test_loc_with_mi_indexer", "tests/indexing/multiindex/test_multiindex.py::TestMultiIndexBasic::test_rename_multiindex_with_duplicates", @@ -8973,7 +8892,6 @@ def pytest_unconfigure(config): "tests/indexing/multiindex/test_slice.py::TestMultiIndexSlicers::test_per_axis_per_level_setitem", "tests/indexing/multiindex/test_sorted.py::TestMultiIndexSorted::test_argsort_with_na", "tests/indexing/multiindex/test_sorted.py::TestMultiIndexSorted::test_frame_getitem_not_sorted", - "tests/indexing/multiindex/test_sorted.py::TestMultiIndexSorted::test_series_getitem_not_sorted", "tests/indexing/test_at.py::test_at_timezone", "tests/indexing/test_categorical.py::TestCategoricalIndex::test_ix_categorical_index", "tests/indexing/test_categorical.py::TestCategoricalIndex::test_loc_getitem_with_non_string_categories[False-idx_values17]", @@ -11200,7 +11118,6 @@ def pytest_unconfigure(config): "tests/resample/test_time_grouper.py::test_aggregate_normal[std]", "tests/resample/test_time_grouper.py::test_aggregate_normal[sum]", "tests/resample/test_time_grouper.py::test_aggregate_normal[var]", - "tests/resample/test_time_grouper.py::test_apply", "tests/resample/test_time_grouper.py::test_apply_iteration", "tests/resample/test_time_grouper.py::test_repr", "tests/resample/test_time_grouper.py::test_upsample_sum[prod-method_args4-expected_values4]", diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 4c1f5259c61..d274166d0d2 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -2188,3 +2188,12 @@ def test_from_arrays_infer_names(idx): expected = pd.MultiIndex.from_arrays(arrays) result = cudf.MultiIndex.from_arrays(arrays) assert_eq(result, expected) + + +def test_multiindex_droplevel_single_level_none_names(): + data = [(1, 2), (3, 4)] + pidx = pd.MultiIndex.from_tuples(data, names=[None, None]) + gidx = cudf.MultiIndex.from_tuples(data, names=[None, None]) + result = gidx.droplevel(0) + expected = pidx.droplevel(0) + assert_eq(result, expected) From 7fddc5ff47b79cc59dea97908b81e452da73cfb0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Jul 2025 15:51:26 -0700 Subject: [PATCH 030/366] Move test_series.py to new cudf classic test directory structure (#19485) Towards https://github.com/rapidsai/cudf/issues/9999 This splits `test_series.py` into a directory structure like * `test/series/indexing/*` for loc/iloc/`__getitem__`/`__setitem__` * `test/series/methods/*` for `Series.some_method` * `test/series/test_constructors.py` * `test/series/test_binops.py` Additionally * Adding more shared fixtures in conftest.py where applicable * Further simplify tests * Removed existing stub test files that are no longer applicable Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19485 --- python/cudf/cudf/tests/conftest.py | 71 + python/cudf/cudf/tests/reshape/__init__.py | 0 python/cudf/cudf/tests/reshape/test_concat.py | 184 + .../cudf/tests/series/indexing/__init__.py | 0 .../tests/series/indexing/test_setitem.py | 73 + .../cudf/tests/series/methods/__init__.py | 0 .../series/methods/test_add_prefix_suffix.py | 25 + .../cudf/tests/series/methods/test_astype.py | 103 + 
.../tests/series/methods/test_autocorr.py | 34 + .../cudf/tests/series/methods/test_between.py | 62 + .../tests/series/methods/test_contains.py | 19 + .../cudf/tests/series/methods/test_count.py | 28 + .../tests/series/methods/test_describe.py | 60 + .../cudf/tests/series/methods/test_diff.py | 61 + .../tests/series/methods/test_digitize.py | 36 + .../cudf/tests/series/methods/test_drop.py | 162 + .../tests/series/methods/test_duplicated.py | 29 + .../cudf/tests/series/methods/test_equals.py | 19 + .../cudf/tests/series/methods/test_explode.py | 31 + .../tests/series/methods/test_factorize.py | 31 + .../cudf/tests/series/methods/test_fillna.py | 40 + .../tests/series/methods/test_hash_values.py | 47 + .../cudf/tests/series/methods/test_isin.py | 170 + .../tests/series/methods/test_isna_notnull.py | 61 + .../tests/series/methods/test_memory_usage.py | 21 + .../cudf/tests/series/methods/test_mode.py | 70 + .../series/methods/test_nans_to_nulls.py | 12 + .../series/methods/test_nlargest_nsmallest.py | 18 + .../cudf/tests/series/methods/test_nunique.py | 25 + .../cudf/tests/series/methods/test_pipe.py | 64 + .../cudf/tests/series/methods/test_reindex.py | 19 + .../cudf/tests/series/methods/test_rename.py | 37 + .../tests/series/methods/test_reset_index.py | 139 + .../cudf/tests/series/methods/test_round.py | 149 + .../tests/series/methods/test_sort_index.py | 46 + .../cudf/tests/series/methods/test_squeeze.py | 21 + .../cudf/tests/series/methods/test_to_cupy.py | 43 + .../cudf/tests/series/methods/test_to_dict.py | 18 + .../tests/series/methods/test_to_pandas.py | 154 + .../cudf/tests/series/methods/test_tolist.py | 20 + .../tests/series/methods/test_transpose.py | 28 + .../tests/series/methods/test_truncate.py | 50 + .../cudf/tests/series/methods/test_update.py | 38 + .../tests/series/methods/test_value_counts.py | 146 + .../cudf/tests/series/methods/test_where.py | 25 + .../cudf/cudf/tests/series/test_accessors.py | 1 - .../cudf/cudf/tests/series/test_attributes.py | 144 +- .../tests/series/test_binary_operations.py | 1 - python/cudf/cudf/tests/series/test_binops.py | 35 + .../cudf/cudf/tests/series/test_categorial.py | 1 - .../cudf/cudf/tests/series/test_combining.py | 1 - .../cudf/tests/series/test_computation.py | 1 - .../cudf/tests/series/test_constructing.py | 12 - .../cudf/tests/series/test_constructors.py | 637 ++++ .../tests/series/test_function_application.py | 1 - .../cudf/cudf/tests/series/test_indexing.py | 1 - .../tests/series/test_io_serialization.py | 1 - python/cudf/cudf/tests/series/test_missing.py | 1 - .../cudf/cudf/tests/series/test_reshaping.py | 1 - .../cudf/cudf/tests/series/test_selecting.py | 1 - python/cudf/cudf/tests/series/test_sorting.py | 1 - .../cudf/cudf/tests/series/test_timeseries.py | 1 - python/cudf/cudf/tests/test_series.py | 3111 ----------------- 63 files changed, 3304 insertions(+), 3137 deletions(-) create mode 100644 python/cudf/cudf/tests/reshape/__init__.py create mode 100644 python/cudf/cudf/tests/reshape/test_concat.py create mode 100644 python/cudf/cudf/tests/series/indexing/__init__.py create mode 100644 python/cudf/cudf/tests/series/indexing/test_setitem.py create mode 100644 python/cudf/cudf/tests/series/methods/__init__.py create mode 100644 python/cudf/cudf/tests/series/methods/test_add_prefix_suffix.py create mode 100644 python/cudf/cudf/tests/series/methods/test_astype.py create mode 100644 python/cudf/cudf/tests/series/methods/test_autocorr.py create mode 100644 python/cudf/cudf/tests/series/methods/test_between.py create mode 100644 
python/cudf/cudf/tests/series/methods/test_contains.py create mode 100644 python/cudf/cudf/tests/series/methods/test_count.py create mode 100644 python/cudf/cudf/tests/series/methods/test_describe.py create mode 100644 python/cudf/cudf/tests/series/methods/test_diff.py create mode 100644 python/cudf/cudf/tests/series/methods/test_digitize.py create mode 100644 python/cudf/cudf/tests/series/methods/test_drop.py create mode 100644 python/cudf/cudf/tests/series/methods/test_duplicated.py create mode 100644 python/cudf/cudf/tests/series/methods/test_equals.py create mode 100644 python/cudf/cudf/tests/series/methods/test_explode.py create mode 100644 python/cudf/cudf/tests/series/methods/test_factorize.py create mode 100644 python/cudf/cudf/tests/series/methods/test_fillna.py create mode 100644 python/cudf/cudf/tests/series/methods/test_hash_values.py create mode 100644 python/cudf/cudf/tests/series/methods/test_isin.py create mode 100644 python/cudf/cudf/tests/series/methods/test_isna_notnull.py create mode 100644 python/cudf/cudf/tests/series/methods/test_memory_usage.py create mode 100644 python/cudf/cudf/tests/series/methods/test_mode.py create mode 100644 python/cudf/cudf/tests/series/methods/test_nans_to_nulls.py create mode 100644 python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py create mode 100644 python/cudf/cudf/tests/series/methods/test_nunique.py create mode 100644 python/cudf/cudf/tests/series/methods/test_pipe.py create mode 100644 python/cudf/cudf/tests/series/methods/test_reindex.py create mode 100644 python/cudf/cudf/tests/series/methods/test_rename.py create mode 100644 python/cudf/cudf/tests/series/methods/test_reset_index.py create mode 100644 python/cudf/cudf/tests/series/methods/test_round.py create mode 100644 python/cudf/cudf/tests/series/methods/test_sort_index.py create mode 100644 python/cudf/cudf/tests/series/methods/test_squeeze.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_cupy.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_dict.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_pandas.py create mode 100644 python/cudf/cudf/tests/series/methods/test_tolist.py create mode 100644 python/cudf/cudf/tests/series/methods/test_transpose.py create mode 100644 python/cudf/cudf/tests/series/methods/test_truncate.py create mode 100644 python/cudf/cudf/tests/series/methods/test_update.py create mode 100644 python/cudf/cudf/tests/series/methods/test_value_counts.py create mode 100644 python/cudf/cudf/tests/series/methods/test_where.py delete mode 100644 python/cudf/cudf/tests/series/test_accessors.py delete mode 100644 python/cudf/cudf/tests/series/test_binary_operations.py create mode 100644 python/cudf/cudf/tests/series/test_binops.py delete mode 100644 python/cudf/cudf/tests/series/test_categorial.py delete mode 100644 python/cudf/cudf/tests/series/test_combining.py delete mode 100644 python/cudf/cudf/tests/series/test_computation.py delete mode 100644 python/cudf/cudf/tests/series/test_constructing.py create mode 100644 python/cudf/cudf/tests/series/test_constructors.py delete mode 100644 python/cudf/cudf/tests/series/test_function_application.py delete mode 100644 python/cudf/cudf/tests/series/test_indexing.py delete mode 100644 python/cudf/cudf/tests/series/test_io_serialization.py delete mode 100644 python/cudf/cudf/tests/series/test_missing.py delete mode 100644 python/cudf/cudf/tests/series/test_reshaping.py delete mode 100644 python/cudf/cudf/tests/series/test_selecting.py delete mode 100644 
python/cudf/cudf/tests/series/test_sorting.py delete mode 100644 python/cudf/cudf/tests/series/test_timeseries.py delete mode 100644 python/cudf/cudf/tests/test_series.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 1d27963a903..0a57efe2fbd 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -246,6 +246,47 @@ def numeric_types_as_str(request): return request.param +@pytest.fixture( + params=signed_integer_types + + unsigned_integer_types + + float_types + + bool_types +) +def numeric_and_bool_types_as_str(request): + """ + - "int8", "int16", "int32", "int64" + - "uint8", "uint16", "uint32", "uint64" + - "float32", "float64" + - "bool" + """ + return request.param + + +@pytest.fixture(params=datetime_types) +def datetime_types_as_str(request): + """ + - "datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]" + """ + return request.param + + +@pytest.fixture(params=timedelta_types) +def timedelta_types_as_str(request): + """ + - "timedelta64[ns]", "timedelta64[us]", "timedelta64[ms]", "timedelta64[s]" + """ + return request.param + + +@pytest.fixture(params=datetime_types + timedelta_types) +def temporal_types_as_str(request): + """ + - "datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]" + - "timedelta64[ns]", "timedelta64[us]", "timedelta64[ms]", "timedelta64[s]" + """ + return request.param + + @pytest.fixture( params=signed_integer_types + unsigned_integer_types @@ -268,3 +309,33 @@ def all_supported_types_as_str(request): - "bool" """ return request.param + + +@pytest.fixture(params=[True, False]) +def dropna(request): + """Param for `dropna` argument""" + return request.param + + +@pytest.fixture(params=[True, False, None]) +def nan_as_null(request): + """Param for `nan_as_null` argument""" + return request.param + + +@pytest.fixture(params=[True, False]) +def inplace(request): + """Param for `inplace` argument""" + return request.param + + +@pytest.fixture(params=[True, False]) +def ignore_index(request): + """Param for `ignore_index` argument""" + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + """Param for `ascending` argument""" + return request.param diff --git a/python/cudf/cudf/tests/reshape/__init__.py b/python/cudf/cudf/tests/reshape/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/reshape/test_concat.py b/python/cudf/cudf/tests/reshape/test_concat.py new file mode 100644 index 00000000000..8da43038159 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_concat.py @@ -0,0 +1,184 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
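+# Concatenation tests for cudf.Series: plain and indexed numeric/string
+# inputs, mixed-dtype error handling, and repeated concatenation onto
+# existing device buffers.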
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, -2, 3, -4], + [1.0, 12.221, 12.34, 13.324, 324.3242], + ], +) +@pytest.mark.parametrize( + "others", + [ + [-10, 11, -12, 13], + [0.1, 0.002, 324.2332, 0.2342], + ], +) +def test_series_concat_basic(data, others, ignore_index): + psr = pd.Series(data) + gsr = cudf.Series(data) + + other_ps = pd.Series(others) + other_gs = cudf.Series(others) + + expected = pd.concat([psr, other_ps], ignore_index=ignore_index) + actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "123"], + ["a"], + ], +) +@pytest.mark.parametrize( + "others", + [ + ["abc", "123"], + ["a"], + ["+", "-", "!", "_", "="], + ], +) +def test_series_concat_basic_str(data, others, ignore_index): + psr = pd.Series(data) + gsr = cudf.Series(data) + + other_ps = pd.Series(others) + other_gs = cudf.Series(others) + + expected = pd.concat([psr, other_ps], ignore_index=ignore_index) + actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + pd.Series(["abc", "123"], index=[10, 20]), + pd.Series(["a"], index=[10]), + ], +) +@pytest.mark.parametrize( + "others", + [ + pd.Series(["abc", "123"], index=[50, 20]), + pd.Series(["a"], index=[11]), + pd.Series(["+", "-", "!", "_", "="], index=[12, 13, 14, 15, 16]), + ], +) +def test_series_concat_series_with_index(data, others, ignore_index): + psr = pd.Series(data) + gsr = cudf.Series(data) + + other_ps = others + other_gs = cudf.from_pandas(others) + + expected = pd.concat([psr, other_ps], ignore_index=ignore_index) + actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) + + assert_eq(expected, actual) + + +def test_series_concat_error_mixed_types(): + gsr = cudf.Series([1, 2, 3, 4]) + other = cudf.Series(["a", "b", "c", "d"]) + + with pytest.raises( + TypeError, + match="cudf does not support mixed types, please type-cast " + "both series to same dtypes.", + ): + cudf.concat([gsr, other]) + + with pytest.raises( + TypeError, + match="cudf does not support mixed types, please type-cast " + "both series to same dtypes.", + ): + cudf.concat([gsr, gsr, other, gsr, other]) + + +@pytest.mark.parametrize( + "data", + [ + pd.Series([-1, 2, -3, 4], index=["a", "b", "c", "d"]), + pd.Series( + [1.0, 12.221, 12.34, 13.324, 324.3242], + index=[ + "float one", + "float two", + "float three", + "float four", + "float five", + ], + ), + ], +) +@pytest.mark.parametrize( + "others", + [ + [ + pd.Series([-10, 11, 12, 13], index=["a", "b", "c", "d"]), + pd.Series([12, 14, -15, 27], index=["d", "e", "z", "x"]), + ], + [ + pd.Series( + [0.1, 0.002, 324.2332, 0.2342], index=["-", "+", "%", "#"] + ), + pd.Series([12, 14, 15, 27], index=["d", "e", "z", "x"]), + ] + * 3, + ], +) +def test_series_concat_list_series_with_index(data, others, ignore_index): + psr = pd.Series(data) + gsr = cudf.Series(data) + + other_ps = others + other_gs = [cudf.from_pandas(obj) for obj in others] + + expected = pd.concat([psr, *other_ps], ignore_index=ignore_index) + actual = cudf.concat([gsr, *other_gs], ignore_index=ignore_index) + + assert_eq(expected, actual) + + +def test_series_concat_existing_buffers(): + a1 = np.arange(10, dtype=np.float64) + gs = cudf.Series(a1) + + # Add new buffer + a2 = cudf.Series(np.arange(5)) + gs = cudf.concat([gs, a2]) + assert len(gs) == 15 + 
np.testing.assert_equal(gs.to_numpy(), np.hstack([a1, a2.to_numpy()])) + + # Ensure appending to previous buffer + a3 = cudf.Series(np.arange(3)) + gs = cudf.concat([gs, a3]) + assert len(gs) == 18 + a4 = np.hstack([a1, a2.to_numpy(), a3.to_numpy()]) + np.testing.assert_equal(gs.to_numpy(), a4) + + # Appending different dtype + a5 = cudf.Series(np.array([1, 2, 3], dtype=np.int32)) + a6 = cudf.Series(np.array([4.5, 5.5, 6.5], dtype=np.float64)) + gs = cudf.concat([a5, a6]) + np.testing.assert_equal( + gs.to_numpy(), np.hstack([a5.to_numpy(), a6.to_numpy()]) + ) + gs = cudf.concat([cudf.Series(a6), a5]) + np.testing.assert_equal( + gs.to_numpy(), np.hstack([a6.to_numpy(), a5.to_numpy()]) + ) diff --git a/python/cudf/cudf/tests/series/indexing/__init__.py b/python/cudf/cudf/tests/series/indexing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py new file mode 100644 index 00000000000..88a014191bc --- /dev/null +++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +def test_fill_new_category(): + gs = cudf.Series(pd.Categorical(["a", "b", "c"])) + with pytest.raises(TypeError): + gs[0:1] = "d" + + +@pytest.mark.parametrize("dtype", ["int64", "float64"]) +@pytest.mark.parametrize("bool_scalar", [True, False]) +def test_set_bool_error(dtype, bool_scalar): + sr = cudf.Series([1, 2, 3], dtype=dtype) + psr = sr.to_pandas(nullable=True) + + assert_exceptions_equal( + lfunc=sr.__setitem__, + rfunc=psr.__setitem__, + lfunc_args_and_kwargs=([bool_scalar],), + rfunc_args_and_kwargs=([bool_scalar],), + ) + + +@pytest.mark.parametrize( + "data", [[0, 1, 2], ["a", "b", "c"], [0.324, 32.32, 3243.23]] +) +def test_series_setitem_nat_with_non_datetimes(data): + s = cudf.Series(data) + with pytest.raises(TypeError): + s[0] = cudf.NaT + + +def test_series_string_setitem(): + gs = cudf.Series(["abc", "def", "ghi", "xyz", "pqr"]) + ps = gs.to_pandas() + + gs[0] = "NaT" + gs[1] = "NA" + gs[2] = "" + gs[3] = "NaN" + + ps[0] = "NaT" + ps[1] = "NA" + ps[2] = "" + ps[3] = "NaN" + + assert_eq(gs, ps) + + +def test_series_error_nan_non_float_dtypes(): + s = cudf.Series(["a", "b", "c"]) + with pytest.raises(TypeError): + s[0] = np.nan + + s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + with pytest.raises(TypeError): + s[0] = np.nan + + +def test_series_setitem_mixed_bool_dtype(): + s = cudf.Series([True, False, True]) + with pytest.raises(TypeError): + s[0] = 10 diff --git a/python/cudf/cudf/tests/series/methods/__init__.py b/python/cudf/cudf/tests/series/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/series/methods/test_add_prefix_suffix.py b/python/cudf/cudf/tests/series/methods/test_add_prefix_suffix.py new file mode 100644 index 00000000000..1483968d100 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_add_prefix_suffix.py @@ -0,0 +1,25 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
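+# Checks that Series.add_prefix and Series.add_suffix match their pandas
+# counterparts.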
+ + +import cudf +from cudf.testing import assert_eq + + +def test_series_add_prefix(): + cd_s = cudf.Series([1, 2, 3, 4]) + pd_s = cd_s.to_pandas() + + got = cd_s.add_prefix("item_") + expected = pd_s.add_prefix("item_") + + assert_eq(got, expected) + + +def test_series_add_suffix(): + cd_s = cudf.Series([1, 2, 3, 4]) + pd_s = cd_s.to_pandas() + + got = cd_s.add_suffix("_item") + expected = pd_s.add_suffix("_item") + + assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py new file mode 100644 index 00000000000..6825af8442f --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -0,0 +1,103 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_series_typecast_to_object_error(): + actual = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(TypeError): + actual.astype(object) + with pytest.raises(TypeError): + actual.astype(np.dtype("object")) + new_series = actual.astype("str") + assert new_series[0] == "1970-01-01 00:00:00.000000001" + + +def test_series_typecast_to_object(): + actual = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + with cudf.option_context("mode.pandas_compatible", False): + new_series = actual.astype(object) + assert new_series[0] == "1970-01-01 00:00:00.000000001" + new_series = actual.astype(np.dtype("object")) + assert new_series[0] == "1970-01-01 00:00:00.000000001" + + +@pytest.mark.parametrize( + "dtype", + [ + pd.ArrowDtype(pa.int8()), + pd.ArrowDtype(pa.int16()), + pd.ArrowDtype(pa.int32()), + pd.ArrowDtype(pa.int64()), + pd.ArrowDtype(pa.uint8()), + pd.ArrowDtype(pa.uint16()), + pd.ArrowDtype(pa.uint32()), + pd.ArrowDtype(pa.uint64()), + pd.ArrowDtype(pa.float32()), + pd.ArrowDtype(pa.float64()), + pd.Int8Dtype(), + pd.Int16Dtype(), + pd.Int32Dtype(), + pd.Int64Dtype(), + pd.UInt8Dtype(), + pd.UInt16Dtype(), + pd.UInt32Dtype(), + pd.UInt64Dtype(), + pd.Float32Dtype(), + pd.Float64Dtype(), + ], +) +@pytest.mark.parametrize("klass", [cudf.Series, cudf.DataFrame, cudf.Index]) +@pytest.mark.parametrize("kind", [lambda x: x, str], ids=["obj", "string"]) +def test_astype_pandas_nullable_pandas_compat(dtype, klass, kind): + ser = klass([1, 2, 3]) + with cudf.option_context("mode.pandas_compatible", True): + actual = ser.astype(kind(dtype)) + expected = klass([1, 2, 3], dtype=kind(dtype)) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "type1", + [ + "category", + "interval[int64, right]", + "int64", + "float64", + "str", + "datetime64[ns]", + "timedelta64[ns]", + ], +) +@pytest.mark.parametrize( + "type2", + [ + "category", + "interval[int64, right]", + "int64", + "float64", + "str", + "datetime64[ns]", + "timedelta64[ns]", + ], +) +@pytest.mark.parametrize( + "as_dtype", [lambda x: x, cudf.dtype], ids=["string", "object"] +) +@pytest.mark.parametrize("copy", [True, False]) +def test_empty_astype_always_castable(type1, type2, as_dtype, copy): + ser = cudf.Series([], dtype=as_dtype(type1)) + result = ser.astype(as_dtype(type2), copy=copy) + expected = cudf.Series([], dtype=as_dtype(type2)) + assert_eq(result, expected) + if not copy and cudf.dtype(type1) == cudf.dtype(type2): + assert ser._column is result._column + else: + assert ser._column is not result._column diff --git a/python/cudf/cudf/tests/series/methods/test_autocorr.py 
b/python/cudf/cudf/tests/series/methods/test_autocorr.py new file mode 100644 index 00000000000..4e7b6ad8e19 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_autocorr.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +@pytest.mark.parametrize( + "data", + [ + [0.25, 0.5, 0.2, -0.05], + [0, 1, 2, np.nan, 4, cudf.NA, 6], + ], +) +@pytest.mark.parametrize("lag", [1, 4]) +def test_autocorr(data, lag): + cudf_series = cudf.Series(data) + psr = cudf_series.to_pandas() + + cudf_corr = cudf_series.autocorr(lag=lag) + + # autocorrelation is undefined (nan) for less than two entries, but pandas + # short-circuits when there are 0 entries and bypasses the numpy function + # call that generates an error. + num_both_valid = (psr.notna() & psr.shift(lag).notna()).sum() + with expect_warning_if(num_both_valid == 1, RuntimeWarning): + pd_corr = psr.autocorr(lag=lag) + + assert_eq(pd_corr, cudf_corr) diff --git a/python/cudf/cudf/tests/series/methods/test_between.py b/python/cudf/cudf/tests/series/methods/test_between.py new file mode 100644 index 00000000000..c3bb486e00c --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_between.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture(params=["both", "neither", "left", "right"]) +def inclusive(request): + return request.param + + +@pytest.mark.parametrize( + "data,left,right", + [ + ([0, 1, 2, 3, 4, 5, 10], 0, 5), + ([0, 1, 2, 3, 4, 5, 10], 10, 1), + ([0, 1, 2, 3, 4, 5], [0, 10, 11] * 2, [1, 2, 5] * 2), + (["a", "few", "set", "of", "strings", "xyz", "abc"], "banana", "few"), + (["a", "few", "set", "of", "strings", "xyz", "abc"], "phone", "hello"), + ( + ["a", "few", "set", "of", "strings", "xyz", "abc"], + ["a", "hello", "rapids", "ai", "world", "chars", "strs"], + ["yes", "no", "hi", "bye", "test", "pass", "fail"], + ), + ([0, 1, 2, np.nan, 4, np.nan, 10], 10, 1), + ], +) +def test_series_between(data, left, right, inclusive): + ps = pd.Series(data) + gs = cudf.from_pandas(ps, nan_as_null=False) + + expected = ps.between(left, right, inclusive=inclusive) + actual = gs.between(left, right, inclusive=inclusive) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data,left,right", + [ + ([0, 1, 2, None, 4, 5, 10], 0, 5), + ([0, 1, 2, 3, None, 5, 10], 10, 1), + ([None, 1, 2, 3, 4, None], [0, 10, 11] * 2, [1, 2, 5] * 2), + ( + ["a", "few", "set", None, "strings", "xyz", "abc"], + ["a", "hello", "rapids", "ai", "world", "chars", "strs"], + ["yes", "no", "hi", "bye", "test", "pass", "fail"], + ), + ], +) +def test_series_between_with_null(data, left, right, inclusive): + gs = cudf.Series(data) + ps = gs.to_pandas(nullable=True) + + expected = ps.between(left, right, inclusive=inclusive) + actual = gs.between(left, right, inclusive=inclusive) + + assert_eq(expected, actual.to_pandas(nullable=True)) diff --git a/python/cudf/cudf/tests/series/methods/test_contains.py b/python/cudf/cudf/tests/series/methods/test_contains.py new file mode 100644 index 00000000000..a80502b67e5 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_contains.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
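+# Verifies the `in` operator on a Series against pandas; membership is
+# tested against the index, not the values.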
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("data", [[True, False, None], [10, 200, 300]]) +@pytest.mark.parametrize("index", [None, [10, 20, 30]]) +def test_series_contains(data, index): + ps = pd.Series(data, index=index) + gs = cudf.Series(data, index=index) + + assert_eq(1 in ps, 1 in gs) + assert_eq(10 in ps, 10 in gs) + assert_eq(True in ps, True in gs) + assert_eq(False in ps, False in gs) diff --git a/python/cudf/cudf/tests/series/methods/test_count.py b/python/cudf/cudf/tests/series/methods/test_count.py new file mode 100644 index 00000000000..6dd5e6994ff --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_count.py @@ -0,0 +1,28 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_series_count_invalid_param(): + s = cudf.Series([], dtype="float64") + with pytest.raises(TypeError): + s.count(skipna=True) + + +def test_series_dataframe_count_float(): + gs = cudf.Series([1, 2, 3, None, np.nan, 10], nan_as_null=False) + ps = cudf.Series([1, 2, 3, None, np.nan, 10]) + + with cudf.option_context("mode.pandas_compatible", True): + assert_eq(ps.count(), gs.count()) + assert_eq(ps.to_frame().count(), gs.to_frame().count()) + with cudf.option_context("mode.pandas_compatible", False): + assert_eq(gs.count(), gs.to_pandas(nullable=True).count()) + assert_eq( + gs.to_frame().count(), + gs.to_frame().to_pandas(nullable=True).count(), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_describe.py b/python/cudf/cudf/tests/series/methods/test_describe.py new file mode 100644 index 00000000000..2936fa83aba --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_describe.py @@ -0,0 +1,60 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
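+# Compares Series.describe with pandas across numeric, datetime/timedelta,
+# string, boolean, and categorical inputs.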
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_series_describe_numeric(numeric_types_as_str): + ps = pd.Series([0, 1, 2, 3, 1, 2, 3], dtype=numeric_types_as_str) + gs = cudf.from_pandas(ps) + actual = gs.describe() + expected = ps.describe() + + assert_eq(expected, actual, check_dtype=True) + + +def test_series_describe_temporal(temporal_types_as_str, request): + if "ms" in temporal_types_as_str: + request.applymarker( + pytest.mark.xfail( + reason=f"string formatting of {temporal_types_as_str} incorrect in cuDF" + ) + ) + gs = cudf.Series([0, 1, 2, 3, 1, 2, 3], dtype=temporal_types_as_str) + ps = gs.to_pandas() + + expected = ps.describe() + actual = gs.describe() + + assert_eq(expected.astype("str"), actual) + + +@pytest.mark.parametrize( + "ps", + [ + pd.Series(["a", "b", "c", "d", "e", "a"]), + pd.Series([True, False, True, True, False]), + pd.Series([], dtype="str"), + pd.Series(["a", "b", "c", "a"], dtype="category"), + pd.Series(["d", "e", "f"], dtype="category"), + pd.Series(pd.Categorical(["d", "e", "f"], categories=["f", "e", "d"])), + pd.Series( + pd.Categorical( + ["d", "e", "f"], categories=["f", "e", "d"], ordered=True + ) + ), + ], +) +def test_series_describe_other_types(ps): + gs = cudf.from_pandas(ps) + + expected = ps.describe() + actual = gs.describe() + + if len(ps) == 0: + assert_eq(expected.fillna("a").astype("str"), actual.fillna("a")) + else: + assert_eq(expected.astype("str"), actual) diff --git a/python/cudf/cudf/tests/series/methods/test_diff.py b/python/cudf/cudf/tests/series/methods/test_diff.py new file mode 100644 index 00000000000..1706f75a16f --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_diff.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
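+# Compares Series.diff with pandas across numeric dtypes and shift periods,
+# including empty inputs, unsupported dtypes, and datetime/bool/NaN data.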
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    gen_rand,
+)
+
+
+@pytest.mark.parametrize("period", [-20, -1, 0, 1, 2])
+@pytest.mark.parametrize("data_empty", [False, True])
+def test_diff(numeric_types_as_str, period, data_empty):
+    if data_empty:
+        data = None
+    else:
+        if np.dtype(numeric_types_as_str) == np.int8:
+            # to keep data in range
+            data = gen_rand(numeric_types_as_str, 100000, low=-2, high=2)
+        else:
+            data = gen_rand(numeric_types_as_str, 100000)
+
+    gs = cudf.Series(data, dtype=numeric_types_as_str)
+    ps = pd.Series(data, dtype=numeric_types_as_str)
+
+    expected_outcome = ps.diff(period)
+    diffed_outcome = gs.diff(period).astype(expected_outcome.dtype)
+
+    if data_empty:
+        assert_eq(diffed_outcome, expected_outcome, check_index_type=False)
+    else:
+        assert_eq(diffed_outcome, expected_outcome)
+
+
+def test_diff_unsupported_dtypes():
+    gs = cudf.Series(["a", "b", "c", "d", "e"])
+    with pytest.raises(
+        TypeError,
+        match=r"unsupported operand type\(s\)",
+    ):
+        gs.diff()
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        pd.date_range("2020-01-01", "2020-01-06", freq="D"),
+        [True, True, True, False, True, True],
+        [1.0, 2.0, 3.5, 4.0, 5.0, -1.7],
+        [1, 2, 3, 3, 4, 5],
+        [np.nan, None, None, np.nan, np.nan, None],
+    ],
+)
+def test_diff_many_dtypes(data):
+    ps = pd.Series(data)
+    gs = cudf.from_pandas(ps)
+    assert_eq(ps.diff(), gs.diff())
+    assert_eq(ps.diff(periods=2), gs.diff(periods=2))
diff --git a/python/cudf/cudf/tests/series/methods/test_digitize.py b/python/cudf/cudf/tests/series/methods/test_digitize.py
new file mode 100644
index 00000000000..0403a740bab
--- /dev/null
+++ b/python/cudf/cudf/tests/series/methods/test_digitize.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import numpy as np
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize("right", [True, False])
+@pytest.mark.parametrize("bins_box", [np.asarray, cudf.Series])
+def test_series_digitize(right, numeric_and_bool_types_as_str, bins_box):
+    num_rows = 20
+    num_bins = 5
+    rng = np.random.default_rng(seed=0)
+    data = rng.integers(0, 100, num_rows).astype(numeric_and_bool_types_as_str)
+    bins = np.unique(
+        np.sort(
+            rng.integers(2, 95, num_bins).astype(numeric_and_bool_types_as_str)
+        )
+    )
+    s = cudf.Series(data)
+    indices = s.digitize(bins_box(bins), right)
+    np.testing.assert_array_equal(
+        np.digitize(data, bins, right), indices.to_numpy()
+    )
+
+
+def test_series_digitize_invalid_bins():
+    rng = np.random.default_rng(seed=0)
+    s = cudf.Series(rng.integers(0, 30, 80), dtype="int32")
+    bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
+
+    with pytest.raises(
+        ValueError, match="`bins` cannot contain null entries."
+    ):
+        s.digitize(bins)
diff --git a/python/cudf/cudf/tests/series/methods/test_drop.py b/python/cudf/cudf/tests/series/methods/test_drop.py
new file mode 100644
index 00000000000..96c7135cd7e
--- /dev/null
+++ b/python/cudf/cudf/tests/series/methods/test_drop.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
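+# Exercises Series.drop by labels, index, and MultiIndex level, with and
+# without inplace=True, and checks error parity with pandas.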
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.mark.parametrize( + "ps", + [ + pd.Series(["a"] * 20, index=range(0, 20)), + pd.Series(["b", None] * 10, index=range(0, 20), name="ASeries"), + pd.Series( + ["b", None] * 5, + index=pd.Index(list(range(10)), dtype="uint64"), + name="BSeries", + ), + ], +) +@pytest.mark.parametrize( + "labels", + [[0], 1, pd.Index([1, 3, 5]), np.array([1, 3, 5], dtype="float32")], +) +def test_series_drop_labels(ps, labels, inplace): + ps = ps.copy() + gs = cudf.from_pandas(ps) + + expected = ps.drop(labels=labels, axis=0, inplace=inplace) + actual = gs.drop(labels=labels, axis=0, inplace=inplace) + + if inplace: + expected = ps + actual = gs + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("data", [["a"] * 20, ["b", None] * 10]) +@pytest.mark.parametrize("index", [[0], 1, pd.Index([1, 3, 5])]) +def test_series_drop_index(data, index, inplace): + ps = pd.Series(data, index=range(0, 20), name="a") + gs = cudf.from_pandas(ps) + + expected = ps.drop(index=index, inplace=inplace) + actual = gs.drop(index=index, inplace=inplace) + + if inplace: + expected = ps + actual = gs + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "index,level", + [ + ("cow", 0), + ("lama", 0), + ("falcon", 0), + ("speed", 1), + ("weight", 1), + ("length", 1), + ( + "cow", + None, + ), + ( + "lama", + None, + ), + ( + "falcon", + None, + ), + ], +) +def test_series_drop_multiindex(index, level, inplace): + ps = pd.Series( + ["a" if i % 2 == 0 else "b" for i in range(0, 10)], + index=pd.MultiIndex( + levels=[ + ["lama", "cow", "falcon"], + ["speed", "weight", "length"], + ], + codes=[ + [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], + [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], + ], + ), + name="abc", + ) + gs = cudf.from_pandas(ps) + + expected = ps.drop(index=index, inplace=inplace, level=level) + actual = gs.drop(index=index, inplace=inplace, level=level) + + if inplace: + expected = ps + actual = gs + + assert_eq(expected, actual) + + +def test_series_drop_edge_inputs(): + gs = cudf.Series([42], name="a") + ps = gs.to_pandas() + + assert_eq(ps.drop(columns=["b"]), gs.drop(columns=["b"])) + + assert_eq(ps.drop(columns="b"), gs.drop(columns="b")) + + assert_exceptions_equal( + lfunc=ps.drop, + rfunc=gs.drop, + lfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), + rfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), + ) + + assert_exceptions_equal( + lfunc=ps.drop, + rfunc=gs.drop, + lfunc_args_and_kwargs=([], {}), + rfunc_args_and_kwargs=([], {}), + ) + + assert_exceptions_equal( + lfunc=ps.drop, + rfunc=gs.drop, + lfunc_args_and_kwargs=(["b"], {"axis": 1}), + rfunc_args_and_kwargs=(["b"], {"axis": 1}), + ) + + +def test_series_drop_raises(): + gs = cudf.Series([10, 20, 30], index=["x", "y", "z"], name="c") + ps = gs.to_pandas() + + assert_exceptions_equal( + lfunc=ps.drop, + rfunc=gs.drop, + lfunc_args_and_kwargs=(["p"],), + rfunc_args_and_kwargs=(["p"],), + ) + + # dtype specified mismatch + assert_exceptions_equal( + lfunc=ps.drop, + rfunc=gs.drop, + lfunc_args_and_kwargs=([3],), + rfunc_args_and_kwargs=([3],), + ) + + expect = ps.drop("p", errors="ignore") + actual = gs.drop("p", errors="ignore") + + assert_eq(actual, expect) diff --git a/python/cudf/cudf/tests/series/methods/test_duplicated.py b/python/cudf/cudf/tests/series/methods/test_duplicated.py new file mode 100644 index 00000000000..47c0b537560 --- /dev/null +++ 
b/python/cudf/cudf/tests/series/methods/test_duplicated.py @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data,index", + [ + ([1, 2, 3], [10, 11, 12]), + ([1, 2, 3, 1, 1, 2, 3, 2], [10, 20, 23, 24, 25, 26, 27, 28]), + ([1, None, 2, None, 3, None, 3, 1], [5, 6, 7, 8, 9, 10, 11, 12]), + ([np.nan, 1.0, np.nan, 5.4, 5.4, 1.0], ["a", "b", "c", "d", "e", "f"]), + ( + ["lama", "cow", "lama", None, "beetle", "lama", None, None], + [1, 4, 10, 11, 2, 100, 200, 400], + ), + ], +) +@pytest.mark.parametrize("keep", ["first", "last", False]) +@pytest.mark.parametrize("name", [None, "a"]) +def test_series_duplicated(data, index, keep, name): + gs = cudf.Series(data, index=index, name=name) + ps = gs.to_pandas() + + assert_eq(gs.duplicated(keep=keep), ps.duplicated(keep=keep)) diff --git a/python/cudf/cudf/tests/series/methods/test_equals.py b/python/cudf/cudf/tests/series/methods/test_equals.py new file mode 100644 index 00000000000..d2e1db334ab --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_equals.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "lhs, rhs", [("a", "a"), ("a", "b"), (1, 1.0), (None, None), (None, "a")] +) +def test_equals_names(lhs, rhs): + lhs = cudf.Series([1, 2], name=lhs) + rhs = cudf.Series([1, 2], name=rhs) + + got = lhs.equals(rhs) + expect = lhs.to_pandas().equals(rhs.to_pandas()) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_explode.py b/python/cudf/cudf/tests/series/methods/test_explode.py new file mode 100644 index 00000000000..0cb54c98aac --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_explode.py @@ -0,0 +1,31 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]], +) +@pytest.mark.parametrize( + "p_index", + [ + None, + ["ia", "ib", "ic", "id", "ie"], + pd.MultiIndex.from_tuples( + [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")] + ), + ], +) +def test_explode(data, ignore_index, p_index): + pdf = pd.Series(data, index=p_index, name="someseries") + gdf = cudf.from_pandas(pdf) + + expect = pdf.explode(ignore_index) + got = gdf.explode(ignore_index) + + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/methods/test_factorize.py b/python/cudf/cudf/tests/series/methods/test_factorize.py new file mode 100644 index 00000000000..98cc8187fce --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_factorize.py @@ -0,0 +1,31 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
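+# Validates Series.factorize labels and categories against pandas for the
+# use_na_sentinel and sort options.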
+
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1, 2, 3, 2, 1],
+        [1, 2, None, 3, 1, 1],
+        [],
+        ["a", "b", "c", None, "z", "a"],
+    ],
+)
+@pytest.mark.parametrize("use_na_sentinel", [True, False])
+@pytest.mark.parametrize("sort", [True, False])
+def test_series_factorize_use_na_sentinel(data, use_na_sentinel, sort):
+    gsr = cudf.Series(data)
+    psr = gsr.to_pandas(nullable=True)
+
+    expected_labels, expected_cats = psr.factorize(
+        use_na_sentinel=use_na_sentinel, sort=sort
+    )
+    actual_labels, actual_cats = gsr.factorize(
+        use_na_sentinel=use_na_sentinel, sort=sort
+    )
+    assert_eq(expected_labels, actual_labels.get())
+    assert_eq(expected_cats, actual_cats.to_pandas(nullable=True))
diff --git a/python/cudf/cudf/tests/series/methods/test_fillna.py b/python/cudf/cudf/tests/series/methods/test_fillna.py
new file mode 100644
index 00000000000..d317ff85596
--- /dev/null
+++ b/python/cudf/cudf/tests/series/methods/test_fillna.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+
+import numpy as np
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1, None, 11, 2.0, np.nan],
+        [np.nan],
+        [None, None, None],
+        [np.nan, 1, 10, 393.32, np.nan],
+    ],
+)
+@pytest.mark.parametrize("nan_as_null", [True, False])
+@pytest.mark.parametrize("fill_value", [1.2, 332, np.nan])
+def test_fillna_with_nan(data, nan_as_null, fill_value):
+    gs = cudf.Series(data, dtype="float64", nan_as_null=nan_as_null)
+    ps = gs.to_pandas()
+
+    expected = ps.fillna(fill_value)
+    actual = gs.fillna(fill_value)
+
+    assert_eq(expected, actual)
+
+
+def test_fillna_categorical_with_non_categorical_raises():
+    ser = cudf.Series([1, None], dtype="category")
+    with pytest.raises(TypeError):
+        ser.fillna(cudf.Series([1, 2]))
+
+
+def test_fillna_categorical_with_different_categories_raises():
+    ser = cudf.Series([1, None], dtype="category")
+    with pytest.raises(TypeError):
+        ser.fillna(cudf.Series([1, 2], dtype="category"))
diff --git a/python/cudf/cudf/tests/series/methods/test_hash_values.py b/python/cudf/cudf/tests/series/methods/test_hash_values.py
new file mode 100644
index 00000000000..9e64e4dfab6
--- /dev/null
+++ b/python/cudf/cudf/tests/series/methods/test_hash_values.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+import hashlib
+
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "method", ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"]
+)
+def test_series_hash_values(method):
+    inputs = cudf.Series(
+        [
+            "",
+            "0",
+            "A 56 character string to test message padding algorithm.",
+            "A 63 character string to test message padding algorithm, again.",
+            "A 64 character string to test message padding algorithm, again!!",
+            (
+                "A very long (greater than 128 bytes/char string) to execute "
+                "a multi hash-step data point in the hash function being "
+                "tested. This string needed to be longer."
+ ), + "All work and no play makes Jack a dull boy", + "!\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~", + "\x00\x00\x00\x10\x00\x00\x00\x00", + "\x00\x00\x00\x00", + ] + ) + + def hashlib_compute_digest(data): + hasher = getattr(hashlib, method)() + hasher.update(data.encode("utf-8")) + return hasher.hexdigest() + + hashlib_validation = inputs.to_pandas().apply(hashlib_compute_digest) + validation_results = cudf.Series(hashlib_validation) + hash_values = inputs.hash_values(method=method) + assert_eq(hash_values, validation_results) + + +def test_series_hash_values_invalid_method(): + inputs = cudf.Series(["", "0"]) + with pytest.raises(ValueError): + inputs.hash_values(method="invalid_method") diff --git a/python/cudf/cudf/tests/series/methods/test_isin.py b/python/cudf/cudf/tests/series/methods/test_isin.py new file mode 100644 index 00000000000..dfb7252775a --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_isin.py @@ -0,0 +1,170 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +@pytest.mark.parametrize( + "data", + [ + [], + [0, 12, 14], + [0, 14, 12, 12, 3, 10, 12, 14], + np.random.default_rng(seed=0).integers(-100, 100, 200), + pd.Series([0.0, 1.0, None, 10.0]), + [None, None, None, None], + [np.nan, None, -1, 2, 3], + [1, 2], + ], +) +@pytest.mark.parametrize( + "values", + [ + np.random.default_rng(seed=0).integers(-100, 100, 10), + [], + [np.nan, None, -1, 2, 3], + [1.0, 12.0, None, None, 120], + [0.1, 12.1, 14.1], + [0, 14, 12, 12, 3, 10, 12, 14, None], + [None, None, None], + ["0", "12", "14"], + ["0", "12", "14", "a"], + [1.0, 2.5], + ], +) +def test_isin_numeric(data, values): + rng = np.random.default_rng(seed=0) + index = rng.integers(0, 100, len(data)) + psr = pd.Series(data, index=index) + gsr = cudf.Series.from_pandas(psr, nan_as_null=False) + + expected = psr.isin(values) + got = gsr.isin(values) + + assert_eq(got, expected) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning newly introduced in pandas-2.2.0", +) +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series( + ["2018-01-01", "2019-04-03", None, "2019-12-30"], + dtype="datetime64[ns]", + ), + pd.Series( + [ + "2018-01-01", + "2019-04-03", + None, + "2019-12-30", + "2018-01-01", + "2018-01-01", + ], + dtype="datetime64[ns]", + ), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + [1514764800000000000, 1577664000000000000], + [ + 1514764800000000000, + 1577664000000000000, + 1577664000000000000, + 1577664000000000000, + 1514764800000000000, + ], + ["2019-04-03", "2019-12-30", "2012-01-01"], + [ + "2012-01-01", + "2012-01-01", + "2012-01-01", + "2019-04-03", + "2019-12-30", + "2012-01-01", + ], + ], +) +def test_isin_datetime(data, values): + psr = pd.Series(data) + gsr = cudf.Series.from_pandas(psr) + + is_len_str = isinstance(next(iter(values), None), str) and len(data) + with expect_warning_if(is_len_str): + got = gsr.isin(values) + with expect_warning_if(is_len_str): + expected = psr.isin(values) + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + [], + ["this", "is", None, "a", "test"], + ["test", "this", "test", "is", None, "test", "a", "test"], + ["0", "12", "14"], + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["this", "is"], + [None, None, None], + ["12", "14", 
"19"], + [12, 14, 19], + ["is", "this", "is", "this", "is"], + ], +) +def test_isin_string(data, values): + psr = pd.Series(data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.isin(values) + expected = psr.isin(values) + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + [], + pd.Series(["a", "b", "c", "c", "c", "d", "e"], dtype="category"), + pd.Series(["a", "b", None, "c", "d", "e"], dtype="category"), + pd.Series([0, 3, 10, 12], dtype="category"), + pd.Series([0, 3, 10, 12, 0, 10, 3, 0, 0, 3, 3], dtype="category"), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["a", "b", None, "f", "words"], + ["0", "12", None, "14"], + [0, 10, 12, None, 39, 40, 1000], + [0, 0, 0, 0, 3, 3, 3, None, 1, 2, 3], + ], +) +def test_isin_categorical(data, values): + psr = pd.Series(data) + gsr = cudf.Series.from_pandas(psr) + + got = gsr.isin(values) + expected = psr.isin(values) + assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/series/methods/test_isna_notnull.py b/python/cudf/cudf/tests/series/methods/test_isna_notnull.py new file mode 100644 index 00000000000..484ee48a5a9 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_isna_notnull.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.errors import MixedTypeError +from cudf.testing import assert_eq + + +@pytest.fixture( + params=[ + pd.Series([0, 1, 2, np.nan, 4, None, 6]), + pd.Series( + [0, 1, 2, np.nan, 4, None, 6], + index=["q", "w", "e", "r", "t", "y", "u"], + name="a", + ), + pd.Series([0, 1, 2, 3, 4]), + pd.Series(["a", "b", "u", "h", "d"]), + pd.Series([None, None, np.nan, None, np.inf, -np.inf]), + pd.Series([], dtype="float64"), + pd.Series( + [pd.NaT, pd.Timestamp("1939-05-27"), pd.Timestamp("1940-04-25")] + ), + pd.Series([np.nan]), + pd.Series([None]), + pd.Series(["a", "b", "", "c", None, "e"]), + ] +) +def ps(request): + return request.param + + +def test_series_isnull_isna(ps, nan_as_null): + nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) + if nan_as_null is False and ( + nan_contains.any() and not nan_contains.all() and ps.dtype == object + ): + with pytest.raises(MixedTypeError): + cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) + else: + gs = cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) + + assert_eq(ps.isnull(), gs.isnull()) + assert_eq(ps.isna(), gs.isna()) + + +def test_series_notnull_notna(ps, nan_as_null): + nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) + if nan_as_null is False and ( + nan_contains.any() and not nan_contains.all() and ps.dtype == object + ): + with pytest.raises(MixedTypeError): + cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) + else: + gs = cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) + + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.notna(), gs.notna()) diff --git a/python/cudf/cudf/tests/series/methods/test_memory_usage.py b/python/cudf/cudf/tests/series/methods/test_memory_usage.py new file mode 100644 index 00000000000..f9c81aaaef9 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_memory_usage.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import cudf + + +def test_series_memory_usage(): + sr = cudf.Series([1, 2, 3, 4], dtype="int64") + assert sr.memory_usage() == 32 + + sliced_sr = sr[2:] + assert sliced_sr.memory_usage() == 16 + + sliced_sr[3] = None + assert sliced_sr.memory_usage() == 80 + + sr = cudf.Series(["hello world", "rapids ai", "abc", "z"]) + assert sr.memory_usage() == 44 + + assert sr[3:].memory_usage() == 9 # z + assert sr[:1].memory_usage() == 19 # hello world diff --git a/python/cudf/cudf/tests/series/methods/test_mode.py b/python/cudf/cudf/tests/series/methods/test_mode.py new file mode 100644 index 00000000000..bfd096e9343 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_mode.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "gs", + [ + lambda: cudf.Series([1, 2, 3]), + lambda: cudf.Series([None]), + lambda: cudf.Series([4]), + lambda: cudf.Series([2, 3, -1, 0, 1], name="test name"), + lambda: cudf.Series( + [1, 2, 3, None, 2, 1], index=["a", "v", "d", "e", "f", "g"] + ), + lambda: cudf.Series([1, 2, 3, None, 2, 1, None], name="abc"), + lambda: cudf.Series(["ab", "bc", "ab", None, "bc", None, None]), + lambda: cudf.Series([None, None, None, None, None], dtype="str"), + lambda: cudf.Series([None, None, None, None, None]), + lambda: cudf.Series( + [ + 123213, + 23123, + 123123, + 12213123, + 12213123, + 12213123, + 23123, + 2312323123, + None, + None, + ], + dtype="timedelta64[ns]", + ), + lambda: cudf.Series( + [ + None, + 1, + 2, + 3242434, + 3233243, + 1, + 2, + 1023, + None, + 12213123, + None, + 2312323123, + None, + None, + ], + dtype="datetime64[ns]", + ), + lambda: cudf.Series(name="empty series", dtype="float64"), + lambda: cudf.Series( + ["a", "b", "c", " ", "a", "b", "z"], dtype="category" + ), + ], +) +def test_series_mode(gs, dropna): + gs = gs() + ps = gs.to_pandas() + + expected = ps.mode(dropna=dropna) + actual = gs.mode(dropna=dropna) + + assert_eq(expected, actual, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/methods/test_nans_to_nulls.py b/python/cudf/cudf/tests/series/methods/test_nans_to_nulls.py new file mode 100644 index 00000000000..3dc2619ab5d --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_nans_to_nulls.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf + + +@pytest.mark.parametrize("value", [1, 1.1]) +def test_nans_to_nulls_noop_copies_column(value): + ser1 = cudf.Series([value]) + ser2 = ser1.nans_to_nulls() + assert ser1._column is not ser2._column diff --git a/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py b/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py new file mode 100644 index 00000000000..329c7f96602 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
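+# nlargest/nsmallest are numeric-only operations; the test below checks
+# that cudf raises the same TypeError as pandas for string input.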
+ +import pytest + +import cudf +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.mark.parametrize("attr", ["nlargest", "nsmallest"]) +def test_series_nlargest_nsmallest_str_error(attr): + gs = cudf.Series(["a", "b", "c", "d", "e"]) + ps = gs.to_pandas() + + assert_exceptions_equal( + getattr(gs, attr), getattr(ps, attr), ([], {"n": 1}), ([], {"n": 1}) + ) diff --git a/python/cudf/cudf/tests/series/methods/test_nunique.py b/python/cudf/cudf/tests/series/methods/test_nunique.py new file mode 100644 index 00000000000..19ff910e316 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_nunique.py @@ -0,0 +1,25 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_nunique_all_null(dropna): + data = [None, None] + pd_ser = pd.Series(data) + cudf_ser = cudf.Series(data) + result = pd_ser.nunique(dropna=dropna) + expected = cudf_ser.nunique(dropna=dropna) + assert result == expected + + +def test_series_nunique(): + cd_s = cudf.Series([1, 3, 5, 7, 7]) + pd_s = cd_s.to_pandas() + + actual = cd_s.nunique() + expected = pd_s.nunique() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_pipe.py b/python/cudf/cudf/tests/series/methods/test_pipe.py new file mode 100644 index 00000000000..3f47aa5bf78 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_pipe.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +def test_series_pipe(): + psr = pd.Series([10, 20, 30, 40]) + gsr = cudf.Series([10, 20, 30, 40]) + + def custom_add_func(sr, val): + new_sr = sr + val + return new_sr + + def custom_to_str_func(sr, val): + new_sr = sr.astype("str") + val + return new_sr + + expected = ( + psr.pipe(custom_add_func, 11) + .pipe(custom_add_func, val=12) + .pipe(custom_to_str_func, "rapids") + ) + actual = ( + gsr.pipe(custom_add_func, 11) + .pipe(custom_add_func, val=12) + .pipe(custom_to_str_func, "rapids") + ) + + assert_eq(expected, actual) + + expected = ( + psr.pipe((custom_add_func, "sr"), val=11) + .pipe(custom_add_func, val=1) + .pipe(custom_to_str_func, "rapids-ai") + ) + actual = ( + gsr.pipe((custom_add_func, "sr"), val=11) + .pipe(custom_add_func, val=1) + .pipe(custom_to_str_func, "rapids-ai") + ) + + assert_eq(expected, actual) + + +def test_series_pipe_error(): + psr = pd.Series([10, 20, 30, 40]) + gsr = cudf.Series([10, 20, 30, 40]) + + def custom_add_func(sr, val): + new_sr = sr + val + return new_sr + + assert_exceptions_equal( + lfunc=psr.pipe, + rfunc=gsr.pipe, + lfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}), + rfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_reindex.py b/python/cudf/cudf/tests/series/methods/test_reindex.py new file mode 100644 index 00000000000..a2bcac2fb3d --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_reindex.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
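+# Reindexing is only well-defined for unique index labels; the test below
+# checks that cudf raises the same exception as pandas when the index
+# contains duplicates.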
+ + +import cudf +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +def test_series_duplicate_index_reindex(): + gs = cudf.Series([0, 1, 2, 3], index=[0, 0, 1, 1]) + ps = gs.to_pandas() + + assert_exceptions_equal( + gs.reindex, + ps.reindex, + lfunc_args_and_kwargs=([10, 11, 12, 13], {}), + rfunc_args_and_kwargs=([10, 11, 12, 13], {}), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_rename.py b/python/cudf/cudf/tests/series/methods/test_rename.py new file mode 100644 index 00000000000..f5705511d5c --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_rename.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_series_unique_pandas_compatibility(): + gs = cudf.Series([10, 11, 12, 11, 10]) + ps = gs.to_pandas() + with cudf.option_context("mode.pandas_compatible", True): + actual = gs.unique() + expected = ps.unique() + assert_eq(actual, expected) + + +@pytest.mark.parametrize("initial_name", [None, "a"]) +@pytest.mark.parametrize("name", [None, "a"]) +def test_series_rename(initial_name, name): + gsr = cudf.Series([1, 2, 3], name=initial_name) + psr = pd.Series([1, 2, 3], name=initial_name) + + assert_eq(gsr, psr) + + actual = gsr.rename(name) + expected = psr.rename(name) + + assert_eq(actual, expected) + + +@pytest.mark.parametrize("index", [lambda x: x * 2, {1: 2}]) +def test_rename_index_not_supported(index): + ser = cudf.Series(range(2)) + with pytest.raises(NotImplementedError): + ser.rename(index=index) diff --git a/python/cudf/cudf/tests/series/methods/test_reset_index.py b/python/cudf/cudf/tests/series/methods/test_reset_index.py new file mode 100644 index 00000000000..a1025a4dfda --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_reset_index.py @@ -0,0 +1,139 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
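+# `drop` is parametrized by the local fixture below; `inplace` is assumed
+# to come from a shared conftest fixture. drop=False with inplace=True is
+# invalid for a Series (it cannot become a DataFrame in place), so those
+# cases are skipped here and exercised in
+# test_reset_index_dup_level_name_exceptions instead.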
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.api.extensions import no_default
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    assert_exceptions_equal,
+)
+
+
+@pytest.fixture(params=[True, False])
+def drop(request):
+    """Param for `drop` argument"""
+    return request.param
+
+
+@pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]])
+@pytest.mark.parametrize("original_name", [None, "original_ser"])
+@pytest.mark.parametrize("name", [None, "ser", no_default])
+def test_reset_index(level, drop, inplace, original_name, name):
+    midx = pd.MultiIndex.from_tuples(
+        [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["l0", None]
+    )
+    ps = pd.Series(range(4), index=midx, name=original_name)
+    gs = cudf.from_pandas(ps)
+
+    if not drop and inplace:
+        pytest.skip(
+            "For exception checks, see "
+            "test_reset_index_dup_level_name_exceptions"
+        )
+
+    expect = ps.reset_index(level=level, drop=drop, name=name, inplace=inplace)
+
+    got = gs.reset_index(level=level, drop=drop, name=name, inplace=inplace)
+    if inplace:
+        expect = ps
+        got = gs
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize("level", [None, 0, 1, [None]])
+@pytest.mark.parametrize("original_name", [None, "original_ser"])
+@pytest.mark.parametrize("name", [None, "ser"])
+def test_reset_index_dup_level_name(level, drop, inplace, original_name, name):
+    # midx levels are named [None, None]
+    midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
+    ps = pd.Series(range(4), index=midx, name=original_name)
+    gs = cudf.from_pandas(ps)
+    if level == [None] or (not drop and inplace):
+        pytest.skip(
+            "For exception checks, see "
+            "test_reset_index_dup_level_name_exceptions"
+        )
+
+    expect = ps.reset_index(level=level, drop=drop, inplace=inplace, name=name)
+    got = gs.reset_index(level=level, drop=drop, inplace=inplace, name=name)
+    if inplace:
+        expect = ps
+        got = gs
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize("original_name", [None, "original_ser"])
+@pytest.mark.parametrize("name", [None, "ser"])
+def test_reset_index_named(drop, inplace, original_name, name):
+    ps = pd.Series(range(4), index=["x", "y", "z", "w"], name=original_name)
+    gs = cudf.from_pandas(ps)
+
+    ps.index.name = "cudf"
+    gs.index.name = "cudf"
+
+    if not drop and inplace:
+        pytest.skip(
+            "For exception checks, see "
+            "test_reset_index_dup_level_name_exceptions"
+        )
+
+    expect = ps.reset_index(drop=drop, inplace=inplace, name=name)
+    got = gs.reset_index(drop=drop, inplace=inplace, name=name)
+
+    if inplace:
+        expect = ps
+        got = gs
+
+    assert_eq(expect, got)
+
+
+def test_reset_index_dup_level_name_exceptions():
+    midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
+    ps = pd.Series(range(4), index=midx)
+    gs = cudf.from_pandas(ps)
+
+    # Duplicate level names must be specified by level number.
+    assert_exceptions_equal(
+        lfunc=ps.reset_index,
+        rfunc=gs.reset_index,
+        lfunc_args_and_kwargs=(
+            [],
+            {"level": [None]},
+        ),
+        rfunc_args_and_kwargs=(
+            [],
+            {"level": [None]},
+        ),
+    )
+
+    # Cannot use drop=False and inplace=True to turn a Series into a DataFrame.
+    assert_exceptions_equal(
+        lfunc=ps.reset_index,
+        rfunc=gs.reset_index,
+        lfunc_args_and_kwargs=(
+            [],
+            {"drop": False, "inplace": True},
+        ),
+        rfunc_args_and_kwargs=(
+            [],
+            {"drop": False, "inplace": True},
+        ),
+    )
+
+    # Pandas raises the same exception when these two invalid inputs are combined.
+ assert_exceptions_equal( + lfunc=ps.reset_index, + rfunc=gs.reset_index, + lfunc_args_and_kwargs=( + [], + {"level": [None], "drop": False, "inplace": True}, + ), + rfunc_args_and_kwargs=( + [], + {"level": [None], "drop": False, "inplace": True}, + ), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_round.py b/python/cudf/cudf/tests/series/methods/test_round.py new file mode 100644 index 00000000000..320371a7f38 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_round.py @@ -0,0 +1,149 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import decimal + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "arr", + [ + np.random.default_rng(seed=0).normal(-100, 100, 10), + np.random.default_rng(seed=0).integers(-50, 50, 10), + np.zeros(10), + np.repeat([-0.6459412758761901], 10), + np.repeat(np.nan, 10), + np.array([1.123, 2.343, np.nan, 0.0]), + np.arange(-100.5, 101.5, 1), + ], +) +@pytest.mark.parametrize("decimals", [-3, -1, 0, 1, 12, np.int8(1)]) +def test_series_round(arr, decimals, nan_as_null): + pser = pd.Series(arr) + ser = cudf.Series(arr, nan_as_null=nan_as_null) + result = ser.round(decimals) + expected = pser.round(decimals) + + assert_eq(result, expected) + + +def test_series_round_half_up(): + s = cudf.Series([0.0, 1.0, 1.2, 1.7, 0.5, 1.5, 2.5, None]) + expect = cudf.Series([0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, None]) + got = s.round(how="half_up") + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "series_data", + [ + [1.0, None, np.nan, 4.0], + [1.24430, None, np.nan, 4.423530], + [1.24430, np.nan, 4.423530], + [-1.24430, np.nan, -4.423530], + np.repeat(np.nan, 100), + ], +) +@pytest.mark.parametrize("decimal", [0, 1, 3]) +def test_round_nan_as_null_false(series_data, decimal): + series = cudf.Series(series_data, nan_as_null=False) + pser = series.to_pandas() + result = series.round(decimal) + expected = pser.round(decimal) + assert_eq(result, expected, atol=1e-10) + + +@pytest.mark.parametrize( + "data, dtype, decimals, expected_half_up, expected_half_even", + [ + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 2, + [1.23, 2.35, 3.46], + [1.23, 2.34, 3.46], + ), + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 0, + [1.0, 2.0, 3.0], + [1.0, 2.0, 3.0], + ), + ( + [1.234, 2.345, 3.456], + cudf.Decimal32Dtype(precision=5, scale=3), + 3, + [1.234, 2.345, 3.456], + [1.234, 2.345, 3.456], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 4, + [1.2346, 2.3457, 3.4568], + [1.2346, 2.3457, 3.4568], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 2, + [1.23, 2.35, 3.46], + [1.23, 2.35, 3.46], + ), + ( + [1.234567, 2.345678, 3.456789], + cudf.Decimal64Dtype(precision=10, scale=6), + 6, + [1.234567, 2.345678, 3.456789], + [1.234567, 2.345678, 3.456789], + ), + ], +) +def test_series_round_decimal( + data, dtype, decimals, expected_half_up, expected_half_even +): + ser = cudf.Series(data).astype(dtype) + + result_half_up = ser.round(decimals=decimals, how="half_up").astype(dtype) + expected_ser_half_up = cudf.Series(expected_half_up).astype(dtype) + assert_eq(result_half_up, expected_ser_half_up) + + result_half_even = ser.round(decimals=decimals, how="half_even").astype( + dtype + ) + expected_ser_half_even = cudf.Series(expected_half_even).astype(dtype) + assert_eq(result_half_even, expected_ser_half_even) + + +@pytest.mark.parametrize( 
+ "data", + [ + [1.2234242333234, 323432.3243423, np.nan], + pd.Series([34224, 324324, 324342], dtype="datetime64[ns]"), + pd.Series([224.242, None, 2424.234324], dtype="category"), + [ + decimal.Decimal("342.3243234234242"), + decimal.Decimal("89.32432497687622"), + None, + ], + ], +) +@pytest.mark.parametrize("digits", [0, 1, 7]) +def test_series_round_builtin(data, digits): + ps = pd.Series(data) + gs = cudf.from_pandas(ps, nan_as_null=False) + + # TODO: Remove `to_frame` workaround + # after following issue is fixed: + # https://github.com/pandas-dev/pandas/issues/55114 + expected = round(ps.to_frame(), digits)[0] + expected.name = None + actual = round(gs, digits) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_sort_index.py b/python/cudf/cudf/tests/series/methods/test_sort_index.py new file mode 100644 index 00000000000..4b2fddf7425 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_sort_index.py @@ -0,0 +1,46 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "index", + [ + pd.RangeIndex(0, 3, 1), + [3.0, 1.0, np.nan], + ["a", "z", None], + pd.RangeIndex(4, -1, -2), + ], +) +@pytest.mark.parametrize("axis", [0, "index"]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_series_sort_index( + index, axis, ascending, inplace, ignore_index, na_position +): + ps = pd.Series([10, 3, 12], index=index) + gs = cudf.from_pandas(ps) + + expected = ps.sort_index( + axis=axis, + ascending=ascending, + ignore_index=ignore_index, + inplace=inplace, + na_position=na_position, + ) + got = gs.sort_index( + axis=axis, + ascending=ascending, + ignore_index=ignore_index, + inplace=inplace, + na_position=na_position, + ) + + if inplace is True: + assert_eq(ps, gs, check_index_type=True) + else: + assert_eq(expected, got, check_index_type=True) diff --git a/python/cudf/cudf/tests/series/methods/test_squeeze.py b/python/cudf/cudf/tests/series/methods/test_squeeze.py new file mode 100644 index 00000000000..4e39c235e83 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_squeeze.py @@ -0,0 +1,21 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("axis", [None, 0, "index"]) +@pytest.mark.parametrize("data", [[1, 2], [1]]) +def test_squeeze(axis, data): + ser = cudf.Series(data) + result = ser.squeeze(axis=axis) + expected = ser.to_pandas().squeeze(axis=axis) + assert_eq(result, expected) + + +@pytest.mark.parametrize("axis", [1, "columns"]) +def test_squeeze_invalid_axis(axis): + with pytest.raises(ValueError): + cudf.Series([1]).squeeze(axis=axis) diff --git a/python/cudf/cudf/tests/series/methods/test_to_cupy.py b/python/cudf/cudf/tests/series/methods/test_to_cupy.py new file mode 100644 index 00000000000..54897a73b0f --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_cupy.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
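+# to_cupy() refuses to convert a column that still contains nulls unless an
+# explicit na_value is supplied; both the error path and the na_value fill
+# path are checked below.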
+ +import cupy as cp +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("has_nulls", [False, True]) +@pytest.mark.parametrize("use_na_value", [False, True]) +def test_series_to_cupy( + numeric_and_bool_types_as_str, has_nulls, use_na_value +): + size = 10 + if numeric_and_bool_types_as_str == "bool": + np_data = np.array([True, False] * (size // 2), dtype=bool) + else: + np_data = np.arange(size, dtype=numeric_and_bool_types_as_str) + + if has_nulls: + np_data = np_data.astype("object") + np_data[::2] = None + + sr = cudf.Series(np_data, dtype=numeric_and_bool_types_as_str) + + if not has_nulls: + assert_eq(sr.values, cp.asarray(sr)) + return + + if has_nulls and not use_na_value: + with pytest.raises(ValueError, match="Column must have no nulls"): + sr.to_cupy() + return + + na_value = { + "bool": False, + "float32": 0.0, + "float64": 0.0, + }.get(numeric_and_bool_types_as_str, 0) + expected = cp.asarray(sr.fillna(na_value)) if has_nulls else cp.asarray(sr) + assert_eq(sr.to_cupy(na_value=na_value), expected) diff --git a/python/cudf/cudf/tests/series/methods/test_to_dict.py b/python/cudf/cudf/tests/series/methods/test_to_dict.py new file mode 100644 index 00000000000..9bbb4b9e652 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_dict.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +from collections import OrderedDict, defaultdict + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)]) +def test_series_to_dict(into): + gs = cudf.Series(["ab", "de", "zx"], index=[10, 20, 100]) + ps = gs.to_pandas() + + actual = gs.to_dict(into=into) + expected = ps.to_dict(into=into) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py new file mode 100644 index 00000000000..8e7ced2ea30 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -0,0 +1,154 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
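+# to_pandas(nullable=True) maps cudf nulls onto the pandas nullable
+# extension dtypes, and cannot be combined with arrow_type=True.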
+ +import datetime +import decimal + +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "sr_data,expected_psr", + [ + ( + pa.array([1, 2, None, 3], type=pa.uint8()), + pd.Series([1, 2, None, 3], dtype=pd.UInt8Dtype()), + ), + ( + pa.array([23, None, None, 32], type=pa.uint16()), + pd.Series([23, None, None, 32], dtype=pd.UInt16Dtype()), + ), + ( + pa.array([None, 123, None, 1], type=pa.uint32()), + pd.Series([None, 123, None, 1], dtype=pd.UInt32Dtype()), + ), + ( + pa.array([234, 2323, 23432, None, None, 224], type=pa.uint64()), + pd.Series( + [234, 2323, 23432, None, None, 224], dtype=pd.UInt64Dtype() + ), + ), + ( + pa.array([-10, 1, None, -1, None, 3], type=pa.int8()), + pd.Series([-10, 1, None, -1, None, 3], dtype=pd.Int8Dtype()), + ), + ( + pa.array([111, None, 222, None, 13], type=pa.int16()), + pd.Series([111, None, 222, None, 13], dtype=pd.Int16Dtype()), + ), + ( + pa.array([11, None, 22, 33, None, 2, None, 3], type=pa.int32()), + pd.Series( + [11, None, 22, 33, None, 2, None, 3], dtype=pd.Int32Dtype() + ), + ), + ( + pa.array( + [32431, None, None, 32322, 0, 10, -32324, None], + type=pa.int64(), + ), + pd.Series( + [32431, None, None, 32322, 0, 10, -32324, None], + dtype=pd.Int64Dtype(), + ), + ), + ( + pa.array( + [True, None, False, None, False, True, True, False], + type=pa.bool_(), + ), + pd.Series( + [True, None, False, None, False, True, True, False], + dtype=pd.BooleanDtype(), + ), + ), + ( + pa.array( + [ + "abc", + "a", + None, + "hello world", + "foo buzz", + "", + None, + "rapids ai", + ], + type=pa.string(), + ), + pd.Series( + [ + "abc", + "a", + None, + "hello world", + "foo buzz", + "", + None, + "rapids ai", + ], + dtype=pd.StringDtype(), + ), + ), + ( + pa.array( + [1, 2, None, 10.2, None], + type=pa.float32(), + ), + pd.Series( + [1, 2, None, 10.2, None], + dtype=pd.Float32Dtype(), + ), + ), + ], +) +def test_series_to_pandas_nullable_dtypes(sr_data, expected_psr): + sr = cudf.Series(sr_data) + actual_psr = sr.to_pandas(nullable=True) + + assert_eq(actual_psr, expected_psr) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + {"1": 2}, + [1], + decimal.Decimal("1.0"), + ], +) +def test_series_to_pandas_arrow_type_nullable_raises(scalar): + pa_array = pa.array([scalar, None]) + ser = cudf.Series(pa_array) + with pytest.raises(ValueError, match=".* cannot both be set"): + ser.to_pandas(nullable=True, arrow_type=True) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + {"1": 2}, + [1], + decimal.Decimal("1.0"), + ], +) +def test_series_to_pandas_arrow_type(scalar): + pa_array = pa.array([scalar, None]) + ser = cudf.Series(pa_array) + result = ser.to_pandas(arrow_type=True) + expected = pd.Series(pd.arrays.ArrowExtensionArray(pa_array)) + pd.testing.assert_series_equal(result, expected) diff --git a/python/cudf/cudf/tests/series/methods/test_tolist.py b/python/cudf/cudf/tests/series/methods/test_tolist.py new file mode 100644 index 00000000000..f1675198642 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_tolist.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +import re + +import pytest + +import cudf + + +def test_series_tolist(): + gsr = cudf.Series([1, 2, 3]) + + with pytest.raises( + TypeError, + match=re.escape( + r"cuDF does not support conversion to host memory " + r"via the `tolist()` method. 
Consider using " + r"`.to_arrow().to_pylist()` to construct a Python list." + ), + ): + gsr.tolist() diff --git a/python/cudf/cudf/tests/series/methods/test_transpose.py b/python/cudf/cudf/tests/series/methods/test_transpose.py new file mode 100644 index 00000000000..14fa4606400 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_transpose.py @@ -0,0 +1,28 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [0, 1, 2, 3], + ["abc", "a", None, "hello world", "foo buzz", "", None, "rapids ai"], + ], +) +def test_series_transpose(data): + psr = pd.Series(data=data) + csr = cudf.Series(data=data) + + cudf_transposed = csr.transpose() + pd_transposed = psr.transpose() + cudf_property = csr.T + pd_property = psr.T + + assert_eq(pd_transposed, cudf_transposed) + assert_eq(pd_property, cudf_property) + assert_eq(cudf_transposed, csr) diff --git a/python/cudf/cudf/tests/series/methods/test_truncate.py b/python/cudf/cudf/tests/series/methods/test_truncate.py new file mode 100644 index 00000000000..8a471f0f7a9 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_truncate.py @@ -0,0 +1,50 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +def test_series_truncate(): + csr = cudf.Series([1, 2, 3, 4]) + psr = csr.to_pandas() + + assert_eq(csr.truncate(), psr.truncate()) + assert_eq(csr.truncate(1, 2), psr.truncate(1, 2)) + assert_eq(csr.truncate(before=1, after=2), psr.truncate(before=1, after=2)) + + +def test_series_truncate_errors(): + csr = cudf.Series([1, 2, 3, 4]) + with pytest.raises(ValueError): + csr.truncate(axis=1) + with pytest.raises(ValueError): + csr.truncate(copy=False) + + csr.index = [3, 2, 1, 6] + psr = csr.to_pandas() + assert_exceptions_equal( + lfunc=csr.truncate, + rfunc=psr.truncate, + ) + + +def test_series_truncate_datetimeindex(): + dates = cudf.date_range( + "2021-01-01 23:45:00", "2021-01-02 23:46:00", freq="s" + ) + csr = cudf.Series(range(len(dates)), index=dates) + psr = csr.to_pandas() + + assert_eq( + csr.truncate( + before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" + ), + psr.truncate( + before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" + ), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_update.py b/python/cudf/cudf/tests/series/methods/test_update.py new file mode 100644 index 00000000000..51211f94e0e --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_update.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
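+# Series.update mutates the caller in place, aligning on the index; the
+# deep copies below preserve pristine inputs for the pandas comparison.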
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("index", [None, [1, 2, 3]]) +@pytest.mark.parametrize( + "other", + [ + pd.Series([4, 5, 6]), + pd.Series([4, 5, 6, 7, 8]), + pd.Series([4, np.nan, 6]), + [4, np.nan, 6], + {1: 9}, + ], +) +def test_series_update(index, other): + pd_data = pd.Series([1, 2, 3], index=index) + data = cudf.Series.from_pandas(pd_data) + gs = data.copy(deep=True) + if isinstance(other, pd.Series): + other = cudf.Series.from_pandas(other, nan_as_null=False) + g_other = other.copy(deep=True) + p_other = g_other.to_pandas() + else: + g_other = other + p_other = other + + ps = gs.to_pandas() + + ps.update(p_other) + gs.update(g_other) + assert_eq(gs, ps) diff --git a/python/cudf/cudf/tests/series/methods/test_value_counts.py b/python/cudf/cudf/tests/series/methods/test_value_counts.py new file mode 100644 index 00000000000..68246ecfbf6 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_value_counts.py @@ -0,0 +1,146 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +from string import ascii_letters, digits + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core.column.column import as_column +from cudf.testing import assert_eq + + +@pytest.fixture(params=[True, False]) +def normalize(request): + """Argument for value_counts""" + return request.param + + +@pytest.mark.parametrize( + "data", + [ + [], + pd.date_range("2010-01-01", "2010-02-01"), + [None, None], + [pd.Timestamp(2020, 1, 1), pd.NaT], + ], +) +def test_series_datetime_value_counts(data, normalize, dropna): + psr = pd.Series(data, dtype="datetime64[ns]") + gsr = cudf.from_pandas(psr) + expected = psr.value_counts(dropna=dropna, normalize=normalize) + got = gsr.value_counts(dropna=dropna, normalize=normalize) + + assert_eq(expected.sort_index(), got.sort_index(), check_dtype=False) + assert_eq( + expected.reset_index(drop=True), + got.reset_index(drop=True), + check_dtype=False, + check_index_type=True, + ) + + +def test_categorical_value_counts(dropna, normalize): + num_elements = 20 + rng = np.random.default_rng(seed=12) + pd_cat = pd.Categorical( + pd.Series( + rng.choice(list(ascii_letters + digits), num_elements), + dtype="category", + ) + ) + + # gdf + gdf_value_counts = cudf.Series(pd_cat).value_counts( + dropna=dropna, normalize=normalize + ) + + # pandas + pdf_value_counts = pd.Series(pd_cat).value_counts( + dropna=dropna, normalize=normalize + ) + + # verify + assert_eq( + pdf_value_counts.sort_index(), + gdf_value_counts.sort_index(), + check_dtype=False, + check_index_type=True, + ) + assert_eq( + pdf_value_counts.reset_index(drop=True), + gdf_value_counts.reset_index(drop=True), + check_dtype=False, + check_index_type=True, + ) + + +def test_series_value_counts(dropna, normalize): + rng = np.random.default_rng(seed=0) + size = 10 + arr = rng.integers(low=-1, high=10, size=size) + mask = arr != -1 + sr = cudf.Series._from_column( + as_column(arr).set_mask(cudf.Series(mask)._column.as_mask()) + ) + sr.name = "col" + + expect = ( + sr.to_pandas() + .value_counts(dropna=dropna, normalize=normalize) + .sort_index() + ) + got = sr.value_counts(dropna=dropna, normalize=normalize).sort_index() + + assert_eq(expect, got, check_dtype=True, check_index_type=False) + + +@pytest.mark.parametrize("bins", [1, 3]) +def test_series_value_counts_bins(bins): + psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0]) + gsr = cudf.from_pandas(psr) + + expected = psr.value_counts(bins=bins) + got 
= gsr.value_counts(bins=bins) + + assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) + + +@pytest.mark.parametrize("bins", [1, 3]) +def test_series_value_counts_bins_dropna(bins, dropna): + psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0, np.nan]) + gsr = cudf.from_pandas(psr) + + expected = psr.value_counts(bins=bins, dropna=dropna) + got = gsr.value_counts(bins=bins, dropna=dropna) + + assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) + + +def test_series_value_counts_optional_arguments(ascending, dropna, normalize): + psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0, None]) + gsr = cudf.from_pandas(psr) + + expected = psr.value_counts( + ascending=ascending, dropna=dropna, normalize=normalize + ) + got = gsr.value_counts( + ascending=ascending, dropna=dropna, normalize=normalize + ) + + assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) + assert_eq( + expected.reset_index(drop=True), + got.reset_index(drop=True), + check_dtype=True, + ) + + +def test_series_categorical_missing_value_count(): + ps = pd.Series(pd.Categorical(list("abcccb"), categories=list("cabd"))) + gs = cudf.from_pandas(ps) + + expected = ps.value_counts() + actual = gs.value_counts() + + assert_eq(expected, actual, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/methods/test_where.py b/python/cudf/cudf/tests/series/methods/test_where.py new file mode 100644 index 00000000000..e0f01fd3cb8 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_where.py @@ -0,0 +1,25 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import re + +import pytest + +import cudf + + +def test_series_where_mixed_dtypes_error(): + s = cudf.Series(["a", "b", "c"]) + with pytest.raises( + TypeError, + match=re.escape( + "cudf does not support mixed types, please type-cast " + "the column of dataframe/series and other " + "to same dtypes." + ), + ): + s.where([True, False, True], [1, 2, 3]) + + +def test_series_where_mixed_bool_dtype(): + s = cudf.Series([True, False, True]) + with pytest.raises(TypeError): + s.where(~s, 10) diff --git a/python/cudf/cudf/tests/series/test_accessors.py b/python/cudf/cudf/tests/series/test_accessors.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_accessors.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index 06777c8e6af..7ae81a6c257 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -1 +1,143 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
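+# Host-side iteration over a Series is intentionally unsupported in cudf;
+# the first test below asserts that iter()/items()/iteritems() all raise a
+# TypeError directing users to to_arrow(), to_pandas() or values_host.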
+import re + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture( + params=[ + pd.Series([0, 1, 2, np.nan, 4, None, 6]), + pd.Series( + [0, 1, 2, np.nan, 4, None, 6], + index=["q", "w", "e", "r", "t", "y", "u"], + name="a", + ), + pd.Series([0, 1, 2, 3, 4]), + pd.Series(["a", "b", "u", "h", "d"]), + pd.Series([None, None, np.nan, None, np.inf, -np.inf]), + pd.Series([], dtype="float64"), + pd.Series( + [pd.NaT, pd.Timestamp("1939-05-27"), pd.Timestamp("1940-04-25")] + ), + pd.Series([np.nan]), + pd.Series([None]), + pd.Series(["a", "b", "", "c", None, "e"]), + ] +) +def ps(request): + return request.param + + +def test_series_iter_error(): + gs = cudf.Series([1, 2, 3]) + + with pytest.raises( + TypeError, + match=re.escape( + f"{gs.__class__.__name__} object is not iterable. " + f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " + f"if you wish to iterate over the values." + ), + ): + iter(gs) + + with pytest.raises( + TypeError, + match=re.escape( + f"{gs.__class__.__name__} object is not iterable. " + f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " + f"if you wish to iterate over the values." + ), + ): + gs.items() + + with pytest.raises( + TypeError, + match=re.escape( + f"{gs.__class__.__name__} object is not iterable. " + f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " + f"if you wish to iterate over the values." + ), + ): + gs.iteritems() + + with pytest.raises(TypeError): + iter(gs._column) + + +@pytest.mark.parametrize("data", [[], [None, None], ["a", None]]) +def test_series_size(data): + psr = pd.Series(data) + gsr = cudf.Series(data) + + assert_eq(psr.size, gsr.size) + + +def test_set_index_unequal_length(): + s = cudf.Series(dtype="float64") + with pytest.raises(ValueError): + s.index = [1, 2, 3] + + +@pytest.mark.parametrize( + "data", + [ + [], + [1, 2, 3, 4], + ["a", "b", "c"], + [1.2, 2.2, 4.5], + [np.nan, np.nan], + [None, None, None], + ], +) +def test_axes(data): + csr = cudf.Series(data) + psr = csr.to_pandas() + + expected = psr.axes + actual = csr.axes + + for e, a in zip(expected, actual, strict=True): + assert_eq(e, a) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + pytest.param( + [np.nan, 10, 15, 16], + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/49818" + ), + ), + [np.nan, None, 10, 20], + ["ab", "zx", "pq"], + ["ab", "zx", None, "pq"], + [], + ], +) +def test_series_hasnans(data): + gs = cudf.Series(data, nan_as_null=False) + ps = gs.to_pandas(nullable=True) + + # Check type to avoid mixing Python bool and NumPy bool + assert isinstance(gs.hasnans, bool) + assert gs.hasnans == ps.hasnans + + +def test_dtype_dtypes_equal(): + ser = cudf.Series([0]) + assert ser.dtype is ser.dtypes + assert ser.dtypes is ser.to_pandas().dtypes + + +def test_roundtrip_series_plc_column(ps): + expect = cudf.Series(ps) + actual = cudf.Series.from_pylibcudf(*expect.to_pylibcudf()) + assert_eq(expect, actual) diff --git a/python/cudf/cudf/tests/series/test_binary_operations.py b/python/cudf/cudf/tests/series/test_binary_operations.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_binary_operations.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
diff --git a/python/cudf/cudf/tests/series/test_binops.py b/python/cudf/cudf/tests/series/test_binops.py new file mode 100644 index 00000000000..1c288a48e90 --- /dev/null +++ b/python/cudf/cudf/tests/series/test_binops.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import operator + +import pandas as pd +import pytest + +import cudf +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.mark.parametrize( + "sr1", [pd.Series([10, 11, 12], index=["a", "b", "z"]), pd.Series(["a"])] +) +@pytest.mark.parametrize( + "sr2", + [pd.Series([], dtype="float64"), pd.Series(["a", "a", "c", "z", "A"])], +) +@pytest.mark.parametrize( + "op", + [ + operator.eq, + operator.ne, + operator.lt, + operator.gt, + operator.le, + operator.ge, + ], +) +def test_series_error_equality(sr1, sr2, op): + gsr1 = cudf.from_pandas(sr1) + gsr2 = cudf.from_pandas(sr2) + + assert_exceptions_equal(op, op, ([sr1, sr2],), ([gsr1, gsr2],)) diff --git a/python/cudf/cudf/tests/series/test_categorial.py b/python/cudf/cudf/tests/series/test_categorial.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_categorial.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_combining.py b/python/cudf/cudf/tests/series/test_combining.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_combining.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_computation.py b/python/cudf/cudf/tests/series/test_computation.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_computation.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_constructing.py b/python/cudf/cudf/tests/series/test_constructing.py deleted file mode 100644 index 6600e99ade3..00000000000 --- a/python/cudf/cudf/tests/series/test_constructing.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -import numpy as np - -import cudf - - -def test_construct_int_series_with_nulls_compat_mode(): - # in compatibility mode, constructing a Series - # with nulls should result in a floating Series: - with cudf.option_context("mode.pandas_compatible", True): - s = cudf.Series([1, 2, None]) - assert s.dtype == np.dtype("float64") diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py new file mode 100644 index 00000000000..b8bca586361 --- /dev/null +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -0,0 +1,637 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
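+# Constructor behavior tests. Several cases below rely on conftest fixtures
+# (e.g. default_integer_bitwidth, default_float_bitwidth), which are assumed
+# to temporarily set the corresponding cudf options.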
+import decimal + +import cupy as cp +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.core.column.column import as_column +from cudf.errors import MixedTypeError +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def test_construct_int_series_with_nulls_compat_mode(): + # in compatibility mode, constructing a Series + # with nulls should result in a floating Series: + with cudf.option_context("mode.pandas_compatible", True): + s = cudf.Series([1, 2, None]) + assert s.dtype == np.dtype("float64") + + +@pytest.mark.parametrize( + "data", + [ + {"a": 1, "b": 2, "c": 24, "d": 1010}, + {"a": 1}, + {1: "a", 2: "b", 24: "c", 1010: "d"}, + {1: "a"}, + {"a": [1]}, + ], +) +def test_series_init_dict(data): + pandas_series = pd.Series(data) + cudf_series = cudf.Series(data) + + assert_eq(pandas_series, cudf_series) + + +def test_series_unitness_np_datetimelike_units(): + data = np.array([np.timedelta64(1)]) + with pytest.raises(TypeError): + cudf.Series(data) + with pytest.raises(TypeError): + pd.Series(data) + + +def test_list_category_like_maintains_dtype(): + dtype = cudf.CategoricalDtype(categories=[1, 2, 3, 4], ordered=True) + data = [1, 2, 3] + result = cudf.Series._from_column(as_column(data, dtype=dtype)) + expected = pd.Series(data, dtype=dtype.to_pandas()) + assert_eq(result, expected) + + +def test_list_interval_like_maintains_dtype(): + dtype = cudf.IntervalDtype(subtype=np.int8) + data = [pd.Interval(1, 2)] + result = cudf.Series._from_column(as_column(data, dtype=dtype)) + expected = pd.Series(data, dtype=dtype.to_pandas()) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "klass", [cudf.Series, cudf.Index, pd.Series, pd.Index] +) +def test_series_from_named_object_name_priority(klass): + result = cudf.Series(klass([1], name="a"), name="b") + assert result.name == "b" + + +@pytest.mark.parametrize( + "data", + [ + {"a": 1, "b": 2, "c": 3}, + cudf.Series([1, 2, 3], index=list("abc")), + pd.Series([1, 2, 3], index=list("abc")), + ], +) +def test_series_from_object_with_index_index_arg_reindex(data): + result = cudf.Series(data, index=list("bca")) + expected = cudf.Series([2, 3, 1], index=list("bca")) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + {0: 1, 1: 2, 2: 3}, + cudf.Series([1, 2, 3]), + cudf.Index([1, 2, 3]), + pd.Series([1, 2, 3]), + pd.Index([1, 2, 3]), + [1, 2, 3], + ], +) +def test_series_dtype_astypes(data): + result = cudf.Series(data, dtype="float64") + expected = cudf.Series([1.0, 2.0, 3.0]) + assert_eq(result, expected) + + +@pytest.mark.parametrize("pa_type", [pa.string, pa.large_string]) +def test_series_from_large_string(pa_type): + pa_string_array = pa.array(["a", "b", "c"]).cast(pa_type()) + got = cudf.Series(pa_string_array) + expected = pd.Series(pa_string_array) + + assert_eq(expected, got) + + +def test_series_init_with_nans(): + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.Series([1, 2, 3, np.nan]) + assert gs.dtype == np.dtype("float64") + ps = pd.Series([1, 2, 3, np.nan]) + assert_eq(ps, gs) + + +@pytest.mark.parametrize( + "data", + [ + [[1, 2, 3], [10, 20]], + [[1.0, 2.0, 3.0], None, [10.0, 20.0, np.nan]], + [[5, 6], None, [1]], + [None, None, None, None, None, [10, 20]], + ], +) +@pytest.mark.parametrize("klass", [cudf.Series, list, cp.array]) +def test_nested_series_from_sequence_data(data, klass): + actual = cudf.Series( + [klass(val) if val is not None else val for val in data] + ) + 
expected = cudf.Series(data) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "data", + [ + lambda: cp.ones(5, dtype=cp.float16), + lambda: np.ones(5, dtype="float16"), + lambda: pd.Series([0.1, 1.2, 3.3], dtype="float16"), + pytest.param( + lambda: pa.array(np.ones(5, dtype="float16")), + marks=pytest.mark.xfail( + reason="https://issues.apache.org/jira/browse/ARROW-13762" + ), + ), + ], +) +def test_series_raises_float16(data): + data = data() + with pytest.raises(TypeError): + cudf.Series(data) + + +@pytest.mark.parametrize( + "data", [[True, False, None, True, False], [None, None], []] +) +@pytest.mark.parametrize("bool_dtype", ["bool", "boolean", pd.BooleanDtype()]) +def test_nullable_bool_dtype_series(data, bool_dtype): + psr = pd.Series(data, dtype=pd.BooleanDtype()) + gsr = cudf.Series(data, dtype=bool_dtype) + + assert_eq(psr, gsr.to_pandas(nullable=True)) + + +@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0]) +@pytest.mark.parametrize("klass", [pd.Timestamp, pd.Timedelta]) +def test_temporal_scalar_series_init(data, klass): + scalar = klass(data) + expected = pd.Series([scalar]) + actual = cudf.Series([scalar]) + + assert_eq(expected, actual) + + expected = pd.Series(scalar) + actual = cudf.Series(scalar) + + assert_eq(expected, actual) + + +def test_series_from_series_index_no_shallow_copy(): + ser1 = cudf.Series(range(3), index=list("abc")) + ser2 = cudf.Series(ser1) + assert ser1.index is ser2.index + + +def test_int8_int16_construction(): + s = cudf.Series([np.int8(8), np.int16(128)]) + assert s.dtype == np.dtype("i2") + + +@pytest.mark.parametrize( + "data", [[0, 1, 2, 3, 4], range(5), [np.int8(8), np.int16(128)]] +) +def test_default_integer_bitwidth_construction(default_integer_bitwidth, data): + s = cudf.Series(data) + assert s.dtype == np.dtype(f"i{default_integer_bitwidth // 8}") + + +@pytest.mark.parametrize("data", [[1.5, 2.5, 4.5], [1000, 2000, 4000, 3.14]]) +def test_default_float_bitwidth_construction(default_float_bitwidth, data): + s = cudf.Series(data) + assert s.dtype == np.dtype(f"f{default_float_bitwidth // 8}") + + +def test_series_ordered_dedup(): + # part of https://github.com/rapidsai/cudf/issues/11486 + rng = np.random.default_rng(seed=0) + sr = cudf.Series(rng.integers(0, 100, 1000)) + # pandas unique() preserves order + expect = pd.Series(sr.to_pandas().unique()) + got = cudf.Series._from_column(sr._column.unique()) + assert_eq(expect.values, got.values) + + +def test_int64_equality(): + s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64)) + assert (s != np.int64(2**63 - 1)).all() + + +@pytest.mark.parametrize( + "data", + [ + {"a": 1, "b": 2, "c": 24, "d": 1010}, + {"a": 1}, + ], +) +@pytest.mark.parametrize( + "index", [None, ["b", "c"], ["d", "a", "c", "b"], ["a"]] +) +def test_series_init_dict_with_index(data, index): + pandas_series = pd.Series(data, index=index) + cudf_series = cudf.Series(data, index=index) + + assert_eq(pandas_series, cudf_series) + + +def test_series_data_and_index_length_mismatch(): + assert_exceptions_equal( + lfunc=pd.Series, + rfunc=cudf.Series, + lfunc_args_and_kwargs=([], {"data": [11], "index": [10, 11]}), + rfunc_args_and_kwargs=([], {"data": [11], "index": [10, 11]}), + ) + + +@pytest.mark.parametrize( + "dtype", ["datetime64[ns]", "timedelta64[ns]", "object", "str"] +) +def test_series_mixed_dtype_error(dtype): + ps = pd.concat([pd.Series([1, 2, 3], dtype=dtype), pd.Series([10, 11])]) + with pytest.raises(TypeError): + cudf.Series(ps) + with pytest.raises(TypeError): + 
cudf.Series(ps.array) + + +@pytest.mark.parametrize("data", ["abc", None, 1, 3.7]) +@pytest.mark.parametrize( + "index", [None, ["b", "c"], ["d", "a", "c", "b"], ["a"]] +) +def test_series_init_scalar_with_index(data, index): + pandas_series = pd.Series(data, index=index) + cudf_series = cudf.Series(data, index=index) + + assert_eq( + pandas_series, + cudf_series, + check_index_type=data is not None or index is not None, + check_dtype=data is not None, + ) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4], + [10, 20, None, None], + ], +) +@pytest.mark.parametrize("copy", [True, False]) +def test_series_copy(data, copy): + psr = pd.Series(data) + gsr = cudf.from_pandas(psr) + + new_psr = pd.Series(psr, copy=copy) + new_gsr = cudf.Series(gsr, copy=copy) + + new_psr.iloc[0] = 999 + new_gsr.iloc[0] = 999 + + assert_eq(psr, gsr) + assert_eq(new_psr, new_gsr) + + +def test_series_init_from_series_and_index(): + ser = cudf.Series([4, 7, -5, 3], index=["d", "b", "a", "c"]) + result = cudf.Series(ser, index=list("abcd")) + expected = cudf.Series([-5, 7, 3, 4], index=list("abcd")) + assert_eq(result, expected) + + +def test_series_constructor_unbounded_sequence(): + class A: + def __getitem__(self, key): + return 1 + + with pytest.raises(TypeError): + cudf.Series(A()) + + +def test_series_constructor_error_mixed_type(): + with pytest.raises(MixedTypeError): + cudf.Series(["abc", np.nan, "123"], nan_as_null=False) + + +def test_series_from_pandas_sparse(): + pser = pd.Series(range(2), dtype=pd.SparseDtype(np.int64, 0)) + with pytest.raises(NotImplementedError): + cudf.Series(pser) + + +def test_multi_dim_series_error(): + arr = cp.array([(1, 2), (3, 4)]) + with pytest.raises(ValueError): + cudf.Series(arr) + + +def test_bool_series_mixed_dtype_error(): + ps = pd.Series([True, False, None]) + all_bool_ps = pd.Series([True, False, True], dtype="object") + # ps now has `object` dtype, which + # isn't supported by `cudf`. 
+ with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(TypeError): + cudf.Series(ps) + with pytest.raises(TypeError): + cudf.from_pandas(ps) + with pytest.raises(TypeError): + cudf.Series(ps, dtype=bool) + expected = cudf.Series(all_bool_ps, dtype=bool) + assert_eq(expected, all_bool_ps.astype(bool)) + nan_bools_mix = pd.Series([True, False, True, np.nan], dtype="object") + gs = cudf.Series(nan_bools_mix, nan_as_null=True) + assert_eq(gs.to_pandas(nullable=True), nan_bools_mix.astype("boolean")) + with pytest.raises(TypeError): + cudf.Series(nan_bools_mix, nan_as_null=False) + + +@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) +@pytest.mark.parametrize( + "data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])] +) +def test_nan_as_null_from_arrow_objects(klass, data): + result = klass(data, nan_as_null=True) + expected = klass(pa.array([None], type=pa.float64())) + assert_eq(result, expected) + + +@pytest.mark.parametrize("reso", ["M", "ps"]) +@pytest.mark.parametrize("typ", ["M", "m"]) +def test_series_invalid_reso_dtype(reso, typ): + with pytest.raises(TypeError): + cudf.Series([], dtype=f"{typ}8[{reso}]") + + +@pytest.mark.parametrize("base_name", [None, "a"]) +def test_series_to_frame_none_name(base_name): + result = cudf.Series(range(1), name=base_name).to_frame(name=None) + expected = pd.Series(range(1), name=base_name).to_frame(name=None) + assert_eq(result, expected) + + +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +@pytest.mark.parametrize( + "data", + [ + pa.array([1, None], type=pa.int64()), + pa.chunked_array([[1, None]], type=pa.int64()), + ], +) +def test_from_arrow_array_dtype(klass, data): + obj = klass(data, dtype="int8") + assert obj.dtype == np.dtype("int8") + + +@pytest.mark.parametrize( + "nat, value", + [ + [np.datetime64("nat", "ns"), np.datetime64("2020-01-01", "ns")], + [np.timedelta64("nat", "ns"), np.timedelta64(1, "ns")], + ], +) +@pytest.mark.parametrize("nan_as_null", [True, False]) +def test_series_np_array_nat_nan_as_nulls(nat, value, nan_as_null): + expected = np.array([nat, value]) + ser = cudf.Series(expected, nan_as_null=nan_as_null) + assert ser[0] is pd.NaT + assert ser[1] == value + + +def test_null_like_to_nan_pandas_compat(): + with cudf.option_context("mode.pandas_compatible", True): + ser = cudf.Series([1, 2, np.nan, 10, None]) + pser = pd.Series([1, 2, np.nan, 10, None]) + + assert pser.dtype == ser.dtype + assert_eq(ser, pser) + + +def test_non_strings_dtype_object_pandas_compat_raises(): + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(TypeError): + cudf.Series([1], dtype=object) + + +@pytest.mark.parametrize("arr", [np.array, cp.array, pd.Series]) +def test_construct_nonnative_array(arr): + data = [1, 2, 3.5, 4] + dtype = np.dtype("f4") + native = arr(data, dtype=dtype) + nonnative = arr(data, dtype=dtype.newbyteorder()) + result = cudf.Series(nonnative) + expected = cudf.Series(native) + assert_eq(result, expected) + + +@pytest.mark.parametrize("nan_as_null", [True, False]) +def test_construct_all_pd_NA_with_dtype(nan_as_null): + result = cudf.Series( + [pd.NA, pd.NA], dtype=np.dtype(np.float64), nan_as_null=nan_as_null + ) + expected = cudf.Series(pa.array([None, None], type=pa.float64())) + assert_eq(result, expected) + + +def test_series_empty_dtype(): + expected = pd.Series([]) + actual = cudf.Series([]) + assert_eq(expected, actual, check_dtype=True) + + +@pytest.mark.parametrize("data", [None, {}, []]) +def 
test_series_empty_index_rangeindex(data): + expected = cudf.RangeIndex(0) + result = cudf.Series(data).index + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "pandas_type", + [ + pd.ArrowDtype(pa.int8()), + pd.ArrowDtype(pa.int16()), + pd.ArrowDtype(pa.int32()), + pd.ArrowDtype(pa.int64()), + pd.ArrowDtype(pa.uint8()), + pd.ArrowDtype(pa.uint16()), + pd.ArrowDtype(pa.uint32()), + pd.ArrowDtype(pa.uint64()), + pd.ArrowDtype(pa.float32()), + pd.ArrowDtype(pa.float64()), + pd.Int8Dtype(), + pd.Int16Dtype(), + pd.Int32Dtype(), + pd.Int64Dtype(), + pd.UInt8Dtype(), + pd.UInt16Dtype(), + pd.UInt32Dtype(), + pd.UInt64Dtype(), + pd.Float32Dtype(), + pd.Float64Dtype(), + ], +) +def test_series_arrow_numeric_types_roundtrip(pandas_type): + ps = pd.Series([1, 2, 3], dtype=pandas_type) + pi = pd.Index(ps) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + + with cudf.option_context("mode.pandas_compatible", True): + gi = cudf.from_pandas(pi) + assert_eq(pi, gi) + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "pandas_type", [pd.ArrowDtype(pa.bool_()), pd.BooleanDtype()] +) +def test_series_arrow_bool_types_roundtrip(pandas_type): + ps = pd.Series([True, False, None], dtype=pandas_type) + pi = pd.Index(ps) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + + with cudf.option_context("mode.pandas_compatible", True): + gi = cudf.from_pandas(pi) + assert_eq(pi, gi) + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "pandas_type", [pd.ArrowDtype(pa.string()), pd.StringDtype()] +) +def test_series_arrow_string_types_roundtrip(pandas_type): + ps = pd.Series(["abc", None, "xyz"], dtype=pandas_type) + pi = pd.Index(ps) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + + with cudf.option_context("mode.pandas_compatible", True): + gi = cudf.from_pandas(pi) + assert_eq(pi, gi) + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +def test_series_arrow_category_types_roundtrip(): + pa_array = pa.array(pd.Series([1, 2, 3], dtype="category")) + ps = pd.Series([1, 2, 3], dtype=pd.ArrowDtype(pa_array.type)) + pi = pd.Index(ps) + pdf = pi.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.from_pandas(ps) + + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.from_pandas(pi) + + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.from_pandas(pdf) + + +@pytest.mark.parametrize( + "pa_type", + [pa.decimal128(10, 2), pa.decimal128(5, 2), pa.decimal128(20, 2)], +) +def test_series_arrow_decimal_types_roundtrip(pa_type): + ps = pd.Series( + [ + decimal.Decimal("1.2"), + decimal.Decimal("20.56"), + decimal.Decimal("3"), + ], + dtype=pd.ArrowDtype(pa_type), + ) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +def 
test_series_arrow_struct_types_roundtrip(): + ps = pd.Series( + [{"a": 1}, {"b": "abc"}], + dtype=pd.ArrowDtype(pa.struct({"a": pa.int64(), "b": pa.string()})), + ) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +def test_series_arrow_list_types_roundtrip(): + ps = pd.Series([[1], [2], [4]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + with cudf.option_context("mode.pandas_compatible", True): + gs = cudf.from_pandas(ps) + assert_eq(ps, gs) + pdf = ps.to_frame() + + with cudf.option_context("mode.pandas_compatible", True): + gdf = cudf.from_pandas(pdf) + assert_eq(pdf, gdf) + + +def test_series_error_nan_mixed_types(): + ps = pd.Series([np.nan, "ab", "cd"]) + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(MixedTypeError): + cudf.from_pandas(ps) + + +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +def test_from_pandas_object_dtype_passed_dtype(klass): + result = klass(pd.Series([True, False], dtype=object), dtype="int8") + expected = klass(pa.array([1, 0], type=pa.int8())) + assert_eq(result, expected) + + +def test_to_dense_array(): + rng = np.random.default_rng(seed=0) + data = rng.random(8) + mask = np.asarray([0b11010110]).astype(np.byte) + sr = cudf.Series._from_column( + as_column(data, dtype=np.float64).set_mask(mask) + ) + assert sr.has_nulls + assert sr.null_count != len(sr) + filled = sr.to_numpy(na_value=np.nan) + dense = sr.dropna().to_numpy() + assert dense.size < filled.size + assert filled.size == len(sr) diff --git a/python/cudf/cudf/tests/series/test_function_application.py b/python/cudf/cudf/tests/series/test_function_application.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_function_application.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_indexing.py b/python/cudf/cudf/tests/series/test_indexing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_indexing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_io_serialization.py b/python/cudf/cudf/tests/series/test_io_serialization.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_io_serialization.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_missing.py b/python/cudf/cudf/tests/series/test_missing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_missing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_reshaping.py b/python/cudf/cudf/tests/series/test_reshaping.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_reshaping.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_selecting.py b/python/cudf/cudf/tests/series/test_selecting.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_selecting.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
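
For orientation while reviewing: the tests added above pivot on two constructor behaviors, NaN handling via `nan_as_null` and the stricter `mode.pandas_compatible` option. Below is a minimal, self-contained sketch of those semantics, using only the public cudf API already exercised by the tests; it is illustrative only, not part of the patch, and assumes a working cudf/GPU environment.

import numpy as np
import cudf

# Default construction (nan_as_null=None behaves like True): a float NaN
# is normalized to a cudf null.
s = cudf.Series([1.0, np.nan, 3.0])
assert s.null_count == 1

# Opting out keeps NaN as an ordinary float value rather than a null.
s = cudf.Series([1.0, np.nan, 3.0], nan_as_null=False)
assert s.null_count == 0

# Under pandas-compatibility mode, null-likes follow pandas inference
# (see test_null_like_to_nan_pandas_compat above) ...
with cudf.option_context("mode.pandas_compatible", True):
    s = cudf.Series([1, 2, np.nan, 10, None])
assert s.dtype == np.dtype("float64")

# ... and non-string data with dtype=object is rejected instead of being
# inferred (see test_non_strings_dtype_object_pandas_compat_raises above).
with cudf.option_context("mode.pandas_compatible", True):
    try:
        cudf.Series([1], dtype=object)
    except TypeError:
        pass  # expected: object dtype is reserved for strings in this mode
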
diff --git a/python/cudf/cudf/tests/series/test_sorting.py b/python/cudf/cudf/tests/series/test_sorting.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_sorting.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/series/test_timeseries.py b/python/cudf/cudf/tests/series/test_timeseries.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/series/test_timeseries.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py deleted file mode 100644 index 45a0532ed8c..00000000000 --- a/python/cudf/cudf/tests/test_series.py +++ /dev/null @@ -1,3111 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. -import datetime -import decimal -import hashlib -import operator -import re -from collections import OrderedDict, defaultdict -from string import ascii_letters, digits - -import cupy as cp -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.core.column.column import as_column -from cudf.errors import MixedTypeError -from cudf.testing import assert_eq -from cudf.testing._utils import ( - NUMERIC_TYPES, - SERIES_OR_INDEX_NAMES, - TIMEDELTA_TYPES, - assert_exceptions_equal, - expect_warning_if, - gen_rand, -) - - -@pytest.fixture( - params=[ - pd.Series([0, 1, 2, np.nan, 4, None, 6]), - pd.Series( - [0, 1, 2, np.nan, 4, None, 6], - index=["q", "w", "e", "r", "t", "y", "u"], - name="a", - ), - pd.Series([0, 1, 2, 3, 4]), - pd.Series(["a", "b", "u", "h", "d"]), - pd.Series([None, None, np.nan, None, np.inf, -np.inf]), - pd.Series([], dtype="float64"), - pd.Series( - [pd.NaT, pd.Timestamp("1939-05-27"), pd.Timestamp("1940-04-25")] - ), - pd.Series([np.nan]), - pd.Series([None]), - pd.Series(["a", "b", "", "c", None, "e"]), - ] -) -def ps(request): - return request.param - - -@pytest.mark.parametrize( - "data", - [ - {"a": 1, "b": 2, "c": 24, "d": 1010}, - {"a": 1}, - {1: "a", 2: "b", 24: "c", 1010: "d"}, - {1: "a"}, - ], -) -def test_series_init_dict(data): - pandas_series = pd.Series(data) - cudf_series = cudf.Series(data) - - assert_eq(pandas_series, cudf_series) - - -@pytest.mark.parametrize( - "data", - [ - { - "a": [1, 2, 3], - "b": [2, 3, 5], - "c": [24, 12212, 22233], - "d": [1010, 101010, 1111], - }, - {"a": [1]}, - ], -) -def test_series_init_dict_lists(data): - assert_eq(pd.Series(data), cudf.Series(data)) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4], - [1.0, 12.221, 12.34, 13.324, 324.3242], - [-10, -1111, 100, 11, 133], - ], -) -@pytest.mark.parametrize( - "others", - [ - [10, 11, 12, 13], - [0.1, 0.002, 324.2332, 0.2342], - [-10, -1111, 100, 11, 133], - ], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_series_concat_basic(data, others, ignore_index): - psr = pd.Series(data) - gsr = cudf.Series(data) - - other_ps = pd.Series(others) - other_gs = cudf.Series(others) - - expected = pd.concat([psr, other_ps], ignore_index=ignore_index) - actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [ - "abc", - "def", - "this is a string", - "this is another string", - "a", - "b", - "c", - ], - ["a"], - ], -) -@pytest.mark.parametrize( - "others", - [ - [ - 
"abc", - "def", - "this is a string", - "this is another string", - "a", - "b", - "c", - ], - ["a"], - ["1", "2", "3", "4", "5"], - ["+", "-", "!", "_", "="], - ], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_series_concat_basic_str(data, others, ignore_index): - psr = pd.Series(data) - gsr = cudf.Series(data) - - other_ps = pd.Series(others) - other_gs = cudf.Series(others) - - expected = pd.concat([psr, other_ps], ignore_index=ignore_index) - actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series( - [ - "abc", - "def", - "this is a string", - "this is another string", - "a", - "b", - "c", - ], - index=[10, 20, 30, 40, 50, 60, 70], - ), - pd.Series(["a"], index=[2]), - ], -) -@pytest.mark.parametrize( - "others", - [ - pd.Series( - [ - "abc", - "def", - "this is a string", - "this is another string", - "a", - "b", - "c", - ], - index=[10, 20, 30, 40, 50, 60, 70], - ), - pd.Series(["a"], index=[133]), - pd.Series(["1", "2", "3", "4", "5"], index=[-10, 22, 33, 44, 49]), - pd.Series(["+", "-", "!", "_", "="], index=[11, 22, 33, 44, 2]), - ], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_series_concat_series_with_index(data, others, ignore_index): - psr = pd.Series(data) - gsr = cudf.Series(data) - - other_ps = others - other_gs = cudf.from_pandas(others) - - expected = pd.concat([psr, other_ps], ignore_index=ignore_index) - actual = cudf.concat([gsr, other_gs], ignore_index=ignore_index) - - assert_eq(expected, actual) - - -def test_series_concat_error_mixed_types(): - gsr = cudf.Series([1, 2, 3, 4]) - other = cudf.Series(["a", "b", "c", "d"]) - - with pytest.raises( - TypeError, - match="cudf does not support mixed types, please type-cast " - "both series to same dtypes.", - ): - cudf.concat([gsr, other]) - - with pytest.raises( - TypeError, - match="cudf does not support mixed types, please type-cast " - "both series to same dtypes.", - ): - cudf.concat([gsr, gsr, other, gsr, other]) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([1, 2, 3, 4], index=["a", "b", "c", "d"]), - pd.Series( - [1.0, 12.221, 12.34, 13.324, 324.3242], - index=[ - "float one", - "float two", - "float three", - "float four", - "float five", - ], - ), - pd.Series( - [-10, -1111, 100, 11, 133], - index=["one", "two", "three", "four", "five"], - ), - ], -) -@pytest.mark.parametrize( - "others", - [ - [ - pd.Series([10, 11, 12, 13], index=["a", "b", "c", "d"]), - pd.Series([12, 14, 15, 27], index=["d", "e", "z", "x"]), - ], - [ - pd.Series([10, 11, 12, 13], index=["a", "b", "c", "d"]), - pd.Series([12, 14, 15, 27], index=["d", "e", "z", "x"]), - ] - * 25, - [ - pd.Series( - [0.1, 0.002, 324.2332, 0.2342], index=["-", "+", "%", "#"] - ), - pd.Series([12, 14, 15, 27], index=["d", "e", "z", "x"]), - ] - * 46, - [ - pd.Series( - [-10, -1111, 100, 11, 133], - index=["aa", "vv", "bb", "dd", "ll"], - ) - ], - ], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_series_concat_list_series_with_index(data, others, ignore_index): - psr = pd.Series(data) - gsr = cudf.Series(data) - - other_ps = others - other_gs = [cudf.from_pandas(obj) for obj in others] - - expected = pd.concat([psr, *other_ps], ignore_index=ignore_index) - actual = cudf.concat([gsr, *other_gs], ignore_index=ignore_index) - - assert_eq(expected, actual) - - -def test_series_concat_existing_buffers(): - a1 = np.arange(10, dtype=np.float64) - gs = cudf.Series(a1) - - # Add new buffer - a2 = 
cudf.Series(np.arange(5)) - gs = cudf.concat([gs, a2]) - assert len(gs) == 15 - np.testing.assert_equal(gs.to_numpy(), np.hstack([a1, a2.to_numpy()])) - - # Ensure appending to previous buffer - a3 = cudf.Series(np.arange(3)) - gs = cudf.concat([gs, a3]) - assert len(gs) == 18 - a4 = np.hstack([a1, a2.to_numpy(), a3.to_numpy()]) - np.testing.assert_equal(gs.to_numpy(), a4) - - # Appending different dtype - a5 = cudf.Series(np.array([1, 2, 3], dtype=np.int32)) - a6 = cudf.Series(np.array([4.5, 5.5, 6.5], dtype=np.float64)) - gs = cudf.concat([a5, a6]) - np.testing.assert_equal( - gs.to_numpy(), np.hstack([a5.to_numpy(), a6.to_numpy()]) - ) - gs = cudf.concat([cudf.Series(a6), a5]) - np.testing.assert_equal( - gs.to_numpy(), np.hstack([a6.to_numpy(), a5.to_numpy()]) - ) - - -def test_series_column_iter_error(): - gs = cudf.Series([1, 2, 3]) - - with pytest.raises( - TypeError, - match=re.escape( - f"{gs.__class__.__name__} object is not iterable. " - f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " - f"if you wish to iterate over the values." - ), - ): - iter(gs) - - with pytest.raises( - TypeError, - match=re.escape( - f"{gs.__class__.__name__} object is not iterable. " - f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " - f"if you wish to iterate over the values." - ), - ): - gs.items() - - with pytest.raises( - TypeError, - match=re.escape( - f"{gs.__class__.__name__} object is not iterable. " - f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " - f"if you wish to iterate over the values." - ), - ): - gs.iteritems() - - with pytest.raises(TypeError): - iter(gs._column) - - -@pytest.mark.parametrize( - "data", - [ - [1.0, 2.0, None, 4.0, 5.0], - ["a", "b", "c", "d", "e"], - ["a", "b", None, "d", "e"], - [None, None, None, None, None], - np.array(["1991-11-20", "2004-12-04"], dtype=np.datetime64), - np.array(["1991-11-20", None], dtype=np.datetime64), - np.array( - ["1991-11-20 05:15:00", "2004-12-04 10:00:00"], dtype=np.datetime64 - ), - np.array(["1991-11-20 05:15:00", None], dtype=np.datetime64), - ], -) -def test_series_tolist(data): - psr = pd.Series(data) - gsr = cudf.from_pandas(psr) - - with pytest.raises( - TypeError, - match=re.escape( - r"cuDF does not support conversion to host memory " - r"via the `tolist()` method. Consider using " - r"`.to_arrow().to_pylist()` to construct a Python list." - ), - ): - gsr.tolist() - - -@pytest.mark.parametrize( - "data", - [[], [None, None], ["a"], ["a", "b", "c"] * 500, [1.0, 2.0, 0.3] * 57], -) -def test_series_size(data): - psr = pd.Series(data) - gsr = cudf.Series(data) - - assert_eq(psr.size, gsr.size) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -def test_series_describe_numeric(dtype): - ps = pd.Series([0, 1, 2, 3, 1, 2, 3], dtype=dtype) - gs = cudf.from_pandas(ps) - actual = gs.describe() - expected = ps.describe() - - assert_eq(expected, actual, check_dtype=True) - - -@pytest.mark.parametrize("dtype", ["datetime64[ns]"]) -def test_series_describe_datetime(dtype): - # Note that other datetime units are not tested because pandas does not - # support them. When specified coarser units, cuDF datetime columns cannot - # represent fractional time for quantiles of the column, which may require - # interpolation, this differs from pandas which always stay in [ns] unit. - gs = cudf.Series([0, 1, 2, 3, 1, 2, 3], dtype=dtype) - ps = gs.to_pandas() - - # Treating datetimes as categoricals is deprecated in pandas and will - # be removed in future. 
Future behavior is treating datetime as numeric. - expected = ps.describe() - actual = gs.describe() - - assert_eq(expected.astype("str"), actual) - - -@pytest.mark.parametrize("dtype", TIMEDELTA_TYPES) -def test_series_describe_timedelta(dtype): - ps = pd.Series([0, 1, 2, 3, 1, 2, 3], dtype=dtype) - gs = cudf.from_pandas(ps) - - expected = ps.describe() - actual = gs.describe() - - assert_eq(actual, expected.astype("str")) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series(["a", "b", "c", "d", "e", "a"]), - pd.Series([True, False, True, True, False]), - pd.Series([], dtype="str"), - pd.Series(["a", "b", "c", "a"], dtype="category"), - pd.Series(["d", "e", "f"], dtype="category"), - pd.Series(pd.Categorical(["d", "e", "f"], categories=["f", "e", "d"])), - pd.Series( - pd.Categorical( - ["d", "e", "f"], categories=["f", "e", "d"], ordered=True - ) - ), - ], -) -def test_series_describe_other_types(ps): - gs = cudf.from_pandas(ps) - - expected = ps.describe() - actual = gs.describe() - - if len(ps) == 0: - assert_eq(expected.fillna("a").astype("str"), actual.fillna("a")) - else: - assert_eq(expected.astype("str"), actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 2, 1], - [1, 2, None, 3, 1, 1], - [], - ["a", "b", "c", None, "z", "a"], - ], -) -@pytest.mark.parametrize("use_na_sentinel", [True, False]) -def test_series_factorize_use_na_sentinel(data, use_na_sentinel): - gsr = cudf.Series(data) - psr = gsr.to_pandas(nullable=True) - - expected_labels, expected_cats = psr.factorize( - use_na_sentinel=use_na_sentinel, sort=True - ) - actual_labels, actual_cats = gsr.factorize( - use_na_sentinel=use_na_sentinel, sort=True - ) - assert_eq(expected_labels, actual_labels.get()) - assert_eq(expected_cats, actual_cats.to_pandas(nullable=True)) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 2, 1], - [1, 2, None, 3, 1, 1], - [], - ["a", "b", "c", None, "z", "a"], - ], -) -@pytest.mark.parametrize("sort", [True, False]) -def test_series_factorize_sort(data, sort): - gsr = cudf.Series(data) - psr = gsr.to_pandas(nullable=True) - - expected_labels, expected_cats = psr.factorize(sort=sort) - actual_labels, actual_cats = gsr.factorize(sort=sort) - assert_eq(expected_labels, actual_labels.get()) - assert_eq(expected_cats, actual_cats.to_pandas(nullable=True)) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([], dtype="datetime64[ns]"), - pd.Series(pd.date_range("2010-01-01", "2010-02-01")), - pd.Series([None, None], dtype="datetime64[ns]"), - ], -) -@pytest.mark.parametrize("dropna", [True, False]) -@pytest.mark.parametrize("normalize", [True, False]) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_series_datetime_value_counts(data, nulls, normalize, dropna): - psr = data.copy() - rng = np.random.default_rng(seed=0) - if len(data) > 0: - if nulls == "one": - p = rng.integers(0, len(data)) - psr[p] = None - elif nulls == "some": - p = rng.integers(0, len(data), 2) - psr[p] = None - - gsr = cudf.from_pandas(psr) - expected = psr.value_counts(dropna=dropna, normalize=normalize) - got = gsr.value_counts(dropna=dropna, normalize=normalize) - - assert_eq(expected.sort_index(), got.sort_index(), check_dtype=False) - assert_eq( - expected.reset_index(drop=True), - got.reset_index(drop=True), - check_dtype=False, - check_index_type=True, - ) - - -@pytest.mark.parametrize("dropna", [True, False]) -@pytest.mark.parametrize("normalize", [True, False]) -@pytest.mark.parametrize("num_elements", [10, 100, 1000]) -def test_categorical_value_counts(dropna, normalize, 
num_elements): - # create categorical series - rng = np.random.default_rng(seed=12) - pd_cat = pd.Categorical( - pd.Series( - rng.choice(list(ascii_letters + digits), num_elements), - dtype="category", - ) - ) - - # gdf - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series.from_pandas(pd_cat) - gdf_value_counts = gdf["a"].value_counts( - dropna=dropna, normalize=normalize - ) - - # pandas - pdf = pd.DataFrame() - pdf["a"] = pd_cat - pdf_value_counts = pdf["a"].value_counts( - dropna=dropna, normalize=normalize - ) - - # verify - assert_eq( - pdf_value_counts.sort_index(), - gdf_value_counts.sort_index(), - check_dtype=False, - check_index_type=True, - ) - assert_eq( - pdf_value_counts.reset_index(drop=True), - gdf_value_counts.reset_index(drop=True), - check_dtype=False, - check_index_type=True, - ) - - -@pytest.mark.parametrize("dropna", [True, False]) -@pytest.mark.parametrize("normalize", [True, False]) -def test_series_value_counts(dropna, normalize): - rng = np.random.default_rng(seed=0) - for size in [10**x for x in range(5)]: - arr = rng.integers(low=-1, high=10, size=size) - mask = arr != -1 - sr = cudf.Series._from_column( - as_column(arr).set_mask(cudf.Series(mask)._column.as_mask()) - ) - sr.name = "col" - - expect = ( - sr.to_pandas() - .value_counts(dropna=dropna, normalize=normalize) - .sort_index() - ) - got = sr.value_counts(dropna=dropna, normalize=normalize).sort_index() - - assert_eq(expect, got, check_dtype=True, check_index_type=False) - - -@pytest.mark.parametrize("bins", [1, 2, 3]) -def test_series_value_counts_bins(bins): - psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0]) - gsr = cudf.from_pandas(psr) - - expected = psr.value_counts(bins=bins) - got = gsr.value_counts(bins=bins) - - assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) - - -@pytest.mark.parametrize("bins", [1, 2, 3]) -@pytest.mark.parametrize("dropna", [True, False]) -def test_series_value_counts_bins_dropna(bins, dropna): - psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0, np.nan]) - gsr = cudf.from_pandas(psr) - - expected = psr.value_counts(bins=bins, dropna=dropna) - got = gsr.value_counts(bins=bins, dropna=dropna) - - assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("dropna", [True, False]) -@pytest.mark.parametrize("normalize", [True, False]) -def test_series_value_counts_optional_arguments(ascending, dropna, normalize): - psr = pd.Series([1.0, 2.0, 2.0, 3.0, 3.0, 3.0, None]) - gsr = cudf.from_pandas(psr) - - expected = psr.value_counts( - ascending=ascending, dropna=dropna, normalize=normalize - ) - got = gsr.value_counts( - ascending=ascending, dropna=dropna, normalize=normalize - ) - - assert_eq(expected.sort_index(), got.sort_index(), check_dtype=True) - assert_eq( - expected.reset_index(drop=True), - got.reset_index(drop=True), - check_dtype=True, - ) - - -@pytest.mark.parametrize( - "gs", - [ - cudf.Series([1, 2, 3]), - cudf.Series([None]), - cudf.Series([4]), - cudf.Series([2, 3, -1, 0, 1], name="test name"), - cudf.Series( - [1, 2, 3, None, 2, 1], index=["a", "v", "d", "e", "f", "g"] - ), - cudf.Series([1, 2, 3, None, 2, 1, None], name="abc"), - cudf.Series(["ab", "bc", "ab", None, "bc", None, None]), - cudf.Series([None, None, None, None, None], dtype="str"), - cudf.Series([None, None, None, None, None]), - cudf.Series( - [ - 123213, - 23123, - 123123, - 12213123, - 12213123, - 12213123, - 23123, - 2312323123, - None, - None, - ], - dtype="timedelta64[ns]", - ), - 
cudf.Series( - [ - None, - 1, - 2, - 3242434, - 3233243, - 1, - 2, - 1023, - None, - 12213123, - None, - 2312323123, - None, - None, - ], - dtype="datetime64[ns]", - ), - cudf.Series(name="empty series", dtype="float64"), - cudf.Series(["a", "b", "c", " ", "a", "b", "z"], dtype="category"), - ], -) -@pytest.mark.parametrize("dropna", [True, False]) -def test_series_mode(gs, dropna): - ps = gs.to_pandas() - - expected = ps.mode(dropna=dropna) - actual = gs.mode(dropna=dropna) - - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "arr", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.repeat([-0.6459412758761901], 100), - np.repeat(np.nan, 100), - np.array([1.123, 2.343, np.nan, 0.0]), - np.arange(-100.5, 101.5, 1), - ], -) -@pytest.mark.parametrize("decimals", [-5, -3, -1, 0, 1, 4, 12, np.int8(1)]) -def test_series_round(arr, decimals): - pser = pd.Series(arr) - ser = cudf.Series(arr) - result = ser.round(decimals) - expected = pser.round(decimals) - - assert_eq(result, expected) - rng = np.random.default_rng(seed=0) - # with nulls, maintaining existing null mask - arr = arr.astype("float64") # for pandas nulls - arr.ravel()[rng.choice(arr.shape[0], arr.shape[0] // 2, replace=False)] = ( - np.nan - ) - - pser = pd.Series(arr) - ser = cudf.Series(arr) - result = ser.round(decimals) - expected = pser.round(decimals) - - assert_eq(result, expected) - - -def test_series_round_half_up(): - s = cudf.Series([0.0, 1.0, 1.2, 1.7, 0.5, 1.5, 2.5, None]) - expect = cudf.Series([0.0, 1.0, 1.0, 2.0, 1.0, 2.0, 3.0, None]) - got = s.round(how="half_up") - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "series_data", - [ - [1.0, None, np.nan, 4.0], - [1.24430, None, np.nan, 4.423530], - [1.24430, np.nan, 4.423530], - [-1.24430, np.nan, -4.423530], - np.repeat(np.nan, 100), - ], -) -@pytest.mark.parametrize("decimal", [0, 1, 2, 3]) -def test_round_nan_as_null_false(series_data, decimal): - series = cudf.Series(series_data, nan_as_null=False) - pser = series.to_pandas() - result = series.round(decimal) - expected = pser.round(decimal) - assert_eq(result, expected, atol=1e-10) - - -@pytest.mark.parametrize( - "data, dtype, decimals, expected_half_up, expected_half_even", - [ - ( - [1.234, 2.345, 3.456], - cudf.Decimal32Dtype(precision=5, scale=3), - 2, - [1.23, 2.35, 3.46], - [1.23, 2.34, 3.46], - ), - ( - [1.234, 2.345, 3.456], - cudf.Decimal32Dtype(precision=5, scale=3), - 0, - [1.0, 2.0, 3.0], - [1.0, 2.0, 3.0], - ), - ( - [1.234, 2.345, 3.456], - cudf.Decimal32Dtype(precision=5, scale=3), - 3, - [1.234, 2.345, 3.456], - [1.234, 2.345, 3.456], - ), - ( - [1.234567, 2.345678, 3.456789], - cudf.Decimal64Dtype(precision=10, scale=6), - 4, - [1.2346, 2.3457, 3.4568], - [1.2346, 2.3457, 3.4568], - ), - ( - [1.234567, 2.345678, 3.456789], - cudf.Decimal64Dtype(precision=10, scale=6), - 2, - [1.23, 2.35, 3.46], - [1.23, 2.35, 3.46], - ), - ( - [1.234567, 2.345678, 3.456789], - cudf.Decimal64Dtype(precision=10, scale=6), - 6, - [1.234567, 2.345678, 3.456789], - [1.234567, 2.345678, 3.456789], - ), - ], -) -def test_series_round_decimal( - data, dtype, decimals, expected_half_up, expected_half_even -): - ser = cudf.Series(data).astype(dtype) - - result_half_up = ser.round(decimals=decimals, how="half_up").astype(dtype) - expected_ser_half_up = cudf.Series(expected_half_up).astype(dtype) - assert_eq(result_half_up, expected_ser_half_up) - - result_half_even = ser.round(decimals=decimals, 
how="half_even").astype( - dtype - ) - expected_ser_half_even = cudf.Series(expected_half_even).astype(dtype) - assert_eq(result_half_even, expected_ser_half_even) - - -@pytest.mark.parametrize("nan_as_null", [True, False, None]) -def test_series_isnull_isna(ps, nan_as_null): - nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) - if nan_as_null is False and ( - nan_contains.any() and not nan_contains.all() and ps.dtype == object - ): - with pytest.raises(MixedTypeError): - cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) - else: - gs = cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) - - assert_eq(ps.isnull(), gs.isnull()) - assert_eq(ps.isna(), gs.isna()) - - -@pytest.mark.parametrize("nan_as_null", [True, False, None]) -def test_series_notnull_notna(ps, nan_as_null): - nan_contains = ps.apply(lambda x: isinstance(x, float) and np.isnan(x)) - if nan_as_null is False and ( - nan_contains.any() and not nan_contains.all() and ps.dtype == object - ): - with pytest.raises(MixedTypeError): - cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) - else: - gs = cudf.Series.from_pandas(ps, nan_as_null=nan_as_null) - - assert_eq(ps.notnull(), gs.notnull()) - assert_eq(ps.notna(), gs.notna()) - - -@pytest.mark.parametrize( - "sr1", [pd.Series([10, 11, 12], index=["a", "b", "z"]), pd.Series(["a"])] -) -@pytest.mark.parametrize( - "sr2", - [pd.Series([], dtype="float64"), pd.Series(["a", "a", "c", "z", "A"])], -) -@pytest.mark.parametrize( - "op", - [ - operator.eq, - operator.ne, - operator.lt, - operator.gt, - operator.le, - operator.ge, - ], -) -def test_series_error_equality(sr1, sr2, op): - gsr1 = cudf.from_pandas(sr1) - gsr2 = cudf.from_pandas(sr2) - - assert_exceptions_equal(op, op, ([sr1, sr2],), ([gsr1, gsr2],)) - - -def test_series_memory_usage(): - sr = cudf.Series([1, 2, 3, 4], dtype="int64") - assert sr.memory_usage() == 32 - - sliced_sr = sr[2:] - assert sliced_sr.memory_usage() == 16 - - sliced_sr[3] = None - assert sliced_sr.memory_usage() == 80 - - sr = cudf.Series(["hello world", "rapids ai", "abc", "z"]) - assert sr.memory_usage() == 44 - - assert sr[3:].memory_usage() == 9 # z - assert sr[:1].memory_usage() == 19 # hello world - - -@pytest.mark.parametrize( - "sr_data,expected_psr", - [ - ( - pa.array([1, 2, None, 3], type=pa.uint8()), - pd.Series([1, 2, None, 3], dtype=pd.UInt8Dtype()), - ), - ( - pa.array([23, None, None, 32], type=pa.uint16()), - pd.Series([23, None, None, 32], dtype=pd.UInt16Dtype()), - ), - ( - pa.array([None, 123, None, 1], type=pa.uint32()), - pd.Series([None, 123, None, 1], dtype=pd.UInt32Dtype()), - ), - ( - pa.array([234, 2323, 23432, None, None, 224], type=pa.uint64()), - pd.Series( - [234, 2323, 23432, None, None, 224], dtype=pd.UInt64Dtype() - ), - ), - ( - pa.array([-10, 1, None, -1, None, 3], type=pa.int8()), - pd.Series([-10, 1, None, -1, None, 3], dtype=pd.Int8Dtype()), - ), - ( - pa.array([111, None, 222, None, 13], type=pa.int16()), - pd.Series([111, None, 222, None, 13], dtype=pd.Int16Dtype()), - ), - ( - pa.array([11, None, 22, 33, None, 2, None, 3], type=pa.int32()), - pd.Series( - [11, None, 22, 33, None, 2, None, 3], dtype=pd.Int32Dtype() - ), - ), - ( - pa.array( - [32431, None, None, 32322, 0, 10, -32324, None], - type=pa.int64(), - ), - pd.Series( - [32431, None, None, 32322, 0, 10, -32324, None], - dtype=pd.Int64Dtype(), - ), - ), - ( - pa.array( - [True, None, False, None, False, True, True, False], - type=pa.bool_(), - ), - pd.Series( - [True, None, False, None, False, True, True, False], - 
dtype=pd.BooleanDtype(), - ), - ), - ( - pa.array( - [ - "abc", - "a", - None, - "hello world", - "foo buzz", - "", - None, - "rapids ai", - ], - type=pa.string(), - ), - pd.Series( - [ - "abc", - "a", - None, - "hello world", - "foo buzz", - "", - None, - "rapids ai", - ], - dtype=pd.StringDtype(), - ), - ), - ( - pa.array( - [1, 2, None, 10.2, None], - type=pa.float32(), - ), - pd.Series( - [1, 2, None, 10.2, None], - dtype=pd.Float32Dtype(), - ), - ), - ], -) -def test_series_to_pandas_nullable_dtypes(sr_data, expected_psr): - sr = cudf.Series(sr_data) - actual_psr = sr.to_pandas(nullable=True) - - assert_eq(actual_psr, expected_psr) - - -def test_series_pipe(): - psr = pd.Series([10, 20, 30, 40]) - gsr = cudf.Series([10, 20, 30, 40]) - - def custom_add_func(sr, val): - new_sr = sr + val - return new_sr - - def custom_to_str_func(sr, val): - new_sr = sr.astype("str") + val - return new_sr - - expected = ( - psr.pipe(custom_add_func, 11) - .pipe(custom_add_func, val=12) - .pipe(custom_to_str_func, "rapids") - ) - actual = ( - gsr.pipe(custom_add_func, 11) - .pipe(custom_add_func, val=12) - .pipe(custom_to_str_func, "rapids") - ) - - assert_eq(expected, actual) - - expected = ( - psr.pipe((custom_add_func, "sr"), val=11) - .pipe(custom_add_func, val=1) - .pipe(custom_to_str_func, "rapids-ai") - ) - actual = ( - gsr.pipe((custom_add_func, "sr"), val=11) - .pipe(custom_add_func, val=1) - .pipe(custom_to_str_func, "rapids-ai") - ) - - assert_eq(expected, actual) - - -def test_series_pipe_error(): - psr = pd.Series([10, 20, 30, 40]) - gsr = cudf.Series([10, 20, 30, 40]) - - def custom_add_func(sr, val): - new_sr = sr + val - return new_sr - - assert_exceptions_equal( - lfunc=psr.pipe, - rfunc=gsr.pipe, - lfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}), - rfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}), - ) - - -@pytest.mark.parametrize( - "pd_data", - [pd.Series([1, 2, 3]), pd.Series([10, 11, 12], index=[1, 2, 3])], -) -@pytest.mark.parametrize( - "other", - [ - pd.Series([4, 5, 6]), - pd.Series([4, 5, 6, 7, 8]), - pd.Series([4, np.nan, 6]), - [4, np.nan, 6], - {1: 9}, - ], -) -def test_series_update(pd_data, other): - data = cudf.Series.from_pandas(pd_data) - gs = data.copy(deep=True) - if isinstance(other, pd.Series): - other = cudf.Series.from_pandas(other, nan_as_null=False) - g_other = other.copy(deep=True) - p_other = g_other.to_pandas() - else: - g_other = other - p_other = other - - ps = gs.to_pandas() - - ps.update(p_other) - gs.update(g_other) - assert_eq(gs, ps) - - -@pytest.mark.parametrize( - "data", - [ - [1, None, 11, 2.0, np.nan], - [np.nan], - [None, None, None], - [np.nan, 1, 10, 393.32, np.nan], - ], -) -@pytest.mark.parametrize("nan_as_null", [True, False]) -@pytest.mark.parametrize("fill_value", [1.2, 332, np.nan]) -def test_fillna_with_nan(data, nan_as_null, fill_value): - gs = cudf.Series(data, dtype="float64", nan_as_null=nan_as_null) - ps = gs.to_pandas() - - expected = ps.fillna(fill_value) - actual = gs.fillna(fill_value) - - assert_eq(expected, actual) - - -def test_fillna_categorical_with_non_categorical_raises(): - ser = cudf.Series([1, None], dtype="category") - with pytest.raises(TypeError): - ser.fillna(cudf.Series([1, 2])) - - -def test_fillna_categorical_with_different_categories_raises(): - ser = cudf.Series([1, None], dtype="category") - with pytest.raises(TypeError): - ser.fillna(cudf.Series([1, 2]), dtype="category") - - -def test_series_mask_mixed_dtypes_error(): - s = cudf.Series(["a", "b", "c"]) - with pytest.raises( - 
TypeError, - match=re.escape( - "cudf does not support mixed types, please type-cast " - "the column of dataframe/series and other " - "to same dtypes." - ), - ): - s.where([True, False, True], [1, 2, 3]) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series(["a"] * 20, index=range(0, 20)), - pd.Series(["b", None] * 10, index=range(0, 20), name="ASeries"), - pd.Series( - ["b", None] * 5, - index=pd.Index(list(range(10)), dtype="uint64"), - name="BSeries", - ), - ], -) -@pytest.mark.parametrize( - "labels", - [ - [1], - [0], - 1, - 5, - [5, 9], - pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - pd.Index([0, 1, 2, 3, 4], dtype="float32"), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_series_drop_labels(ps, labels, inplace): - ps = ps.copy() - gs = cudf.from_pandas(ps) - - expected = ps.drop(labels=labels, axis=0, inplace=inplace) - actual = gs.drop(labels=labels, axis=0, inplace=inplace) - - if inplace: - expected = ps - actual = gs - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series(["a"] * 20, index=range(0, 20)), - pd.Series(["b", None] * 10, index=range(0, 20), name="ASeries"), - ], -) -@pytest.mark.parametrize( - "index", - [[1], [0], 1, 5, [5, 9], pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_series_drop_index(ps, index, inplace): - ps = ps.copy() - gs = cudf.from_pandas(ps) - - expected = ps.drop(index=index, inplace=inplace) - actual = gs.drop(index=index, inplace=inplace) - - if inplace: - expected = ps - actual = gs - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series( - ["a" if i % 2 == 0 else "b" for i in range(0, 10)], - index=pd.MultiIndex( - levels=[ - ["lama", "cow", "falcon"], - ["speed", "weight", "length"], - ], - codes=[ - [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], - [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], - ], - ), - name="abc", - ) - ], -) -@pytest.mark.parametrize( - "index,level", - [ - ("cow", 0), - ("lama", 0), - ("falcon", 0), - ("speed", 1), - ("weight", 1), - ("length", 1), - ( - "cow", - None, - ), - ( - "lama", - None, - ), - ( - "falcon", - None, - ), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_series_drop_multiindex(ps, index, level, inplace): - ps = ps.copy() - gs = cudf.from_pandas(ps) - - expected = ps.drop(index=index, inplace=inplace, level=level) - actual = gs.drop(index=index, inplace=inplace, level=level) - - if inplace: - expected = ps - actual = gs - - assert_eq(expected, actual) - - -def test_series_drop_edge_inputs(): - gs = cudf.Series([42], name="a") - ps = gs.to_pandas() - - assert_eq(ps.drop(columns=["b"]), gs.drop(columns=["b"])) - - assert_eq(ps.drop(columns="b"), gs.drop(columns="b")) - - assert_exceptions_equal( - lfunc=ps.drop, - rfunc=gs.drop, - lfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), - rfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), - ) - - assert_exceptions_equal( - lfunc=ps.drop, - rfunc=gs.drop, - lfunc_args_and_kwargs=([], {}), - rfunc_args_and_kwargs=([], {}), - ) - - assert_exceptions_equal( - lfunc=ps.drop, - rfunc=gs.drop, - lfunc_args_and_kwargs=(["b"], {"axis": 1}), - rfunc_args_and_kwargs=(["b"], {"axis": 1}), - ) - - -def test_series_drop_raises(): - gs = cudf.Series([10, 20, 30], index=["x", "y", "z"], name="c") - ps = gs.to_pandas() - - assert_exceptions_equal( - lfunc=ps.drop, - rfunc=gs.drop, - lfunc_args_and_kwargs=(["p"],), - rfunc_args_and_kwargs=(["p"],), - ) - - # dtype specified mismatch - assert_exceptions_equal( - 
lfunc=ps.drop, - rfunc=gs.drop, - lfunc_args_and_kwargs=([3],), - rfunc_args_and_kwargs=([3],), - ) - - expect = ps.drop("p", errors="ignore") - actual = gs.drop("p", errors="ignore") - - assert_eq(actual, expect) - - -@pytest.mark.parametrize( - "data", - [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize( - "p_index", - [ - None, - ["ia", "ib", "ic", "id", "ie"], - pd.MultiIndex.from_tuples( - [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")] - ), - ], -) -def test_explode(data, ignore_index, p_index): - pdf = pd.Series(data, index=p_index, name="someseries") - gdf = cudf.from_pandas(pdf) - - expect = pdf.explode(ignore_index) - got = gdf.explode(ignore_index) - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - [[1, 2, 3], [10, 20]], - [[1.0, 2.0, 3.0], None, [10.0, 20.0, np.nan]], - [[5, 6], None, [1]], - [None, None, None, None, None, [10, 20]], - ], -) -@pytest.mark.parametrize("klass", [cudf.Series, list, cp.array]) -def test_nested_series_from_sequence_data(data, klass): - actual = cudf.Series( - [klass(val) if val is not None else val for val in data] - ) - expected = cudf.Series(data) - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "data", - [ - cp.ones(5, dtype=cp.float16), - np.ones(5, dtype="float16"), - pd.Series([0.1, 1.2, 3.3], dtype="float16"), - pytest.param( - pa.array(np.ones(5, dtype="float16")), - marks=pytest.mark.xfail( - reason="https://issues.apache.org/jira/browse/ARROW-13762" - ), - ), - ], -) -def test_series_raises_float16(data): - with pytest.raises(TypeError): - cudf.Series(data) - - -@pytest.mark.parametrize( - "index", - [ - pd.RangeIndex(0, 3, 1), - [3.0, 1.0, np.nan], - ["a", "z", None], - pd.RangeIndex(4, -1, -2), - ], -) -@pytest.mark.parametrize("axis", [0, "index"]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_series_sort_index( - index, axis, ascending, inplace, ignore_index, na_position -): - ps = pd.Series([10, 3, 12], index=index) - gs = cudf.from_pandas(ps) - - expected = ps.sort_index( - axis=axis, - ascending=ascending, - ignore_index=ignore_index, - inplace=inplace, - na_position=na_position, - ) - got = gs.sort_index( - axis=axis, - ascending=ascending, - ignore_index=ignore_index, - inplace=inplace, - na_position=na_position, - ) - - if inplace is True: - assert_eq(ps, gs, check_index_type=True) - else: - assert_eq(expected, got, check_index_type=True) - - -@pytest.mark.parametrize( - "method", ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"] -) -def test_series_hash_values(method): - inputs = cudf.Series( - [ - "", - "0", - "A 56 character string to test message padding algorithm.", - "A 63 character string to test message padding algorithm, again.", - "A 64 character string to test message padding algorithm, again!!", - ( - "A very long (greater than 128 bytes/char string) to execute " - "a multi hash-step data point in the hash function being " - "tested. This string needed to be longer." 
- ), - "All work and no play makes Jack a dull boy", - "!\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~", - "\x00\x00\x00\x10\x00\x00\x00\x00", - "\x00\x00\x00\x00", - ] - ) - - def hashlib_compute_digest(data): - hasher = getattr(hashlib, method)() - hasher.update(data.encode("utf-8")) - return hasher.hexdigest() - - hashlib_validation = inputs.to_pandas().apply(hashlib_compute_digest) - validation_results = cudf.Series(hashlib_validation) - hash_values = inputs.hash_values(method=method) - assert_eq(hash_values, validation_results) - - -def test_series_hash_values_invalid_method(): - inputs = cudf.Series(["", "0"]) - with pytest.raises(ValueError): - inputs.hash_values(method="invalid_method") - - -def test_set_index_unequal_length(): - s = cudf.Series(dtype="float64") - with pytest.raises(ValueError): - s.index = [1, 2, 3] - - -@pytest.mark.parametrize( - "lhs, rhs", [("a", "a"), ("a", "b"), (1, 1.0), (None, None), (None, "a")] -) -def test_equals_names(lhs, rhs): - lhs = cudf.Series([1, 2], name=lhs) - rhs = cudf.Series([1, 2], name=rhs) - - got = lhs.equals(rhs) - expect = lhs.to_pandas().equals(rhs.to_pandas()) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", [[True, False, None, True, False], [None, None], []] -) -@pytest.mark.parametrize("bool_dtype", ["bool", "boolean", pd.BooleanDtype()]) -def test_nullable_bool_dtype_series(data, bool_dtype): - psr = pd.Series(data, dtype=pd.BooleanDtype()) - gsr = cudf.Series(data, dtype=bool_dtype) - - assert_eq(psr, gsr.to_pandas(nullable=True)) - - -@pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]]) -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("original_name", [None, "original_ser"]) -@pytest.mark.parametrize("name", [None, "ser", no_default]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_reset_index(level, drop, inplace, original_name, name): - midx = pd.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["l0", None] - ) - ps = pd.Series(range(4), index=midx, name=original_name) - gs = cudf.from_pandas(ps) - - if not drop and inplace: - pytest.skip( - "For exception checks, see " - "test_reset_index_dup_level_name_exceptions" - ) - - expect = ps.reset_index(level=level, drop=drop, name=name, inplace=inplace) - - got = gs.reset_index(level=level, drop=drop, name=name, inplace=inplace) - if inplace: - expect = ps - got = gs - - assert_eq(expect, got) - - -@pytest.mark.parametrize("level", [None, 0, 1, [None]]) -@pytest.mark.parametrize("drop", [False, True]) -@pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("original_name", [None, "original_ser"]) -@pytest.mark.parametrize("name", [None, "ser"]) -def test_reset_index_dup_level_name(level, drop, inplace, original_name, name): - # midx levels are named [None, None] - midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) - ps = pd.Series(range(4), index=midx, name=original_name) - gs = cudf.from_pandas(ps) - if level == [None] or not drop and inplace: - pytest.skip( - "For exception checks, see " - "test_reset_index_dup_level_name_exceptions" - ) - - expect = ps.reset_index(level=level, drop=drop, inplace=inplace, name=name) - got = gs.reset_index(level=level, drop=drop, inplace=inplace, name=name) - if inplace: - expect = ps - got = gs - - assert_eq(expect, got) - - -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("original_name", [None, "original_ser"]) 
-@pytest.mark.parametrize("name", [None, "ser"]) -def test_reset_index_named(drop, inplace, original_name, name): - ps = pd.Series(range(4), index=["x", "y", "z", "w"], name=original_name) - gs = cudf.from_pandas(ps) - - ps.index.name = "cudf" - gs.index.name = "cudf" - - if not drop and inplace: - pytest.skip( - "For exception checks, see " - "test_reset_index_dup_level_name_exceptions" - ) - - expect = ps.reset_index(drop=drop, inplace=inplace, name=name) - got = gs.reset_index(drop=drop, inplace=inplace, name=name) - - if inplace: - expect = ps - got = gs - - assert_eq(expect, got) - - -def test_reset_index_dup_level_name_exceptions(): - midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) - ps = pd.Series(range(4), index=midx) - gs = cudf.from_pandas(ps) - - # Should specify duplicate level names with level number. - assert_exceptions_equal( - lfunc=ps.reset_index, - rfunc=gs.reset_index, - lfunc_args_and_kwargs=( - [], - {"level": [None]}, - ), - rfunc_args_and_kwargs=( - [], - {"level": [None]}, - ), - ) - - # Cannot use drop=False and inplace=True to turn a series into dataframe. - assert_exceptions_equal( - lfunc=ps.reset_index, - rfunc=gs.reset_index, - lfunc_args_and_kwargs=( - [], - {"drop": False, "inplace": True}, - ), - rfunc_args_and_kwargs=( - [], - {"drop": False, "inplace": True}, - ), - ) - - # Pandas raises the above exception should these two inputs crosses. - assert_exceptions_equal( - lfunc=ps.reset_index, - rfunc=gs.reset_index, - lfunc_args_and_kwargs=( - [], - {"level": [None], "drop": False, "inplace": True}, - ), - rfunc_args_and_kwargs=( - [], - {"level": [None], "drop": False, "inplace": True}, - ), - ) - - -def test_series_add_prefix(): - cd_s = cudf.Series([1, 2, 3, 4]) - pd_s = cd_s.to_pandas() - - got = cd_s.add_prefix("item_") - expected = pd_s.add_prefix("item_") - - assert_eq(got, expected) - - -def test_series_add_suffix(): - cd_s = cudf.Series([1, 2, 3, 4]) - pd_s = cd_s.to_pandas() - - got = cd_s.add_suffix("_item") - expected = pd_s.add_suffix("_item") - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [0.25, 0.5, 0.2, -0.05], - [0, 1, 2, np.nan, 4, cudf.NA, 6], - ], -) -@pytest.mark.parametrize("lag", [1, 2, 3, 4]) -def test_autocorr(data, lag): - cudf_series = cudf.Series(data) - psr = cudf_series.to_pandas() - - cudf_corr = cudf_series.autocorr(lag=lag) - - # autocorrelation is undefined (nan) for less than two entries, but pandas - # short-circuits when there are 0 entries and bypasses the numpy function - # call that generates an error. 
- num_both_valid = (psr.notna() & psr.shift(lag).notna()).sum() - with expect_warning_if(num_both_valid == 1, RuntimeWarning): - pd_corr = psr.autocorr(lag=lag) - - assert_eq(pd_corr, cudf_corr) - - -@pytest.mark.parametrize( - "data", - [ - [0, 1, 2, 3], - ["abc", "a", None, "hello world", "foo buzz", "", None, "rapids ai"], - ], -) -def test_series_transpose(data): - psr = pd.Series(data=data) - csr = cudf.Series(data=data) - - cudf_transposed = csr.transpose() - pd_transposed = psr.transpose() - cudf_property = csr.T - pd_property = psr.T - - assert_eq(pd_transposed, cudf_transposed) - assert_eq(pd_property, cudf_property) - assert_eq(cudf_transposed, csr) - - -@pytest.mark.parametrize( - "data", - [1, 3, 5, 7, 7], -) -def test_series_nunique(data): - cd_s = cudf.Series(data) - pd_s = cd_s.to_pandas() - - actual = cd_s.nunique() - expected = pd_s.nunique() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [1, 3, 5, 7, 7], -) -def test_series_nunique_index(data): - cd_s = cudf.Series(data) - pd_s = cd_s.to_pandas() - - actual = cd_s.index.nunique() - expected = pd_s.index.nunique() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [], - [1, 2, 3, 4], - ["a", "b", "c"], - [1.2, 2.2, 4.5], - [np.nan, np.nan], - [None, None, None], - ], -) -def test_axes(data): - csr = cudf.Series(data) - psr = csr.to_pandas() - - expected = psr.axes - actual = csr.axes - - for e, a in zip(expected, actual): - assert_eq(e, a) - - -def test_series_truncate(): - csr = cudf.Series([1, 2, 3, 4]) - psr = csr.to_pandas() - - assert_eq(csr.truncate(), psr.truncate()) - assert_eq(csr.truncate(1, 2), psr.truncate(1, 2)) - assert_eq(csr.truncate(before=1, after=2), psr.truncate(before=1, after=2)) - - -def test_series_truncate_errors(): - csr = cudf.Series([1, 2, 3, 4]) - with pytest.raises(ValueError): - csr.truncate(axis=1) - with pytest.raises(ValueError): - csr.truncate(copy=False) - - csr.index = [3, 2, 1, 6] - psr = csr.to_pandas() - assert_exceptions_equal( - lfunc=csr.truncate, - rfunc=psr.truncate, - ) - - -def test_series_truncate_datetimeindex(): - dates = cudf.date_range( - "2021-01-01 23:45:00", "2021-01-02 23:46:00", freq="s" - ) - csr = cudf.Series(range(len(dates)), index=dates) - psr = csr.to_pandas() - - assert_eq( - csr.truncate( - before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" - ), - psr.truncate( - before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" - ), - ) - - -@pytest.mark.parametrize( - "data", - [ - [], - [0, 12, 14], - [0, 14, 12, 12, 3, 10, 12, 14], - np.random.default_rng(seed=0).integers(-100, 100, 200), - pd.Series([0.0, 1.0, None, 10.0]), - [None, None, None, None], - [np.nan, None, -1, 2, 3], - [1, 2], - ], -) -@pytest.mark.parametrize( - "values", - [ - np.random.default_rng(seed=0).integers(-100, 100, 10), - [], - [np.nan, None, -1, 2, 3], - [1.0, 12.0, None, None, 120], - [0.1, 12.1, 14.1], - [0, 14, 12, 12, 3, 10, 12, 14, None], - [None, None, None], - ["0", "12", "14"], - ["0", "12", "14", "a"], - [1.0, 2.5], - ], -) -def test_isin_numeric(data, values): - rng = np.random.default_rng(seed=0) - index = rng.integers(0, 100, len(data)) - psr = pd.Series(data, index=index) - gsr = cudf.Series.from_pandas(psr, nan_as_null=False) - - expected = psr.isin(values) - got = gsr.isin(values) - - assert_eq(got, expected) - - -def test_fill_new_category(): - gs = cudf.Series(pd.Categorical(["a", "b", "c"])) - with pytest.raises(TypeError): - gs[0:1] = "d" - - -@pytest.mark.skipif( - PANDAS_VERSION < 
PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warning newly introduced in pandas-2.2.0", -) -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series( - ["2018-01-01", "2019-04-03", None, "2019-12-30"], - dtype="datetime64[ns]", - ), - pd.Series( - [ - "2018-01-01", - "2019-04-03", - None, - "2019-12-30", - "2018-01-01", - "2018-01-01", - ], - dtype="datetime64[ns]", - ), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - [1514764800000000000, 1577664000000000000], - [ - 1514764800000000000, - 1577664000000000000, - 1577664000000000000, - 1577664000000000000, - 1514764800000000000, - ], - ["2019-04-03", "2019-12-30", "2012-01-01"], - [ - "2012-01-01", - "2012-01-01", - "2012-01-01", - "2019-04-03", - "2019-12-30", - "2012-01-01", - ], - ], -) -def test_isin_datetime(data, values): - psr = pd.Series(data) - gsr = cudf.Series.from_pandas(psr) - - is_len_str = isinstance(next(iter(values), None), str) and len(data) - with expect_warning_if(is_len_str): - got = gsr.isin(values) - with expect_warning_if(is_len_str): - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - ["this", "is", None, "a", "test"], - ["test", "this", "test", "is", None, "test", "a", "test"], - ["0", "12", "14"], - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["this", "is"], - [None, None, None], - ["12", "14", "19"], - [12, 14, 19], - ["is", "this", "is", "this", "is"], - ], -) -def test_isin_string(data, values): - psr = pd.Series(data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.isin(values) - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [], - pd.Series(["a", "b", "c", "c", "c", "d", "e"], dtype="category"), - pd.Series(["a", "b", None, "c", "d", "e"], dtype="category"), - pd.Series([0, 3, 10, 12], dtype="category"), - pd.Series([0, 3, 10, 12, 0, 10, 3, 0, 0, 3, 3], dtype="category"), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["a", "b", None, "f", "words"], - ["0", "12", None, "14"], - [0, 10, 12, None, 39, 40, 1000], - [0, 0, 0, 0, 3, 3, 3, None, 1, 2, 3], - ], -) -def test_isin_categorical(data, values): - psr = pd.Series(data) - gsr = cudf.Series.from_pandas(psr) - - got = gsr.isin(values) - expected = psr.isin(values) - assert_eq(got, expected) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20]) -@pytest.mark.parametrize("data_empty", [False, True]) -def test_diff(dtype, period, data_empty): - if data_empty: - data = None - else: - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, 100000, low=-2, high=2) - else: - data = gen_rand(dtype, 100000) - - gs = cudf.Series(data, dtype=dtype) - ps = pd.Series(data, dtype=dtype) - - expected_outcome = ps.diff(period) - diffed_outcome = gs.diff(period).astype(expected_outcome.dtype) - - if data_empty: - assert_eq(diffed_outcome, expected_outcome, check_index_type=False) - else: - assert_eq(diffed_outcome, expected_outcome) - - -def test_diff_unsupported_dtypes(): - gs = cudf.Series(["a", "b", "c", "d", "e"]) - with pytest.raises( - TypeError, - match=r"unsupported operand type\(s\)", - ): - gs.diff() - - -@pytest.mark.parametrize( - "data", - [ - pd.date_range("2020-01-01", "2020-01-06", freq="D"), - [True, True, True, False, True, True], - [1.0, 2.0, 3.5, 4.0, 5.0, -1.7], - [1, 2, 3, 3, 4, 5], - [np.nan, None, None, np.nan, np.nan, None], - ], -) -def test_diff_many_dtypes(data): - ps = pd.Series(data) - gs = 
cudf.from_pandas(ps) - assert_eq(ps.diff(), gs.diff()) - assert_eq(ps.diff(periods=2), gs.diff(periods=2)) - - -@pytest.mark.parametrize("num_rows", [1, 100]) -@pytest.mark.parametrize("num_bins", [1, 10]) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "bool"]) -@pytest.mark.parametrize("series_bins", [True, False]) -def test_series_digitize(num_rows, num_bins, right, dtype, series_bins): - rng = np.random.default_rng(seed=0) - data = rng.integers(0, 100, num_rows).astype(dtype) - bins = np.unique(np.sort(rng.integers(2, 95, num_bins).astype(dtype))) - s = cudf.Series(data) - if series_bins: - s_bins = cudf.Series(bins) - indices = s.digitize(s_bins, right) - else: - indices = s.digitize(bins, right) - np.testing.assert_array_equal( - np.digitize(data, bins, right), indices.to_numpy() - ) - - -def test_series_digitize_invalid_bins(): - rng = np.random.default_rng(seed=0) - s = cudf.Series(rng.integers(0, 30, 80), dtype="int32") - bins = cudf.Series([2, None, None, 50, 90], dtype="int32") - - with pytest.raises( - ValueError, match="`bins` cannot contain null entries." - ): - _ = s.digitize(bins) - - -@pytest.mark.parametrize( - "data,left,right", - [ - ([0, 1, 2, 3, 4, 5, 10], 0, 5), - ([0, 1, 2, 3, 4, 5, 10], 10, 1), - ([0, 1, 2, 3, 4, 5], [0, 10, 11] * 2, [1, 2, 5] * 2), - (["a", "few", "set", "of", "strings", "xyz", "abc"], "banana", "few"), - (["a", "few", "set", "of", "strings", "xyz", "abc"], "phone", "hello"), - ( - ["a", "few", "set", "of", "strings", "xyz", "abc"], - ["a", "hello", "rapids", "ai", "world", "chars", "strs"], - ["yes", "no", "hi", "bye", "test", "pass", "fail"], - ), - ([0, 1, 2, np.nan, 4, np.nan, 10], 10, 1), - ], -) -@pytest.mark.parametrize("inclusive", ["both", "neither", "left", "right"]) -def test_series_between(data, left, right, inclusive): - ps = pd.Series(data) - gs = cudf.from_pandas(ps, nan_as_null=False) - - expected = ps.between(left, right, inclusive=inclusive) - actual = gs.between(left, right, inclusive=inclusive) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,left,right", - [ - ([0, 1, 2, None, 4, 5, 10], 0, 5), - ([0, 1, 2, 3, None, 5, 10], 10, 1), - ([None, 1, 2, 3, 4, None], [0, 10, 11] * 2, [1, 2, 5] * 2), - ( - ["a", "few", "set", None, "strings", "xyz", "abc"], - ["a", "hello", "rapids", "ai", "world", "chars", "strs"], - ["yes", "no", "hi", "bye", "test", "pass", "fail"], - ), - ], -) -@pytest.mark.parametrize("inclusive", ["both", "neither", "left", "right"]) -def test_series_between_with_null(data, left, right, inclusive): - gs = cudf.Series(data) - ps = gs.to_pandas(nullable=True) - - expected = ps.between(left, right, inclusive=inclusive) - actual = gs.between(left, right, inclusive=inclusive) - - assert_eq(expected, actual.to_pandas(nullable=True)) - - -def test_default_construction(): - s = cudf.Series([np.int8(8), np.int16(128)]) - assert s.dtype == np.dtype("i2") - - -@pytest.mark.parametrize( - "data", [[0, 1, 2, 3, 4], range(5), [np.int8(8), np.int16(128)]] -) -def test_default_integer_bitwidth_construction(default_integer_bitwidth, data): - s = cudf.Series(data) - assert s.dtype == np.dtype(f"i{default_integer_bitwidth // 8}") - - -@pytest.mark.parametrize("data", [[1.5, 2.5, 4.5], [1000, 2000, 4000, 3.14]]) -def test_default_float_bitwidth_construction(default_float_bitwidth, data): - s = cudf.Series(data) - assert s.dtype == np.dtype(f"f{default_float_bitwidth // 8}") - - -def test_series_ordered_dedup(): - # part of 
https://github.com/rapidsai/cudf/issues/11486 - rng = np.random.default_rng(seed=0) - sr = cudf.Series(rng.integers(0, 100, 1000)) - # pandas unique() preserves order - expect = pd.Series(sr.to_pandas().unique()) - got = cudf.Series._from_column(sr._column.unique()) - assert_eq(expect.values, got.values) - - -@pytest.mark.parametrize("dtype", ["int64", "float64"]) -@pytest.mark.parametrize("bool_scalar", [True, False]) -def test_set_bool_error(dtype, bool_scalar): - sr = cudf.Series([1, 2, 3], dtype=dtype) - psr = sr.to_pandas(nullable=True) - - assert_exceptions_equal( - lfunc=sr.__setitem__, - rfunc=psr.__setitem__, - lfunc_args_and_kwargs=([bool_scalar],), - rfunc_args_and_kwargs=([bool_scalar],), - ) - - -def test_int64_equality(): - s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64)) - assert (s != np.int64(2**63 - 1)).all() - - -@pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)]) -def test_series_to_dict(into): - gs = cudf.Series(["ab", "de", "zx"], index=[10, 20, 100]) - ps = gs.to_pandas() - - actual = gs.to_dict(into=into) - expected = ps.to_dict(into=into) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3], - pytest.param( - [np.nan, 10, 15, 16], - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/49818" - ), - ), - [np.nan, None, 10, 20], - ["ab", "zx", "pq"], - ["ab", "zx", None, "pq"], - [], - ], -) -def test_series_hasnans(data): - gs = cudf.Series(data, nan_as_null=False) - ps = gs.to_pandas(nullable=True) - - # Check type to avoid mixing Python bool and NumPy bool - assert isinstance(gs.hasnans, bool) - assert gs.hasnans == ps.hasnans - - -@pytest.mark.parametrize( - "data,index", - [ - ([1, 2, 3], [10, 11, 12]), - ([1, 2, 3, 1, 1, 2, 3, 2], [10, 20, 23, 24, 25, 26, 27, 28]), - ([1, None, 2, None, 3, None, 3, 1], [5, 6, 7, 8, 9, 10, 11, 12]), - ([np.nan, 1.0, np.nan, 5.4, 5.4, 1.0], ["a", "b", "c", "d", "e", "f"]), - ( - ["lama", "cow", "lama", None, "beetle", "lama", None, None], - [1, 4, 10, 11, 2, 100, 200, 400], - ), - ], -) -@pytest.mark.parametrize("keep", ["first", "last", False]) -@pytest.mark.parametrize("name", [None, "a"]) -def test_series_duplicated(data, index, keep, name): - gs = cudf.Series(data, index=index, name=name) - ps = gs.to_pandas() - - assert_eq(gs.duplicated(keep=keep), ps.duplicated(keep=keep)) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4], - [10, 20, None, None], - ], -) -@pytest.mark.parametrize("copy", [True, False]) -def test_series_copy(data, copy): - psr = pd.Series(data) - gsr = cudf.from_pandas(psr) - - new_psr = pd.Series(psr, copy=copy) - new_gsr = cudf.Series(gsr, copy=copy) - - new_psr.iloc[0] = 999 - new_gsr.iloc[0] = 999 - - assert_eq(psr, gsr) - assert_eq(new_psr, new_gsr) - - -@pytest.mark.parametrize( - "data", - [ - {"a": 1, "b": 2, "c": 24, "d": 1010}, - {"a": 1}, - ], -) -@pytest.mark.parametrize( - "index", [None, ["b", "c"], ["d", "a", "c", "b"], ["a"]] -) -def test_series_init_dict_with_index(data, index): - pandas_series = pd.Series(data, index=index) - cudf_series = cudf.Series(data, index=index) - - assert_eq(pandas_series, cudf_series) - - -@pytest.mark.parametrize("data", ["abc", None, 1, 3.7]) -@pytest.mark.parametrize( - "index", [None, ["b", "c"], ["d", "a", "c", "b"], ["a"]] -) -def test_series_init_scalar_with_index(data, index): - pandas_series = pd.Series(data, index=index) - cudf_series = cudf.Series(data, index=index) - - assert_eq( - pandas_series, - cudf_series, - check_index_type=data is 
not None or index is not None, - check_dtype=data is not None, - ) - - -def test_series_init_error(): - assert_exceptions_equal( - lfunc=pd.Series, - rfunc=cudf.Series, - lfunc_args_and_kwargs=([], {"data": [11], "index": [10, 11]}), - rfunc_args_and_kwargs=([], {"data": [11], "index": [10, 11]}), - ) - - -def test_series_init_from_series_and_index(): - ser = cudf.Series([4, 7, -5, 3], index=["d", "b", "a", "c"]) - result = cudf.Series(ser, index=list("abcd")) - expected = cudf.Series([-5, 7, 3, 4], index=list("abcd")) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "dtype", ["datetime64[ns]", "timedelta64[ns]", "object", "str"] -) -def test_series_mixed_dtype_error(dtype): - ps = pd.concat([pd.Series([1, 2, 3], dtype=dtype), pd.Series([10, 11])]) - with pytest.raises(TypeError): - cudf.Series(ps) - with pytest.raises(TypeError): - cudf.Series(ps.array) - - -@pytest.mark.parametrize("data", [[True, False, None], [10, 200, 300]]) -@pytest.mark.parametrize("index", [None, [10, 20, 30]]) -def test_series_contains(data, index): - ps = pd.Series(data, index=index) - gs = cudf.Series(data, index=index) - - assert_eq(1 in ps, 1 in gs) - assert_eq(10 in ps, 10 in gs) - assert_eq(True in ps, True in gs) - assert_eq(False in ps, False in gs) - - -def test_series_from_pandas_sparse(): - pser = pd.Series(range(2), dtype=pd.SparseDtype(np.int64, 0)) - with pytest.raises(NotImplementedError): - cudf.Series(pser) - - -def test_series_constructor_unbounded_sequence(): - class A: - def __getitem__(self, key): - return 1 - - with pytest.raises(TypeError): - cudf.Series(A()) - - -def test_series_constructor_error_mixed_type(): - with pytest.raises(MixedTypeError): - cudf.Series(["abc", np.nan, "123"], nan_as_null=False) - - -def test_series_typecast_to_object_error(): - actual = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(TypeError): - actual.astype(object) - with pytest.raises(TypeError): - actual.astype(np.dtype("object")) - new_series = actual.astype("str") - assert new_series[0] == "1970-01-01 00:00:00.000000001" - - -def test_series_typecast_to_object(): - actual = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - with cudf.option_context("mode.pandas_compatible", False): - new_series = actual.astype(object) - assert new_series[0] == "1970-01-01 00:00:00.000000001" - new_series = actual.astype(np.dtype("object")) - assert new_series[0] == "1970-01-01 00:00:00.000000001" - - -@pytest.mark.parametrize("attr", ["nlargest", "nsmallest"]) -def test_series_nlargest_nsmallest_str_error(attr): - gs = cudf.Series(["a", "b", "c", "d", "e"]) - ps = gs.to_pandas() - - assert_exceptions_equal( - getattr(gs, attr), getattr(ps, attr), ([], {"n": 1}), ([], {"n": 1}) - ) - - -def test_series_unique_pandas_compatibility(): - gs = cudf.Series([10, 11, 12, 11, 10]) - ps = gs.to_pandas() - with cudf.option_context("mode.pandas_compatible", True): - actual = gs.unique() - expected = ps.unique() - assert_eq(actual, expected) - - -@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES) -@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES) -def test_series_rename(initial_name, name): - gsr = cudf.Series([1, 2, 3], name=initial_name) - psr = pd.Series([1, 2, 3], name=initial_name) - - assert_eq(gsr, psr) - - actual = gsr.rename(name) - expected = psr.rename(name) - - assert_eq(actual, expected) - - -@pytest.mark.parametrize("index", [lambda x: x * 2, {1: 2}]) -def test_rename_index_not_supported(index): - ser = 
cudf.Series(range(2)) - with pytest.raises(NotImplementedError): - ser.rename(index=index) - - -@pytest.mark.parametrize( - "data", - [ - [1.2234242333234, 323432.3243423, np.nan], - pd.Series([34224, 324324, 324342], dtype="datetime64[ns]"), - pd.Series([224.242, None, 2424.234324], dtype="category"), - [ - decimal.Decimal("342.3243234234242"), - decimal.Decimal("89.32432497687622"), - None, - ], - ], -) -@pytest.mark.parametrize("digits", [0, 1, 3, 4, 10]) -def test_series_round_builtin(data, digits): - ps = pd.Series(data) - gs = cudf.from_pandas(ps, nan_as_null=False) - - # TODO: Remove `to_frame` workaround - # after following issue is fixed: - # https://github.com/pandas-dev/pandas/issues/55114 - expected = round(ps.to_frame(), digits)[0] - expected.name = None - actual = round(gs, digits) - - assert_eq(expected, actual) - - -def test_series_empty_dtype(): - expected = pd.Series([]) - actual = cudf.Series([]) - assert_eq(expected, actual, check_dtype=True) - - -@pytest.mark.parametrize("data", [None, {}, []]) -def test_series_empty_index_rangeindex(data): - expected = cudf.RangeIndex(0) - result = cudf.Series(data).index - assert_eq(result, expected) - - -def test_series_count_invalid_param(): - s = cudf.Series([], dtype="float64") - with pytest.raises(TypeError): - s.count(skipna=True) - - -@pytest.mark.parametrize( - "data", [[0, 1, 2], ["a", "b", "c"], [0.324, 32.32, 3243.23]] -) -def test_series_setitem_nat_with_non_datetimes(data): - s = cudf.Series(data) - with pytest.raises(TypeError): - s[0] = cudf.NaT - - -def test_series_string_setitem(): - gs = cudf.Series(["abc", "def", "ghi", "xyz", "pqr"]) - ps = gs.to_pandas() - - gs[0] = "NaT" - gs[1] = "NA" - gs[2] = "" - gs[3] = "NaN" - - ps[0] = "NaT" - ps[1] = "NA" - ps[2] = "" - ps[3] = "NaN" - - assert_eq(gs, ps) - - -def test_multi_dim_series_error(): - arr = cp.array([(1, 2), (3, 4)]) - with pytest.raises(ValueError): - cudf.Series(arr) - - -def test_bool_series_mixed_dtype_error(): - ps = pd.Series([True, False, None]) - all_bool_ps = pd.Series([True, False, True], dtype="object") - # ps now has `object` dtype, which - # isn't supported by `cudf`. 
- with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(TypeError): - cudf.Series(ps) - with pytest.raises(TypeError): - cudf.from_pandas(ps) - with pytest.raises(TypeError): - cudf.Series(ps, dtype=bool) - expected = cudf.Series(all_bool_ps, dtype=bool) - assert_eq(expected, all_bool_ps.astype(bool)) - nan_bools_mix = pd.Series([True, False, True, np.nan], dtype="object") - gs = cudf.Series(nan_bools_mix, nan_as_null=True) - assert_eq(gs.to_pandas(nullable=True), nan_bools_mix.astype("boolean")) - with pytest.raises(TypeError): - cudf.Series(nan_bools_mix, nan_as_null=False) - - -@pytest.mark.parametrize( - "pandas_type", - [ - pd.ArrowDtype(pa.int8()), - pd.ArrowDtype(pa.int16()), - pd.ArrowDtype(pa.int32()), - pd.ArrowDtype(pa.int64()), - pd.ArrowDtype(pa.uint8()), - pd.ArrowDtype(pa.uint16()), - pd.ArrowDtype(pa.uint32()), - pd.ArrowDtype(pa.uint64()), - pd.ArrowDtype(pa.float32()), - pd.ArrowDtype(pa.float64()), - pd.Int8Dtype(), - pd.Int16Dtype(), - pd.Int32Dtype(), - pd.Int64Dtype(), - pd.UInt8Dtype(), - pd.UInt16Dtype(), - pd.UInt32Dtype(), - pd.UInt64Dtype(), - pd.Float32Dtype(), - pd.Float64Dtype(), - ], -) -def test_series_arrow_numeric_types_roundtrip(pandas_type): - ps = pd.Series([1, 2, 3], dtype=pandas_type) - pi = pd.Index(ps) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - - with cudf.option_context("mode.pandas_compatible", True): - gi = cudf.from_pandas(pi) - assert_eq(pi, gi) - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "pandas_type", [pd.ArrowDtype(pa.bool_()), pd.BooleanDtype()] -) -def test_series_arrow_bool_types_roundtrip(pandas_type): - ps = pd.Series([True, False, None], dtype=pandas_type) - pi = pd.Index(ps) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - - with cudf.option_context("mode.pandas_compatible", True): - gi = cudf.from_pandas(pi) - assert_eq(pi, gi) - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "pandas_type", [pd.ArrowDtype(pa.string()), pd.StringDtype()] -) -def test_series_arrow_string_types_roundtrip(pandas_type): - ps = pd.Series(["abc", None, "xyz"], dtype=pandas_type) - pi = pd.Index(ps) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - - with cudf.option_context("mode.pandas_compatible", True): - gi = cudf.from_pandas(pi) - assert_eq(pi, gi) - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -def test_series_arrow_category_types_roundtrip(): - pa_array = pa.array(pd.Series([1, 2, 3], dtype="category")) - ps = pd.Series([1, 2, 3], dtype=pd.ArrowDtype(pa_array.type)) - pi = pd.Index(ps) - pdf = pi.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - cudf.from_pandas(ps) - - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - cudf.from_pandas(pi) - - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - cudf.from_pandas(pdf) - - -@pytest.mark.parametrize( - "pa_type", - [pa.decimal128(10, 2), pa.decimal128(5, 2), pa.decimal128(20, 2)], -) -def 
test_series_arrow_decimal_types_roundtrip(pa_type): - ps = pd.Series( - [ - decimal.Decimal("1.2"), - decimal.Decimal("20.56"), - decimal.Decimal("3"), - ], - dtype=pd.ArrowDtype(pa_type), - ) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -def test_series_arrow_struct_types_roundtrip(): - ps = pd.Series( - [{"a": 1}, {"b": "abc"}], - dtype=pd.ArrowDtype(pa.struct({"a": pa.int64(), "b": pa.string()})), - ) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -def test_series_arrow_list_types_roundtrip(): - ps = pd.Series([[1], [2], [4]], dtype=pd.ArrowDtype(pa.list_(pa.int64()))) - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.from_pandas(ps) - assert_eq(ps, gs) - pdf = ps.to_frame() - - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("base_name", [None, "a"]) -def test_series_to_frame_none_name(base_name): - result = cudf.Series(range(1), name=base_name).to_frame(name=None) - expected = pd.Series(range(1), name=base_name).to_frame(name=None) - assert_eq(result, expected) - - -@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) -@pytest.mark.parametrize( - "data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])] -) -def test_nan_as_null_from_arrow_objects(klass, data): - result = klass(data, nan_as_null=True) - expected = klass(pa.array([None], type=pa.float64())) - assert_eq(result, expected) - - -@pytest.mark.parametrize("reso", ["M", "ps"]) -@pytest.mark.parametrize("typ", ["M", "m"]) -def test_series_invalid_reso_dtype(reso, typ): - with pytest.raises(TypeError): - cudf.Series([], dtype=f"{typ}8[{reso}]") - - -def test_series_categorical_missing_value_count(): - ps = pd.Series(pd.Categorical(list("abcccb"), categories=list("cabd"))) - gs = cudf.from_pandas(ps) - - expected = ps.value_counts() - actual = gs.value_counts() - - assert_eq(expected, actual, check_dtype=False) - - -def test_series_error_nan_mixed_types(): - ps = pd.Series([np.nan, "ab", "cd"]) - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(MixedTypeError): - cudf.from_pandas(ps) - - -def test_series_error_nan_non_float_dtypes(): - s = cudf.Series(["a", "b", "c"]) - with pytest.raises(TypeError): - s[0] = np.nan - - s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - with pytest.raises(TypeError): - s[0] = np.nan - - -@pytest.mark.parametrize( - "dtype", - [ - pd.ArrowDtype(pa.int8()), - pd.ArrowDtype(pa.int16()), - pd.ArrowDtype(pa.int32()), - pd.ArrowDtype(pa.int64()), - pd.ArrowDtype(pa.uint8()), - pd.ArrowDtype(pa.uint16()), - pd.ArrowDtype(pa.uint32()), - pd.ArrowDtype(pa.uint64()), - pd.ArrowDtype(pa.float32()), - pd.ArrowDtype(pa.float64()), - pd.Int8Dtype(), - pd.Int16Dtype(), - pd.Int32Dtype(), - pd.Int64Dtype(), - pd.UInt8Dtype(), - pd.UInt16Dtype(), - pd.UInt32Dtype(), - pd.UInt64Dtype(), - pd.Float32Dtype(), - pd.Float64Dtype(), - ], -) -@pytest.mark.parametrize("klass", [cudf.Series, cudf.DataFrame, cudf.Index]) -@pytest.mark.parametrize("kind", [lambda x: x, str], ids=["obj", "string"]) -def test_astype_pandas_nullable_pandas_compat(dtype, klass, 
kind): - ser = klass([1, 2, 3]) - with cudf.option_context("mode.pandas_compatible", True): - actual = ser.astype(kind(dtype)) - expected = klass([1, 2, 3], dtype=kind(dtype)) - assert_eq(actual, expected) - - -@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) -@pytest.mark.parametrize( - "data", - [ - pa.array([1, None], type=pa.int64()), - pa.chunked_array([[1, None]], type=pa.int64()), - ], -) -def test_from_arrow_array_dtype(klass, data): - obj = klass(data, dtype="int8") - assert obj.dtype == np.dtype("int8") - - -@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) -def test_from_pandas_object_dtype_passed_dtype(klass): - result = klass(pd.Series([True, False], dtype=object), dtype="int8") - expected = klass(pa.array([1, 0], type=pa.int8())) - assert_eq(result, expected) - - -def test_series_where_mixed_bool_dtype(): - s = cudf.Series([True, False, True]) - with pytest.raises(TypeError): - s.where(~s, 10) - - -def test_series_setitem_mixed_bool_dtype(): - s = cudf.Series([True, False, True]) - with pytest.raises(TypeError): - s[0] = 10 - - -@pytest.mark.parametrize( - "nat, value", - [ - [np.datetime64("nat", "ns"), np.datetime64("2020-01-01", "ns")], - [np.timedelta64("nat", "ns"), np.timedelta64(1, "ns")], - ], -) -@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_series_np_array_nat_nan_as_nulls(nat, value, nan_as_null): - expected = np.array([nat, value]) - ser = cudf.Series(expected, nan_as_null=nan_as_null) - assert ser[0] is pd.NaT - assert ser[1] == value - - -def test_series_unitness_np_datetimelike_units(): - data = np.array([np.timedelta64(1)]) - with pytest.raises(TypeError): - cudf.Series(data) - with pytest.raises(TypeError): - pd.Series(data) - - -def test_series_duplicate_index_reindex(): - gs = cudf.Series([0, 1, 2, 3], index=[0, 0, 1, 1]) - ps = gs.to_pandas() - - assert_exceptions_equal( - gs.reindex, - ps.reindex, - lfunc_args_and_kwargs=([10, 11, 12, 13], {}), - rfunc_args_and_kwargs=([10, 11, 12, 13], {}), - ) - - -def test_list_category_like_maintains_dtype(): - dtype = cudf.CategoricalDtype(categories=[1, 2, 3, 4], ordered=True) - data = [1, 2, 3] - result = cudf.Series._from_column(as_column(data, dtype=dtype)) - expected = pd.Series(data, dtype=dtype.to_pandas()) - assert_eq(result, expected) - - -def test_list_interval_like_maintains_dtype(): - dtype = cudf.IntervalDtype(subtype=np.int8) - data = [pd.Interval(1, 2)] - result = cudf.Series._from_column(as_column(data, dtype=dtype)) - expected = pd.Series(data, dtype=dtype.to_pandas()) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "klass", [cudf.Series, cudf.Index, pd.Series, pd.Index] -) -def test_series_from_named_object_name_priority(klass): - result = cudf.Series(klass([1], name="a"), name="b") - assert result.name == "b" - - -@pytest.mark.parametrize( - "data", - [ - {"a": 1, "b": 2, "c": 3}, - cudf.Series([1, 2, 3], index=list("abc")), - pd.Series([1, 2, 3], index=list("abc")), - ], -) -def test_series_from_object_with_index_index_arg_reindex(data): - result = cudf.Series(data, index=list("bca")) - expected = cudf.Series([2, 3, 1], index=list("bca")) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "data", - [ - {0: 1, 1: 2, 2: 3}, - cudf.Series([1, 2, 3]), - cudf.Index([1, 2, 3]), - pd.Series([1, 2, 3]), - pd.Index([1, 2, 3]), - [1, 2, 3], - ], -) -def test_series_dtype_astypes(data): - result = cudf.Series(data, dtype="float64") - expected = cudf.Series([1.0, 2.0, 3.0]) - assert_eq(result, expected) - - 
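# The removed test_series_dtype_astypes above leans on a general contract:
# passing dtype= to the Series constructor behaves like constructing first
# and casting afterwards. A minimal sketch of that equivalence, assuming
# only the public cudf API (this block is illustrative, not part of the
# patch itself):

import cudf
from cudf.testing import assert_eq

data = [1, 2, 3]
via_ctor = cudf.Series(data, dtype="float64")     # coerce at construction
via_astype = cudf.Series(data).astype("float64")  # coerce after the fact
assert_eq(via_ctor, via_astype)                   # identical float64 series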
-@pytest.mark.parametrize("pa_type", [pa.string, pa.large_string]) -def test_series_from_large_string(pa_type): - pa_string_array = pa.array(["a", "b", "c"]).cast(pa_type()) - got = cudf.Series(pa_string_array) - expected = pd.Series(pa_string_array) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - {"1": 2}, - [1], - decimal.Decimal("1.0"), - ], -) -def test_series_to_pandas_arrow_type_nullable_raises(scalar): - pa_array = pa.array([scalar, None]) - ser = cudf.Series(pa_array) - with pytest.raises(ValueError, match=".* cannot both be set"): - ser.to_pandas(nullable=True, arrow_type=True) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - {"1": 2}, - [1], - decimal.Decimal("1.0"), - ], -) -def test_series_to_pandas_arrow_type(scalar): - pa_array = pa.array([scalar, None]) - ser = cudf.Series(pa_array) - result = ser.to_pandas(arrow_type=True) - expected = pd.Series(pd.arrays.ArrowExtensionArray(pa_array)) - pd.testing.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("axis", [None, 0, "index"]) -@pytest.mark.parametrize("data", [[1, 2], [1]]) -def test_squeeze(axis, data): - ser = cudf.Series(data) - result = ser.squeeze(axis=axis) - expected = ser.to_pandas().squeeze(axis=axis) - assert_eq(result, expected) - - -@pytest.mark.parametrize("axis", [1, "columns"]) -def test_squeeze_invalid_axis(axis): - with pytest.raises(ValueError): - cudf.Series([1]).squeeze(axis=axis) - - -def test_series_init_with_nans(): - with cudf.option_context("mode.pandas_compatible", True): - gs = cudf.Series([1, 2, 3, np.nan]) - assert gs.dtype == np.dtype("float64") - ps = pd.Series([1, 2, 3, np.nan]) - assert_eq(ps, gs) - - -@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0]) -def test_timestamp_series_init(data): - scalar = pd.Timestamp(data) - expected = pd.Series([scalar]) - actual = cudf.Series([scalar]) - - assert_eq(expected, actual) - - expected = pd.Series(scalar) - actual = cudf.Series(scalar) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0]) -def test_timedelta_series_init(data): - scalar = pd.Timedelta(data) - expected = pd.Series([scalar]) - actual = cudf.Series([scalar]) - - assert_eq(expected, actual) - - expected = pd.Series(scalar) - actual = cudf.Series(scalar) - - assert_eq(expected, actual) - - -def test_series_from_series_index_no_shallow_copy(): - ser1 = cudf.Series(range(3), index=list("abc")) - ser2 = cudf.Series(ser1) - assert ser1.index is ser2.index - - -@pytest.mark.parametrize("value", [1, 1.1]) -def test_nans_to_nulls_noop_copies_column(value): - ser1 = cudf.Series([value]) - ser2 = ser1.nans_to_nulls() - assert ser1._column is not ser2._column - - -@pytest.mark.parametrize("dropna", [False, True]) -def test_nunique_all_null(dropna): - data = [None, None] - pd_ser = pd.Series(data) - cudf_ser = cudf.Series(data) - result = pd_ser.nunique(dropna=dropna) - expected = cudf_ser.nunique(dropna=dropna) - assert result == expected - - -@pytest.mark.parametrize( - "type1", - [ - "category", - "interval[int64, right]", - "int64", - "float64", - "str", - "datetime64[ns]", - "timedelta64[ns]", - ], -) -@pytest.mark.parametrize( - "type2", - [ - "category", - "interval[int64, right]", - "int64", - "float64", - "str", - "datetime64[ns]", - "timedelta64[ns]", - ], -) -@pytest.mark.parametrize( - "as_dtype", [lambda x: x, cudf.dtype], 
ids=["string", "object"] -) -@pytest.mark.parametrize("copy", [True, False]) -def test_empty_astype_always_castable(type1, type2, as_dtype, copy): - ser = cudf.Series([], dtype=as_dtype(type1)) - result = ser.astype(as_dtype(type2), copy=copy) - expected = cudf.Series([], dtype=as_dtype(type2)) - assert_eq(result, expected) - if not copy and cudf.dtype(type1) == cudf.dtype(type2): - assert ser._column is result._column - else: - assert ser._column is not result._column - - -def test_dtype_dtypes_equal(): - ser = cudf.Series([0]) - assert ser.dtype is ser.dtypes - assert ser.dtypes is ser.to_pandas().dtypes - - -def test_null_like_to_nan_pandas_compat(): - with cudf.option_context("mode.pandas_compatible", True): - ser = cudf.Series([1, 2, np.nan, 10, None]) - pser = pd.Series([1, 2, np.nan, 10, None]) - - assert pser.dtype == ser.dtype - assert_eq(ser, pser) - - -def test_roundtrip_series_plc_column(ps): - expect = cudf.Series(ps) - actual = cudf.Series.from_pylibcudf(*expect.to_pylibcudf()) - assert_eq(expect, actual) - - -def test_non_strings_dtype_object_pandas_compat_raises(): - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(TypeError): - cudf.Series([1], dtype=object) - - -def test_series_dataframe_count_float(): - gs = cudf.Series([1, 2, 3, None, np.nan, 10], nan_as_null=False) - ps = cudf.Series([1, 2, 3, None, np.nan, 10]) - - with cudf.option_context("mode.pandas_compatible", True): - assert_eq(ps.count(), gs.count()) - assert_eq(ps.to_frame().count(), gs.to_frame().count()) - with cudf.option_context("mode.pandas_compatible", False): - assert_eq(gs.count(), gs.to_pandas(nullable=True).count()) - assert_eq( - gs.to_frame().count(), - gs.to_frame().to_pandas(nullable=True).count(), - ) - - -@pytest.mark.parametrize("arr", [np.array, cp.array, pd.Series]) -def test_construct_nonnative_array(arr): - data = [1, 2, 3.5, 4] - dtype = np.dtype("f4") - native = arr(data, dtype=dtype) - nonnative = arr(data, dtype=dtype.newbyteorder()) - result = cudf.Series(nonnative) - expected = cudf.Series(native) - assert_eq(result, expected) - - -@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_construct_all_pd_NA_with_dtype(nan_as_null): - result = cudf.Series( - [pd.NA, pd.NA], dtype=np.dtype(np.float64), nan_as_null=nan_as_null - ) - expected = cudf.Series(pa.array([None, None], type=pa.float64())) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "dtype", - [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "float32", - "float64", - "bool", - ], -) -@pytest.mark.parametrize("has_nulls", [False, True]) -@pytest.mark.parametrize("use_na_value", [False, True]) -def test_series_to_cupy(dtype, has_nulls, use_na_value): - size = 10 - if dtype == "bool": - np_data = np.array([True, False] * (size // 2), dtype=bool) - else: - np_data = np.arange(size, dtype=dtype) - - if has_nulls: - np_data = np_data.astype("object") - np_data[::2] = None - - sr = cudf.Series(np_data, dtype=dtype) - - if not has_nulls: - assert_eq(sr.values, cp.asarray(sr)) - return - - if has_nulls and not use_na_value: - with pytest.raises(ValueError, match="Column must have no nulls"): - sr.to_cupy() - return - - na_value = { - "bool": False, - "float32": 0.0, - "float64": 0.0, - }.get(dtype, 0) - expected = cp.asarray(sr.fillna(na_value)) if has_nulls else cp.asarray(sr) - assert_eq(sr.to_cupy(na_value=na_value), expected) - - -def test_to_dense_array(): - rng = np.random.default_rng(seed=0) - data = rng.random(8) - mask = 
np.asarray([0b11010110]).astype(np.byte) - sr = cudf.Series._from_column( - as_column(data, dtype=np.float64).set_mask(mask) - ) - assert sr.has_nulls - assert sr.null_count != len(sr) - filled = sr.to_numpy(na_value=np.nan) - dense = sr.dropna().to_numpy() - assert dense.size < filled.size - assert filled.size == len(sr) From 47c5732cdba8fe3cbae3d44648618001b89f0fc2 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 30 Jul 2025 17:16:50 -0700 Subject: [PATCH 031/366] Get rid of CG logic in the mixed semi-join kernel (#19536) This PR simplifies the mixed semi-join kernel by removing CG-related logic, as the underlying code path is only ever used with a CG size of 1. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19536 --- cpp/src/join/mixed_join_kernels_semi.cu | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 4c063b6202e..3cf081e5ded 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -23,13 +23,9 @@ #include #include -#include - namespace cudf { namespace detail { -namespace cg = cooperative_groups; - template CUDF_KERNEL void __launch_bounds__(block_size) mixed_join_semi(table_device_view left_table, @@ -41,10 +37,6 @@ CUDF_KERNEL void __launch_bounds__(block_size) cudf::device_span left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data) { - auto constexpr cg_size = hash_set_ref_type::cg_size; - - auto const tile = cg::tiled_partition(cg::this_thread_block()); - // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is // used to circumvent conflicts between arrays of different types between @@ -53,7 +45,7 @@ CUDF_KERNEL void __launch_bounds__(block_size) auto intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); auto thread_intermediate_storage = - intermediate_storage + (tile.meta_group_rank() * device_expression_data.num_intermediates); + intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); // Equality evaluator to use auto const evaluator = cudf::ast::detail::expression_evaluator( @@ -69,15 +61,13 @@ CUDF_KERNEL void __launch_bounds__(block_size) // Total number of rows to query the set auto const outer_num_rows = left_table.num_rows(); - // Grid stride for the tile - auto const cg_grid_stride = cudf::detail::grid_1d::grid_stride() / cg_size; + auto const grid_stride = cudf::detail::grid_1d::grid_stride(); // Find all the rows in the left table that are in the hash table - for (auto outer_row_index = cudf::detail::grid_1d::global_thread_id() / cg_size; + for (auto outer_row_index = cudf::detail::grid_1d::global_thread_id(); outer_row_index < outer_num_rows; - outer_row_index += cg_grid_stride) { - auto const result = set_ref_equality.contains(tile, outer_row_index); - if (tile.thread_rank() == 0) { left_table_keep_mask[outer_row_index] = result; } + outer_row_index += grid_stride) { + left_table_keep_mask[outer_row_index] = set_ref_equality.contains(outer_row_index); } } From 05b83e5dade9c0515e7c1c1821edcc6959e24618 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:37:36 -0700 Subject: [PATCH 032/366] Move timeout in 
cudf.pandas pandas unit tests script to ci script (#19542) When running `run-pandas-tests.sh` to debug pandas unit tests, it seems like setting `breakpoint()`s does not drop into pdb (seems to hang) due to the `timeout` command. Additionally, I think we're more concerned about timeouts when running in CI than locally, so moving this `timeout` command to the CI script instead. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/19542 --- ci/cudf_pandas_scripts/pandas-tests/run.sh | 2 +- python/cudf/cudf/pandas/scripts/run-pandas-tests.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index 74d1fc4bdaf..25ce70da01f 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -33,7 +33,7 @@ RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" -bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ +timeout 90m bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ --numprocesses 5 \ --tb=line \ -vv \ diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index ccf33ea6ee8..7ceff137df7 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -155,7 +155,7 @@ PYTEST_IGNORES=("--ignore=tests/io/parser/common/test_read_errors.py" ) -PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \ +PANDAS_CI="1" python -m pytest -p cudf.pandas \ --import-mode=importlib \ -k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and $TEST_THAT_NEED_REASON_TO_SKIP and $TEST_THAT_USE_STRING_DTYPE_GROUPBY and $TEST_THAT_USE_WEAKREFS" \ "${PYTEST_IGNORES[@]}" \ From 39a09ca7b9c4da3118e81aea0fe213dda195ab1b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Jul 2025 18:38:46 -0700 Subject: [PATCH 033/366] Move test_avro/test_api_types.py and some DataFrame tests to new cudf classic test directory structure (#19490) Towards https://github.com/rapidsai/cudf/issues/9999 Also starts a `test/dataframe` structure similar to https://github.com/rapidsai/cudf/pull/19485 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19490 --- python/cudf/cudf/tests/conftest.py | 28 + .../cudf/tests/dataframe/methods/__init__.py | 0 .../{ => dataframe/methods}/test_applymap.py | 0 .../test_convert_dtypes.py} | 0 .../methods/test_nlargest_nsmallest.py | 57 ++ .../dataframe/methods/test_scatter_by_map.py | 94 +++ .../dataframe/methods/test_sort_values.py | 194 ++++++ .../tests/dataframe/methods/test_to_arrow.py | 15 + .../cudf/tests/dataframe/test_attributes.py | 1 - .../tests/dataframe/test_binary_operations.py | 1 - .../cudf/tests/dataframe/test_combining.py | 1 - .../cudf/tests/dataframe/test_computation.py | 1 - .../cudf/tests/dataframe/test_constructing.py | 1 - .../dataframe/test_function_application.py | 1 - .../cudf/tests/dataframe/test_indexing.py | 1 - .../tests/dataframe/test_io_serialization.py | 50 -- .../cudf/cudf/tests/dataframe/test_missing.py | 1 - .../cudf/tests/dataframe/test_reindexing.py | 1 - 
.../cudf/tests/dataframe/test_reshaping.py | 1 - .../cudf/tests/dataframe/test_selecting.py | 1 - .../cudf/cudf/tests/dataframe/test_sorting.py | 1 - .../cudf/tests/dataframe/test_timeseries.py | 1 - .../{ => general_functions}/test_api_types.py | 0 .../general_functions/test_conversion.py | 1 - .../test_data_manipulation.py | 1 - .../general_functions/test_datetimelike.py | 1 - .../cudf/cudf/tests/input_output/test_avro.py | 658 +++++++++++++++++- .../cudf/tests/input_output/test_parquet.py | 35 + .../test_avro_reader_fastavro_integration.py | 658 ------------------ python/cudf/cudf/tests/test_sorting.py | 325 +-------- 30 files changed, 1081 insertions(+), 1049 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/__init__.py rename python/cudf/cudf/tests/{ => dataframe/methods}/test_applymap.py (100%) rename python/cudf/cudf/tests/dataframe/{test_conversion.py => methods/test_convert_dtypes.py} (100%) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_scatter_by_map.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_sort_values.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_attributes.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_binary_operations.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_combining.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_computation.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_constructing.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_function_application.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_indexing.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_io_serialization.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_missing.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_reindexing.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_reshaping.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_selecting.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_sorting.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_timeseries.py rename python/cudf/cudf/tests/{ => general_functions}/test_api_types.py (100%) delete mode 100644 python/cudf/cudf/tests/general_functions/test_conversion.py delete mode 100644 python/cudf/cudf/tests/general_functions/test_data_manipulation.py delete mode 100644 python/cudf/cudf/tests/general_functions/test_datetimelike.py delete mode 100644 python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 0a57efe2fbd..9d43aeff1fd 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -234,6 +234,14 @@ def integer_types_as_str(request): return request.param +@pytest.fixture(params=float_types) +def float_types_as_str(request): + """ + - "float32", "float64" + """ + return request.param + + @pytest.fixture( params=signed_integer_types + unsigned_integer_types + float_types ) @@ -287,6 +295,26 @@ def temporal_types_as_str(request): return request.param +@pytest.fixture( + params=signed_integer_types + + unsigned_integer_types + + float_types + + bool_types + + datetime_types + + timedelta_types +) +def numeric_and_temporal_types_as_str(request): + """ + - "int8", "int16", "int32", "int64" + - "uint8", "uint16", "uint32", "uint64" + - 
"float32", "float64" + - "bool" + - "datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]" + - "timedelta64[ns]", "timedelta64[us]", "timedelta64[ms]", "timedelta64[s]" + """ + return request.param + + @pytest.fixture( params=signed_integer_types + unsigned_integer_types diff --git a/python/cudf/cudf/tests/dataframe/methods/__init__.py b/python/cudf/cudf/tests/dataframe/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/dataframe/methods/test_applymap.py similarity index 100% rename from python/cudf/cudf/tests/test_applymap.py rename to python/cudf/cudf/tests/dataframe/methods/test_applymap.py diff --git a/python/cudf/cudf/tests/dataframe/test_conversion.py b/python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py similarity index 100% rename from python/cudf/cudf/tests/dataframe/test_conversion.py rename to python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py diff --git a/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py b/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py new file mode 100644 index 00000000000..6c148cef4a6 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +from cudf import DataFrame +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("n", [10, 5]) +@pytest.mark.parametrize("op", ["nsmallest", "nlargest"]) +@pytest.mark.parametrize("columns", ["a", ["b", "a"]]) +def test_dataframe_nlargest_nsmallest(n, op, columns): + nelem = 10 + rng = np.random.default_rng(seed=0) + aa = rng.random(nelem) + bb = rng.random(nelem) + + df = DataFrame({"a": aa, "b": bb}) + pdf = df.to_pandas() + assert_eq(getattr(df, op)(n, columns), getattr(pdf, op)(n, columns)) + + +@pytest.mark.parametrize( + "sliceobj", [slice(1, None), slice(None, -1), slice(1, -1)] +) +def test_dataframe_nlargest_sliced(sliceobj): + nelem = 20 + n = 10 + rng = np.random.default_rng(seed=0) + df = pd.DataFrame() + df["a"] = rng.random(nelem) + df["b"] = rng.random(nelem) + + expect = df[sliceobj].nlargest(n, "a") + gdf = DataFrame.from_pandas(df) + got = gdf[sliceobj].nlargest(n, "a") + assert (got.to_pandas() == expect).all().all() + + +@pytest.mark.parametrize( + "sliceobj", [slice(1, None), slice(None, -1), slice(1, -1)] +) +def test_dataframe_nsmallest_sliced(sliceobj): + nelem = 20 + n = 10 + rng = np.random.default_rng(seed=0) + df = pd.DataFrame() + df["a"] = rng.random(nelem) + df["b"] = rng.random(nelem) + + expect = df[sliceobj].nsmallest(n, "a") + gdf = DataFrame.from_pandas(df) + got = gdf[sliceobj].nsmallest(n, "a") + assert (got.to_pandas() == expect).all().all() diff --git a/python/cudf/cudf/tests/dataframe/methods/test_scatter_by_map.py b/python/cudf/cudf/tests/dataframe/methods/test_scatter_by_map.py new file mode 100644 index 00000000000..b6897b3c052 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_scatter_by_map.py @@ -0,0 +1,94 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
+ + +import numpy as np +import pytest + +from cudf import DataFrame +from cudf.core.column import NumericalColumn + + +@pytest.mark.parametrize("map_size", [1, 8]) +@pytest.mark.parametrize("nelem", [1, 10]) +@pytest.mark.parametrize("keep", [True, False]) +def test_dataframe_scatter_by_map(map_size, nelem, keep): + strlist = ["dog", "cat", "fish", "bird", "pig", "fox", "cow", "goat"] + rng = np.random.default_rng(seed=0) + df = DataFrame( + { + "a": rng.choice(strlist[:map_size], nelem), + "b": rng.uniform(low=0, high=map_size, size=nelem), + "c": rng.integers(map_size, size=nelem), + } + ) + df["d"] = df["a"].astype("category") + + def _check_scatter_by_map(dfs, col): + assert len(dfs) == map_size + nrows = 0 + name = col.name + for i, df in enumerate(dfs): + nrows += len(df) + if len(df) > 0: + # Make sure the column types were preserved + assert isinstance(df[name]._column, type(col._column)) + try: + sr = df[name].astype(np.int32) + except ValueError: + sr = df[name] + assert sr.nunique() <= 1 + if sr.nunique() == 1: + if isinstance(df[name]._column, NumericalColumn): + assert sr.iloc[0] == i + assert nrows == nelem + + with pytest.warns(UserWarning): + _check_scatter_by_map( + df.scatter_by_map("a", map_size, keep_index=keep), df["a"] + ) + _check_scatter_by_map( + df.scatter_by_map("b", map_size, keep_index=keep), df["b"] + ) + _check_scatter_by_map( + df.scatter_by_map("c", map_size, keep_index=keep), df["c"] + ) + with pytest.warns(UserWarning): + _check_scatter_by_map( + df.scatter_by_map("d", map_size, keep_index=keep), df["d"] + ) + + if map_size == 2 and nelem == 100: + with pytest.warns(UserWarning): + df.scatter_by_map("a") # Auto-detect map_size + with pytest.raises(ValueError): + with pytest.warns(UserWarning): + df.scatter_by_map("a", map_size=1, debug=True) # Bad map_size + + # Test Index + df2 = df.set_index("c") + generic_result = df2.scatter_by_map("b", map_size, keep_index=keep) + _check_scatter_by_map(generic_result, df2["b"]) + if keep: + for frame in generic_result: + assert isinstance(frame.index, type(df2.index)) + + # Test MultiIndex + df2 = df.set_index(["a", "c"]) + multiindex_result = df2.scatter_by_map("b", map_size, keep_index=keep) + _check_scatter_by_map(multiindex_result, df2["b"]) + if keep: + for frame in multiindex_result: + assert isinstance(frame.index, type(df2.index)) + + +@pytest.mark.parametrize("ids", [[-1, 0, 1, 0], [0, 2, 3, 0]]) +def test_dataframe_scatter_by_map_7513(ids): + df = DataFrame({"id": ids, "val": [0, 1, 2, 3]}) + with pytest.raises(ValueError): + df.scatter_by_map(df["id"]) + + +def test_dataframe_scatter_by_map_empty(): + df = DataFrame({"a": [], "b": []}, dtype="float64") + scattered = df.scatter_by_map(df["a"]) + assert len(scattered) == 0 diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py new file mode 100644 index 00000000000..1c322ff67af --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py @@ -0,0 +1,194 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
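# The tests below pin DataFrame.sort_values to the pandas contract for null
# ordering: na_position decides whether nulls sort before ("first") or after
# ("last") all valid values, independently of ascending. A minimal sketch
# (illustrative only; not used by the tests in this file):
if __name__ == "__main__":
    import cudf

    demo = cudf.DataFrame({"a": [3.0, None, 1.0]})
    print(demo.sort_values("a", na_position="first"))  # null row sorts first
    print(demo.sort_values("a", na_position="last"))   # null row sorts last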
+ +import string + +import numpy as np +import pandas as pd +import pytest + +from cudf import DataFrame, option_context +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +def test_dataframe_sort_values(numeric_types_as_str): + nelem = 25 + rng = np.random.default_rng(seed=0) + df = DataFrame() + df["a"] = aa = (100 * rng.random(nelem)).astype(numeric_types_as_str) + df["b"] = bb = (100 * rng.random(nelem)).astype(numeric_types_as_str) + sorted_df = df.sort_values(by="a") + # Check + sorted_index = np.argsort(aa, kind="mergesort") + assert_eq(sorted_df.index.values, sorted_index) + assert_eq(sorted_df["a"].values, aa[sorted_index]) + assert_eq(sorted_df["b"].values, bb[sorted_index]) + + +def test_sort_values_nans_pandas_compat(): + data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} + with option_context("mode.pandas_compatible", True): + result = DataFrame(data).sort_values("b", na_position="first") + expected = pd.DataFrame(data).sort_values("b", na_position="first") + assert_eq(result, expected) + + +@pytest.mark.parametrize("index", ["a", "b", ["a", "b"]]) +def test_dataframe_sort_values_ignore_index(index, ignore_index): + if ( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and isinstance(index, list) + and not ignore_index + ): + pytest.skip( + reason="Unstable sorting by pandas(numpy): https://github.com/pandas-dev/pandas/issues/57531" + ) + + gdf = DataFrame( + {"a": [1, 3, 5, 2, 4], "b": [1, 1, 2, 2, 3], "c": [9, 7, 7, 7, 1]} + ) + gdf = gdf.set_index(index) + + pdf = gdf.to_pandas() + + expect = pdf.sort_values(list(pdf.columns), ignore_index=ignore_index) + got = gdf.sort_values((gdf.columns), ignore_index=ignore_index) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "sliceobj", [slice(1, None), slice(None, -1), slice(1, -1)] +) +def test_dataframe_sort_values_sliced(sliceobj): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame({"a": rng.random(20)}) + + expect = df[sliceobj]["a"].sort_values() + gdf = DataFrame.from_pandas(df) + got = gdf[sliceobj]["a"].sort_values() + assert (got.to_pandas() == expect).all() + + +@pytest.mark.parametrize("num_rows", [0, 5]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_dataframe_multi_column( + num_rows, numeric_and_temporal_types_as_str, ascending, na_position +): + num_cols = 5 + rng = np.random.default_rng(seed=0) + by = list(string.ascii_lowercase[:num_cols]) + pdf = pd.DataFrame() + + for i in range(5): + colname = string.ascii_lowercase[i] + data = rng.integers(0, 26, num_rows).astype( + numeric_and_temporal_types_as_str + ) + pdf[colname] = data + + gdf = DataFrame.from_pandas(pdf) + + got = gdf.sort_values(by, ascending=ascending, na_position=na_position) + expect = pdf.sort_values(by, ascending=ascending, na_position=na_position) + + assert_eq( + got[by].reset_index(drop=True), expect[by].reset_index(drop=True) + ) + + +@pytest.mark.parametrize("num_rows", [0, 3]) +@pytest.mark.parametrize("nulls", ["some", "all"]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_dataframe_multi_column_nulls( + num_rows, float_types_as_str, nulls, ascending, na_position +): + num_cols = 2 + rng = np.random.default_rng(seed=0) + by = list(string.ascii_lowercase[:num_cols]) + pdf = pd.DataFrame() + + for colname in string.ascii_lowercase[:3]: + data = rng.integers(0, 26, num_rows).astype(float_types_as_str) + if nulls == "some": + 
idx = np.array([], dtype="int64") + if num_rows > 0: + idx = rng.choice( + num_rows, size=int(num_rows / 4), replace=False + ) + data[idx] = np.nan + elif nulls == "all": + data[:] = np.nan + pdf[colname] = data + + gdf = DataFrame.from_pandas(pdf) + + got = gdf.sort_values(by, ascending=ascending, na_position=na_position) + expect = pdf.sort_values(by, ascending=ascending, na_position=na_position) + + assert_eq( + got[by].reset_index(drop=True), expect[by].reset_index(drop=True) + ) + + +@pytest.mark.parametrize("ascending1", [True, False]) +@pytest.mark.parametrize("ascending2", [True, False]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_dataframe_multi_column_nulls_multiple_ascending( + ascending1, ascending2, na_position +): + ascending = (ascending1, ascending2) + pdf = pd.DataFrame( + {"a": [3, 1, None, 2, 2, None, 1], "b": [1, 2, 3, 4, 5, 6, 7]} + ) + gdf = DataFrame.from_pandas(pdf) + expect = pdf.sort_values( + by=["a", "b"], ascending=ascending, na_position=na_position + ) + actual = gdf.sort_values( + by=["a", "b"], ascending=ascending, na_position=na_position + ) + + assert_eq(actual, expect) + + +@pytest.mark.parametrize( + "kind", ["quicksort", "mergesort", "heapsort", "stable"] +) +def test_dataframe_sort_values_kind(numeric_types_as_str, kind): + nelem = 20 + rng = np.random.default_rng(seed=0) + df = DataFrame() + df["a"] = aa = (100 * rng.random(nelem)).astype(numeric_types_as_str) + df["b"] = bb = (100 * rng.random(nelem)).astype(numeric_types_as_str) + with expect_warning_if(kind != "quicksort", UserWarning): + sorted_df = df.sort_values(by="a", kind=kind) + # Check + sorted_index = np.argsort(aa, kind="mergesort") + assert_eq(sorted_df.index.values, sorted_index) + assert_eq(sorted_df["a"].values, aa[sorted_index]) + assert_eq(sorted_df["b"].values, bb[sorted_index]) + + +def test_sort_values_by_index_level(): + df = pd.DataFrame({"a": [1, 3, 2]}, index=pd.Index([1, 3, 2], name="b")) + cudf_df = DataFrame.from_pandas(df) + result = cudf_df.sort_values("b") + expected = df.sort_values("b") + assert_eq(result, expected) + + +def test_sort_values_by_ambiguous(): + df = pd.DataFrame({"a": [1, 3, 2]}, index=pd.Index([1, 3, 2], name="a")) + cudf_df = DataFrame.from_pandas(df) + + assert_exceptions_equal( + lfunc=df.sort_values, + rfunc=cudf_df.sort_values, + lfunc_args_and_kwargs=(["a"], {}), + rfunc_args_and_kwargs=(["a"], {}), + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py new file mode 100644 index 00000000000..4bbed8fab9e --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py @@ -0,0 +1,15 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest + +import cudf + + +@pytest.mark.parametrize("preserve_index", [False, True, None]) +def test_dataframe_to_arrow_preserve_index(preserve_index): + df = cudf.DataFrame({"x": ["cat", "dog"] * 5}) + pf = df.to_pandas() + expect = pa.Table.from_pandas(pf, preserve_index=preserve_index).schema + got = df.to_arrow(preserve_index=preserve_index).schema + assert expect == got diff --git a/python/cudf/cudf/tests/dataframe/test_attributes.py b/python/cudf/cudf/tests/dataframe/test_attributes.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/dataframe/test_attributes.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
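The new `test_to_arrow.py` above pins `DataFrame.to_arrow(preserve_index=...)` to pyarrow's own pandas conversion. Roughly, the three settings behave as follows; a minimal sketch, assuming pyarrow's usual `__index_level_0__` naming for serialized default indexes (illustrative, not part of the patch):

    import cudf

    df = cudf.DataFrame({"x": ["cat", "dog"]})
    # True: materialize the index as a column; False: drop it entirely;
    # None: let a RangeIndex live only in the schema's pandas metadata.
    for preserve_index in (True, False, None):
        print(preserve_index, df.to_arrow(preserve_index=preserve_index).column_names)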
diff --git a/python/cudf/cudf/tests/dataframe/test_binary_operations.py b/python/cudf/cudf/tests/dataframe/test_binary_operations.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_binary_operations.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_combining.py b/python/cudf/cudf/tests/dataframe/test_combining.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_combining.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_computation.py b/python/cudf/cudf/tests/dataframe/test_computation.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_computation.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_constructing.py b/python/cudf/cudf/tests/dataframe/test_constructing.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_constructing.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_function_application.py b/python/cudf/cudf/tests/dataframe/test_function_application.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_function_application.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_indexing.py b/python/cudf/cudf/tests/dataframe/test_indexing.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_indexing.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_io_serialization.py b/python/cudf/cudf/tests/dataframe/test_io_serialization.py
deleted file mode 100644
index 04508dbf2ec..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_io_serialization.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2023-2025, NVIDIA CORPORATION.
-from io import BytesIO
-
-import pandas as pd
-import pyarrow as pa
-import pyarrow.parquet as pq
-import pytest
-
-import cudf
-from cudf.testing import assert_eq
-
-
-@pytest.mark.parametrize(
-    "index",
-    [range(1, 11), list(range(1, 11)), range(1, 11)[::2]],
-    ids=["RangeIndex", "IntIndex", "StridedRange"],
-)
-@pytest.mark.parametrize("write_index", [False, True, None])
-@pytest.mark.parametrize("empty", [False, True], ids=["nonempty", "empty"])
-def test_dataframe_parquet_roundtrip(index, write_index, empty):
-    if empty:
-        data = {}
-    else:
-        data = {"a": [i * 2 for i in index]}
-    df = cudf.DataFrame(data=data, index=index)
-    pf = pd.DataFrame(data=data, index=index)
-    gpu_buf = BytesIO()
-    cpu_buf = BytesIO()
-
-    df.to_parquet(gpu_buf, index=write_index)
-    pf.to_parquet(cpu_buf, index=write_index)
-    gpu_table = pq.read_table(gpu_buf)
-    cpu_table = pq.read_table(cpu_buf)
-    metadata_equal = (
-        gpu_table.schema.pandas_metadata == cpu_table.schema.pandas_metadata
-    )
-    assert metadata_equal
-
-    gpu_read = cudf.read_parquet(gpu_buf)
-    cpu_read = cudf.read_parquet(cpu_buf)
-    assert_eq(gpu_read, cpu_read)
-
-
-@pytest.mark.parametrize("preserve_index", [False, True, None])
-def test_dataframe_to_arrow_preserve_index(preserve_index):
-    df = cudf.DataFrame({"x": ["cat", "dog"] * 5})
-    pf = df.to_pandas()
-    expect = pa.Table.from_pandas(pf, preserve_index=preserve_index).schema
-    got = df.to_arrow(preserve_index=preserve_index).schema
-    assert expect == got
diff --git a/python/cudf/cudf/tests/dataframe/test_missing.py b/python/cudf/cudf/tests/dataframe/test_missing.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_missing.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_reindexing.py b/python/cudf/cudf/tests/dataframe/test_reindexing.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_reindexing.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_reshaping.py b/python/cudf/cudf/tests/dataframe/test_reshaping.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_reshaping.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_selecting.py b/python/cudf/cudf/tests/dataframe/test_selecting.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_selecting.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_sorting.py b/python/cudf/cudf/tests/dataframe/test_sorting.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_sorting.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
diff --git a/python/cudf/cudf/tests/dataframe/test_timeseries.py b/python/cudf/cudf/tests/dataframe/test_timeseries.py
deleted file mode 100644
index 06777c8e6af..00000000000
--- a/python/cudf/cudf/tests/dataframe/test_timeseries.py
+++ /dev/null
@@ -1 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
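The `test_dataframe_parquet_roundtrip` case removed above (and relocated under `input_output/test_parquet.py` per the diffstat) hinges on one detail worth calling out: whether the index survives a parquet round trip is recorded in the pandas metadata of the file's schema. A minimal standalone sketch of that mechanism, assuming only the public `cudf` and `pyarrow.parquet` APIs (the buffer and frame here are illustrative, not taken from the patch):

    from io import BytesIO

    import pyarrow.parquet as pq

    import cudf

    buf = BytesIO()
    cudf.DataFrame({"a": [2, 4]}, index=[1, 2]).to_parquet(buf, index=True)
    buf.seek(0)
    # index=True forces the index into the file; the schema's pandas metadata
    # records how readers should reconstruct it on the way back out.
    print(pq.read_table(buf).schema.pandas_metadata["index_columns"])

With `index=False` the metadata instead marks the index as absent, which is exactly the equality the test asserts against the pandas-written file.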
diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/general_functions/test_api_types.py similarity index 100% rename from python/cudf/cudf/tests/test_api_types.py rename to python/cudf/cudf/tests/general_functions/test_api_types.py diff --git a/python/cudf/cudf/tests/general_functions/test_conversion.py b/python/cudf/cudf/tests/general_functions/test_conversion.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/general_functions/test_conversion.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/general_functions/test_data_manipulation.py b/python/cudf/cudf/tests/general_functions/test_data_manipulation.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/general_functions/test_data_manipulation.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/general_functions/test_datetimelike.py b/python/cudf/cudf/tests/general_functions/test_datetimelike.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/general_functions/test_datetimelike.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/input_output/test_avro.py b/python/cudf/cudf/tests/input_output/test_avro.py index 06777c8e6af..43cf83dc050 100644 --- a/python/cudf/cudf/tests/input_output/test_avro.py +++ b/python/cudf/cudf/tests/input_output/test_avro.py @@ -1 +1,657 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import datetime +import io +import pathlib + +import fastavro +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing.dataset_generator import rand_dataframe + + +def cudf_from_avro_util(schema: dict, records: list) -> cudf.DataFrame: + schema = [] if schema is None else fastavro.parse_schema(schema) + buffer = io.BytesIO() + fastavro.writer(buffer, schema, records) + buffer.seek(0) + return cudf.read_avro(buffer) + + +@pytest.fixture( + params=[ + ("boolean", "bool"), + ("int", "int32"), + ("long", "int64"), + ("float", "float32"), + ("double", "float64"), + ("bytes", "str"), + ("string", "str"), + ] +) +def avro_type_params(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def nullable(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def prepend_null(request): + return request.param + + +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +def test_can_detect_dtype_from_avro_type( + avro_type_params, namespace, nullable +): + avro_type, expected_dtype = avro_type_params + avro_type = avro_type if not nullable else ["null", avro_type] + + schema = fastavro.parse_schema( + { + "type": "record", + "name": "test", + "namespace": namespace, + "fields": [{"name": "prop", "type": avro_type}], + } + ) + + actual = cudf_from_avro_util(schema, []) + + expected = cudf.DataFrame( + {"prop": cudf.Series(None, None, expected_dtype)} + ) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +def test_can_detect_dtype_from_avro_type_nested( + avro_type_params, namespace, nullable +): + avro_type, expected_dtype = avro_type_params + avro_type = avro_type if not nullable else ["null", avro_type] + + schema_leaf = { + "name": "leaf", + "type": "record", + "fields": [{"name": "prop3", "type": avro_type}], + } + + schema_child = { + "name": "child", + "type": "record", + "fields": [{"name": "prop2", "type": schema_leaf}], + } + + schema_root = { + "name": "root", + "type": "record", + "namespace": namespace, + "fields": [{"name": "prop1", "type": schema_child}], + } + + actual = cudf_from_avro_util(schema_root, []) + + col_name = "{ns}child.{ns}leaf.prop3".format( + ns="" if namespace is None else namespace + "." 
+ ) + + expected = cudf.DataFrame( + {col_name: cudf.Series(None, None, expected_dtype)} + ) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "avro_type, cudf_type, avro_val, cudf_val", + [ + ("boolean", "bool", True, True), + ("boolean", "bool", False, False), + ("int", "int32", 1234, 1234), + ("long", "int64", 1234, 1234), + ("float", "float32", 12.34, 12.34), + ("double", "float64", 12.34, 12.34), + ("string", "str", "heyϴ", "heyϴ"), + ], +) +def test_can_parse_single_value(avro_type, cudf_type, avro_val, cudf_val): + schema_root = { + "name": "root", + "type": "record", + "fields": [{"name": "prop", "type": ["null", avro_type]}], + } + + records = [ + {"prop": avro_val}, + ] + + actual = cudf_from_avro_util(schema_root, records) + + expected = cudf.DataFrame( + {"prop": cudf.Series(data=[cudf_val], dtype=cudf_type)} + ) + + assert_eq(expected, actual) + + +def test_can_parse_single_null(avro_type_params): + avro_type, expected_dtype = avro_type_params + schema_root = { + "name": "root", + "type": "record", + "fields": [{"name": "prop", "type": ["null", avro_type]}], + } + + records = [{"prop": None}] + + actual = cudf_from_avro_util(schema_root, records) + + expected = cudf.DataFrame( + {"prop": cudf.Series(data=[None], dtype=expected_dtype)} + ) + + assert_eq(expected, actual) + + +def test_can_parse_no_data(avro_type_params): + avro_type, expected_dtype = avro_type_params + schema_root = { + "name": "root", + "type": "record", + "fields": [{"name": "prop", "type": ["null", avro_type]}], + } + + records = [] + + actual = cudf_from_avro_util(schema_root, records) + + expected = cudf.DataFrame( + {"prop": cudf.Series(data=[], dtype=expected_dtype)} + ) + + assert_eq(expected, actual) + + +@pytest.mark.xfail( + reason="cudf avro reader is unable to parse zero-field metadata." +) +def test_can_parse_no_fields(avro_type_params): + avro_type, expected_dtype = avro_type_params + schema_root = { + "name": "root", + "type": "record", + "fields": [], + } + + records = [] + + actual = cudf_from_avro_util(schema_root, records) + + expected = cudf.DataFrame() + + assert_eq(expected, actual) + + +def test_can_parse_no_schema(): + schema_root = None + records = [] + actual = cudf_from_avro_util(schema_root, records) + expected = cudf.DataFrame() + assert_eq(expected, actual) + + +@pytest.mark.parametrize("rows", [0, 20]) +@pytest.mark.parametrize("codec", ["null", "deflate", "snappy"]) +def test_avro_decompression(set_decomp_env_vars, rows, codec): + schema = { + "name": "root", + "type": "record", + "fields": [ + {"name": "0", "type": "int"}, + {"name": "1", "type": "string"}, + ], + } + + # N.B. rand_dataframe() is brutally slow for some reason. Switching to + # np.random() speeds things up by a factor of 10. 
+ # See also: https://github.com/rapidsai/cudf/issues/13128 + df = rand_dataframe( + [ + {"dtype": "int32", "null_frequency": 0, "cardinality": 1000}, + { + "dtype": "str", + "null_frequency": 0, + "cardinality": 100, + "max_string_length": 10, + }, + ], + rows, + seed=0, + use_threads=False, + ) + expected_df = cudf.DataFrame.from_arrow(df) + + records = df.to_pandas().to_dict(orient="records") + + buffer = io.BytesIO() + fastavro.writer(buffer, schema, records, codec=codec) + buffer.seek(0) + got_df = cudf.read_avro(buffer) + + assert_eq(expected_df, got_df) + + +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +def test_can_detect_dtypes_from_avro_logical_type( + namespace, + nullable, + prepend_null, +): + logical_type = "date" + primitive_type = "int" + expected_dtype = "datetime64[s]" + avro_type = [{"logicalType": logical_type, "type": primitive_type}] + if nullable: + if prepend_null: + avro_type.insert(0, "null") + else: + avro_type.append("null") + + schema = fastavro.parse_schema( + { + "type": "record", + "name": "test", + "namespace": namespace, + "fields": [{"name": "prop", "type": avro_type}], + } + ) + + actual = cudf_from_avro_util(schema, []) + + expected = cudf.DataFrame( + {"prop": cudf.Series(None, None, expected_dtype)} + ) + + assert_eq(expected, actual) + + +def get_days_from_epoch(date: datetime.date | None) -> int | None: + if date is None: + return None + return (date - datetime.date(1970, 1, 1)).days + + +@pytest.mark.parametrize("namespace", [None, "root_ns"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas (datetime(9999, ...) too large)", +) +def test_can_parse_avro_date_logical_type(namespace, nullable, prepend_null): + avro_type = {"logicalType": "date", "type": "int"} + if nullable: + if prepend_null: + avro_type = ["null", avro_type] + else: + avro_type = [avro_type, "null"] + + schema_dict = { + "type": "record", + "name": "test", + "fields": [ + {"name": "o_date", "type": avro_type}, + ], + } + + if namespace: + schema_dict["namespace"] = namespace + + schema = fastavro.parse_schema(schema_dict) + + # Insert some None values in no particular order. These will get converted + # into avro "nulls" by the fastavro writer (or filtered out if we're not + # nullable). The first and last dates are epoch min/max values, the rest + # are arbitrarily chosen. + dates = [ + None, + datetime.date(1970, 1, 1), + datetime.date(1970, 1, 2), + datetime.date(1981, 10, 25), + None, + None, + datetime.date(2012, 5, 18), + None, + datetime.date(2019, 9, 3), + None, + datetime.date(9999, 12, 31), + ] + + if not nullable: + dates = [date for date in dates if date is not None] + + days_from_epoch = [get_days_from_epoch(date) for date in dates] + + records = [{"o_date": day} for day in days_from_epoch] + + actual = cudf_from_avro_util(schema, records) + + expected = cudf.DataFrame( + {"o_date": cudf.Series(dates, dtype="datetime64[s]")} + ) + + assert_eq(expected, actual) + + +def test_alltypes_plain_avro(): + # During development of the logical type support, the Java avro tests were + # triggering CUDA kernel crashes (null pointer dereferences). We were able + # to replicate the behavior in a C++ test case, and then subsequently came + # up with this Python unit test to also trigger the problematic code path. + # + # So, unlike the other tests, this test is inherently reactive in nature, + # added simply to verify we fixed the problematic code path that was + # causing CUDA kernel crashes. 
+ # + # See https://github.com/rapidsai/cudf/pull/12788#issuecomment-1468822875 + # for more information. + relpath = "../../../../java/src/test/resources/alltypes_plain.avro" + path = pathlib.Path(__file__).parent.parent.joinpath(relpath).resolve() + assert path.is_file(), path + path = str(path) + + with open(path, "rb") as f: + reader = fastavro.reader(f) + records = [record for record in reader] + + # For reference: + # + # >>> from pprint import pprint + # >>> pprint(reader.writer_schema) + # {'fields': [{'name': 'id', 'type': ['int', 'null']}, + # {'name': 'bool_col', 'type': ['boolean', 'null']}, + # {'name': 'tinyint_col', 'type': ['int', 'null']}, + # {'name': 'smallint_col', 'type': ['int', 'null']}, + # {'name': 'int_col', 'type': ['int', 'null']}, + # {'name': 'bigint_col', 'type': ['long', 'null']}, + # {'name': 'float_col', 'type': ['float', 'null']}, + # {'name': 'double_col', 'type': ['double', 'null']}, + # {'name': 'date_string_col', 'type': ['bytes', 'null']}, + # {'name': 'string_col', 'type': ['bytes', 'null']}, + # {'name': 'timestamp_col', + # 'type': [{'logicalType': 'timestamp-micros', + # 'type': 'long'}, + # 'null']}], + # 'name': 'topLevelRecord', + # 'type': 'record'} + # + # >>> pprint(records[0]) + # {'bigint_col': 0, + # 'bool_col': True, + # 'date_string_col': b'03/01/09', + # 'double_col': 0.0, + # 'float_col': 0.0, + # 'id': 4, + # 'int_col': 0, + # 'smallint_col': 0, + # 'string_col': b'0', + # 'timestamp_col': datetime.datetime(2009, 3, 1, 0, 0, + # tzinfo=datetime.timezone.utc), + # 'tinyint_col': 0} + + # Nothing particularly special about these columns, other than them being + # the ones that @davidwendt used to coerce the crash. + columns = ["bool_col", "int_col", "timestamp_col"] + + # This next line would trigger the fatal CUDA kernel crash. + actual = cudf.read_avro(path, columns=columns) + + # If we get here, we haven't crashed, obviously. Verify the returned data + # frame meets our expectations. We need to fiddle with the dtypes of the + # expected data frame in order to correctly match the schema definition and + # our corresponding read_avro()-returned data frame. + + data = [{column: row[column] for column in columns} for row in records] + + # discard timezone information as we don't support it: + expected = pd.DataFrame(data) + expected["timestamp_col"].dt.tz_localize(None) + + # The fastavro.reader supports the `'logicalType': 'timestamp-micros'` used + # by the 'timestamp_col' column, which is converted into Python + # datetime.datetime() objects (see output of pprint(records[0]) above). + # As we don't support that logical type yet in cudf, we need to convert to + # int64, then divide by 1000 to convert from nanoseconds to microseconds. + timestamps = expected["timestamp_col"].astype("int64") + timestamps //= 1000 + expected["timestamp_col"] = timestamps + + # Furthermore, we need to force the 'int_col' into an int32, per the schema + # definition. (It ends up as an int64 due to cudf.DataFrame() defaulting + # all Python int values to int64 sans a dtype= override.) + expected["int_col"] = expected["int_col"].astype("int32") + + assert_eq(actual, expected) + + +def multiblock_testname_ids(param): + (total_rows, num_rows, skip_rows, sync_interval) = param + return f"{total_rows=}-{num_rows=}-{skip_rows=}-{sync_interval=}" + + +# The following values are used to test various boundary conditions associated +# with multiblock avro files. 
Each tuple consists of four values: total number +# of rows to generate, number of rows to limit the result set to, number of +# rows to skip, and number of rows per block. If the total number of rows and +# number of rows (i.e. first and second tuple elements) are equal, it means +# that all rows will be returned. If the rows per block also equals the first +# two numbers, it means that a single block will be used. +@pytest.fixture( + ids=multiblock_testname_ids, + params=[ + (10, 10, 9, 9), + (10, 10, 9, 5), + (10, 10, 9, 3), + (10, 10, 9, 2), + (10, 10, 9, 10), + (10, 10, 8, 2), + (10, 10, 5, 5), + (10, 10, 2, 9), + (10, 10, 2, 2), + (10, 10, 1, 9), + (10, 10, 1, 5), + (10, 10, 1, 2), + (10, 10, 1, 10), + (10, 10, 10, 9), + (10, 10, 10, 5), + (10, 10, 10, 2), + (10, 10, 10, 10), + (10, 10, 0, 9), + (10, 10, 0, 5), + (10, 10, 0, 2), + (10, 10, 0, 10), + (100, 100, 99, 10), + (100, 100, 90, 90), + (100, 100, 90, 89), + (100, 100, 90, 88), + (100, 100, 90, 87), + (100, 100, 90, 5), + (100, 100, 89, 90), + (100, 100, 87, 90), + (100, 100, 50, 7), + (100, 100, 50, 31), + (10, 1, 8, 9), + (100, 1, 99, 10), + (100, 1, 98, 10), + (100, 1, 97, 10), + (100, 3, 90, 87), + (100, 4, 90, 5), + (100, 2, 89, 90), + (100, 9, 87, 90), + (100, 20, 50, 7), + (100, 10, 50, 31), + (100, 20, 50, 31), + (100, 30, 50, 31), + (256, 256, 0, 256), + (256, 256, 0, 32), + (256, 256, 0, 31), + (256, 256, 0, 33), + (256, 256, 31, 32), + (256, 256, 32, 31), + (256, 256, 31, 33), + (512, 512, 0, 32), + (512, 512, 0, 31), + (512, 512, 0, 33), + (512, 512, 31, 32), + (512, 512, 32, 31), + (512, 512, 31, 33), + (1024, 1024, 0, 1), + (1024, 1024, 0, 3), + (1024, 1024, 0, 7), + (1024, 1024, 0, 8), + (1024, 1024, 0, 9), + (1024, 1024, 0, 15), + (1024, 1024, 0, 16), + (1024, 1024, 0, 17), + (1024, 1024, 0, 32), + (1024, 1024, 0, 31), + (1024, 1024, 0, 33), + (1024, 1024, 31, 32), + (1024, 1024, 32, 31), + (1024, 1024, 31, 33), + (2048, 2048, 0, 31), + (2048, 2048, 0, 32), + (2048, 2048, 0, 33), + (2048, 2048, 0, 2048), + ], +) +def total_rows_and_num_rows_and_skip_rows_and_rows_per_block(request): + return request.param + + +# N.B. The float32 and float64 types are chosen specifically to exercise +# the only path in the avro reader GPU code that can process multiple +# rows in parallel (via warp-level parallelism). See the logic around +# the line `if (cur + min_row_size * rows_remaining == end)` in +# gpuDecodeAvroColumnData(). +@pytest.mark.parametrize("dtype", ["str", "float32", "float64"]) +@pytest.mark.parametrize( + "use_sync_interval", + [True, False], + ids=["use_sync_interval", "ignore_sync_interval"], +) +@pytest.mark.parametrize("codec", ["null", "deflate", "snappy"]) +def test_avro_reader_multiblock( + dtype, + codec, + use_sync_interval, + total_rows_and_num_rows_and_skip_rows_and_rows_per_block, +): + ( + total_rows, + num_rows, + skip_rows, + rows_per_block, + ) = total_rows_and_num_rows_and_skip_rows_and_rows_per_block + + assert total_rows >= num_rows + assert rows_per_block <= total_rows + + limit_rows = num_rows != total_rows + if limit_rows: + assert total_rows >= num_rows + skip_rows + + if dtype == "str": + avro_type = "string" + + # Generate a list of strings, each of which is a 6-digit number, padded + # with leading zeros. This data set was very useful during development + # of the multiblock avro reader logic, as you get implicit feedback as + # to what may have gone wrong when the test fails, based on the + # expected vs actual values. 
+ values = [f"{i:0>6}" for i in range(0, total_rows)] + + # Strings are encoded in avro with a zigzag-encoded length prefix, and + # then the string data. As all of our strings are fixed at length 6, + # we only need one byte to encode the length prefix (0xc). Thus, our + # bytes per row is 6 + 1 = 7. + bytes_per_row = len(values[0]) + 1 + assert bytes_per_row == 7, bytes_per_row + else: + assert dtype in ("float32", "float64") + avro_type = "float" if dtype == "float32" else "double" + rng = np.random.default_rng(seed=0) + # We don't use rand_dataframe() here, because it increases the + # execution time of each test by a factor of 10 or more (it appears + # to use a very costly approach to generating random data). + # See also: https://github.com/rapidsai/cudf/issues/13128 + values = rng.random(total_rows).astype(dtype) + bytes_per_row = values.dtype.itemsize + + # The sync_interval is the number of bytes between sync blocks. We know + # how many bytes we need per row, so we can calculate the number of bytes + # per block by multiplying the number of rows per block by the bytes per + # row. This is the sync interval. + total_bytes_per_block = rows_per_block * bytes_per_row + sync_interval = total_bytes_per_block + + source_df = cudf.DataFrame({"0": pd.Series(values)}) + + if limit_rows: + expected_df = source_df[skip_rows : skip_rows + num_rows].reset_index( + drop=True + ) + else: + expected_df = source_df[skip_rows:].reset_index(drop=True) + + records = source_df.to_pandas().to_dict(orient="records") + + schema = { + "name": "root", + "type": "record", + "fields": [ + {"name": "0", "type": avro_type}, + ], + } + + if use_sync_interval: + kwds = {"sync_interval": sync_interval} + else: + kwds = {} + + kwds["codec"] = codec + + buffer = io.BytesIO() + fastavro.writer(buffer, schema, records, **kwds) + buffer.seek(0) + + if not limit_rows: + # Explicitly set num_rows to None if we want to read all rows. This + # ensures we exercise the logic behind a read_avro() call where the + # caller doesn't specify the number of rows desired (which will be the + # most common use case). + num_rows = None + actual_df = cudf.read_avro(buffer, skiprows=skip_rows, num_rows=num_rows) + + assert_eq(expected_df, actual_df) diff --git a/python/cudf/cudf/tests/input_output/test_parquet.py b/python/cudf/cudf/tests/input_output/test_parquet.py index cde1bccf2f7..a377bcac285 100644 --- a/python/cudf/cudf/tests/input_output/test_parquet.py +++ b/python/cudf/cudf/tests/input_output/test_parquet.py @@ -1,10 +1,14 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. 
+from io import BytesIO + import pandas as pd import pyarrow as pa import pyarrow.parquet as pq +import pytest import cudf +from cudf.testing import assert_eq def test_parquet_long_list(tmpdir): @@ -55,3 +59,34 @@ def test_parquet_long_list(tmpdir): actual = cudf.read_parquet(file_name) expected = pd.read_parquet(file_name) assert actual.to_arrow().equals(pa.Table.from_pandas(expected)) + + +@pytest.mark.parametrize( + "index", + [range(1, 11), list(range(1, 11)), range(1, 11)[::2]], + ids=["RangeIndex", "IntIndex", "StridedRange"], +) +@pytest.mark.parametrize("write_index", [False, True, None]) +@pytest.mark.parametrize("empty", [False, True], ids=["nonempty", "empty"]) +def test_dataframe_parquet_roundtrip(index, write_index, empty): + if empty: + data = {} + else: + data = {"a": [i * 2 for i in index]} + df = cudf.DataFrame(data=data, index=index) + pf = pd.DataFrame(data=data, index=index) + gpu_buf = BytesIO() + cpu_buf = BytesIO() + + df.to_parquet(gpu_buf, index=write_index) + pf.to_parquet(cpu_buf, index=write_index) + gpu_table = pq.read_table(gpu_buf) + cpu_table = pq.read_table(cpu_buf) + metadata_equal = ( + gpu_table.schema.pandas_metadata == cpu_table.schema.pandas_metadata + ) + assert metadata_equal + + gpu_read = cudf.read_parquet(gpu_buf) + cpu_read = cudf.read_parquet(cpu_buf) + assert_eq(gpu_read, cpu_read) diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py deleted file mode 100644 index 6f66ba79098..00000000000 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ /dev/null @@ -1,658 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations - -import datetime -import io -import pathlib - -import fastavro -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing import assert_eq -from cudf.testing.dataset_generator import rand_dataframe - - -def cudf_from_avro_util(schema: dict, records: list) -> cudf.DataFrame: - schema = [] if schema is None else fastavro.parse_schema(schema) - buffer = io.BytesIO() - fastavro.writer(buffer, schema, records) - buffer.seek(0) - return cudf.read_avro(buffer) - - -@pytest.fixture( - params=[ - ("boolean", "bool"), - ("int", "int32"), - ("long", "int64"), - ("float", "float32"), - ("double", "float64"), - ("bytes", "str"), - ("string", "str"), - ] -) -def avro_type_params(request): - return request.param - - -@pytest.fixture(params=[True, False]) -def nullable(request): - return request.param - - -@pytest.fixture(params=[True, False]) -def prepend_null(request): - return request.param - - -@pytest.mark.parametrize("namespace", [None, "root_ns"]) -def test_can_detect_dtype_from_avro_type( - avro_type_params, namespace, nullable -): - avro_type, expected_dtype = avro_type_params - avro_type = avro_type if not nullable else ["null", avro_type] - - schema = fastavro.parse_schema( - { - "type": "record", - "name": "test", - "namespace": namespace, - "fields": [{"name": "prop", "type": avro_type}], - } - ) - - actual = cudf_from_avro_util(schema, []) - - expected = cudf.DataFrame( - {"prop": cudf.Series(None, None, expected_dtype)} - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("namespace", [None, "root_ns"]) -def test_can_detect_dtype_from_avro_type_nested( - avro_type_params, namespace, nullable -): - avro_type, expected_dtype = avro_type_params - avro_type = avro_type if not nullable else ["null", avro_type] - - schema_leaf = { - "name": "leaf", - "type": "record", - "fields": [{"name": "prop3", "type": avro_type}], - } - - schema_child = { - "name": "child", - "type": "record", - "fields": [{"name": "prop2", "type": schema_leaf}], - } - - schema_root = { - "name": "root", - "type": "record", - "namespace": namespace, - "fields": [{"name": "prop1", "type": schema_child}], - } - - actual = cudf_from_avro_util(schema_root, []) - - col_name = "{ns}child.{ns}leaf.prop3".format( - ns="" if namespace is None else namespace + "." 
- ) - - expected = cudf.DataFrame( - {col_name: cudf.Series(None, None, expected_dtype)} - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "avro_type, cudf_type, avro_val, cudf_val", - [ - ("boolean", "bool", True, True), - ("boolean", "bool", False, False), - ("int", "int32", 1234, 1234), - ("long", "int64", 1234, 1234), - ("float", "float32", 12.34, 12.34), - ("double", "float64", 12.34, 12.34), - ("string", "str", "heyϴ", "heyϴ"), - # ("bytes", "str", "heyϴ", "heyϴ"), - ], -) -def test_can_parse_single_value(avro_type, cudf_type, avro_val, cudf_val): - schema_root = { - "name": "root", - "type": "record", - "fields": [{"name": "prop", "type": ["null", avro_type]}], - } - - records = [ - {"prop": avro_val}, - ] - - actual = cudf_from_avro_util(schema_root, records) - - expected = cudf.DataFrame( - {"prop": cudf.Series(data=[cudf_val], dtype=cudf_type)} - ) - - assert_eq(expected, actual) - - -def test_can_parse_single_null(avro_type_params): - avro_type, expected_dtype = avro_type_params - schema_root = { - "name": "root", - "type": "record", - "fields": [{"name": "prop", "type": ["null", avro_type]}], - } - - records = [{"prop": None}] - - actual = cudf_from_avro_util(schema_root, records) - - expected = cudf.DataFrame( - {"prop": cudf.Series(data=[None], dtype=expected_dtype)} - ) - - assert_eq(expected, actual) - - -def test_can_parse_no_data(avro_type_params): - avro_type, expected_dtype = avro_type_params - schema_root = { - "name": "root", - "type": "record", - "fields": [{"name": "prop", "type": ["null", avro_type]}], - } - - records = [] - - actual = cudf_from_avro_util(schema_root, records) - - expected = cudf.DataFrame( - {"prop": cudf.Series(data=[], dtype=expected_dtype)} - ) - - assert_eq(expected, actual) - - -@pytest.mark.xfail( - reason="cudf avro reader is unable to parse zero-field metadata." -) -def test_can_parse_no_fields(avro_type_params): - avro_type, expected_dtype = avro_type_params - schema_root = { - "name": "root", - "type": "record", - "fields": [], - } - - records = [] - - actual = cudf_from_avro_util(schema_root, records) - - expected = cudf.DataFrame() - - assert_eq(expected, actual) - - -def test_can_parse_no_schema(): - schema_root = None - records = [] - actual = cudf_from_avro_util(schema_root, records) - expected = cudf.DataFrame() - assert_eq(expected, actual) - - -@pytest.mark.parametrize("rows", [0, 1, 10, 1000]) -@pytest.mark.parametrize("codec", ["null", "deflate", "snappy"]) -def test_avro_decompression(set_decomp_env_vars, rows, codec): - schema = { - "name": "root", - "type": "record", - "fields": [ - {"name": "0", "type": "int"}, - {"name": "1", "type": "string"}, - ], - } - - # N.B. rand_dataframe() is brutally slow for some reason. Switching to - # np.random() speeds things up by a factor of 10. 
- # See also: https://github.com/rapidsai/cudf/issues/13128 - df = rand_dataframe( - [ - {"dtype": "int32", "null_frequency": 0, "cardinality": 1000}, - { - "dtype": "str", - "null_frequency": 0, - "cardinality": 100, - "max_string_length": 10, - }, - ], - rows, - seed=0, - use_threads=False, - ) - expected_df = cudf.DataFrame.from_arrow(df) - - records = df.to_pandas().to_dict(orient="records") - - buffer = io.BytesIO() - fastavro.writer(buffer, schema, records, codec=codec) - buffer.seek(0) - got_df = cudf.read_avro(buffer) - - assert_eq(expected_df, got_df) - - -@pytest.mark.parametrize("namespace", [None, "root_ns"]) -def test_can_detect_dtypes_from_avro_logical_type( - namespace, - nullable, - prepend_null, -): - logical_type = "date" - primitive_type = "int" - expected_dtype = "datetime64[s]" - avro_type = [{"logicalType": logical_type, "type": primitive_type}] - if nullable: - if prepend_null: - avro_type.insert(0, "null") - else: - avro_type.append("null") - - schema = fastavro.parse_schema( - { - "type": "record", - "name": "test", - "namespace": namespace, - "fields": [{"name": "prop", "type": avro_type}], - } - ) - - actual = cudf_from_avro_util(schema, []) - - expected = cudf.DataFrame( - {"prop": cudf.Series(None, None, expected_dtype)} - ) - - assert_eq(expected, actual) - - -def get_days_from_epoch(date: datetime.date | None) -> int | None: - if date is None: - return None - return (date - datetime.date(1970, 1, 1)).days - - -@pytest.mark.parametrize("namespace", [None, "root_ns"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas (datetime(9999, ...) too large)", -) -def test_can_parse_avro_date_logical_type(namespace, nullable, prepend_null): - avro_type = {"logicalType": "date", "type": "int"} - if nullable: - if prepend_null: - avro_type = ["null", avro_type] - else: - avro_type = [avro_type, "null"] - - schema_dict = { - "type": "record", - "name": "test", - "fields": [ - {"name": "o_date", "type": avro_type}, - ], - } - - if namespace: - schema_dict["namespace"] = namespace - - schema = fastavro.parse_schema(schema_dict) - - # Insert some None values in no particular order. These will get converted - # into avro "nulls" by the fastavro writer (or filtered out if we're not - # nullable). The first and last dates are epoch min/max values, the rest - # are arbitrarily chosen. - dates = [ - None, - datetime.date(1970, 1, 1), - datetime.date(1970, 1, 2), - datetime.date(1981, 10, 25), - None, - None, - datetime.date(2012, 5, 18), - None, - datetime.date(2019, 9, 3), - None, - datetime.date(9999, 12, 31), - ] - - if not nullable: - dates = [date for date in dates if date is not None] - - days_from_epoch = [get_days_from_epoch(date) for date in dates] - - records = [{"o_date": day} for day in days_from_epoch] - - actual = cudf_from_avro_util(schema, records) - - expected = cudf.DataFrame( - {"o_date": cudf.Series(dates, dtype="datetime64[s]")} - ) - - assert_eq(expected, actual) - - -def test_alltypes_plain_avro(): - # During development of the logical type support, the Java avro tests were - # triggering CUDA kernel crashes (null pointer dereferences). We were able - # to replicate the behavior in a C++ test case, and then subsequently came - # up with this Python unit test to also trigger the problematic code path. - # - # So, unlike the other tests, this test is inherently reactive in nature, - # added simply to verify we fixed the problematic code path that was - # causing CUDA kernel crashes. 
- # - # See https://github.com/rapidsai/cudf/pull/12788#issuecomment-1468822875 - # for more information. - relpath = "../../../../java/src/test/resources/alltypes_plain.avro" - path = pathlib.Path(__file__).parent.joinpath(relpath).resolve() - assert path.is_file(), path - path = str(path) - - with open(path, "rb") as f: - reader = fastavro.reader(f) - records = [record for record in reader] - - # For reference: - # - # >>> from pprint import pprint - # >>> pprint(reader.writer_schema) - # {'fields': [{'name': 'id', 'type': ['int', 'null']}, - # {'name': 'bool_col', 'type': ['boolean', 'null']}, - # {'name': 'tinyint_col', 'type': ['int', 'null']}, - # {'name': 'smallint_col', 'type': ['int', 'null']}, - # {'name': 'int_col', 'type': ['int', 'null']}, - # {'name': 'bigint_col', 'type': ['long', 'null']}, - # {'name': 'float_col', 'type': ['float', 'null']}, - # {'name': 'double_col', 'type': ['double', 'null']}, - # {'name': 'date_string_col', 'type': ['bytes', 'null']}, - # {'name': 'string_col', 'type': ['bytes', 'null']}, - # {'name': 'timestamp_col', - # 'type': [{'logicalType': 'timestamp-micros', - # 'type': 'long'}, - # 'null']}], - # 'name': 'topLevelRecord', - # 'type': 'record'} - # - # >>> pprint(records[0]) - # {'bigint_col': 0, - # 'bool_col': True, - # 'date_string_col': b'03/01/09', - # 'double_col': 0.0, - # 'float_col': 0.0, - # 'id': 4, - # 'int_col': 0, - # 'smallint_col': 0, - # 'string_col': b'0', - # 'timestamp_col': datetime.datetime(2009, 3, 1, 0, 0, - # tzinfo=datetime.timezone.utc), - # 'tinyint_col': 0} - - # Nothing particularly special about these columns, other than them being - # the ones that @davidwendt used to coerce the crash. - columns = ["bool_col", "int_col", "timestamp_col"] - - # This next line would trigger the fatal CUDA kernel crash. - actual = cudf.read_avro(path, columns=columns) - - # If we get here, we haven't crashed, obviously. Verify the returned data - # frame meets our expectations. We need to fiddle with the dtypes of the - # expected data frame in order to correctly match the schema definition and - # our corresponding read_avro()-returned data frame. - - data = [{column: row[column] for column in columns} for row in records] - - # discard timezone information as we don't support it: - expected = pd.DataFrame(data) - expected["timestamp_col"].dt.tz_localize(None) - - # The fastavro.reader supports the `'logicalType': 'timestamp-micros'` used - # by the 'timestamp_col' column, which is converted into Python - # datetime.datetime() objects (see output of pprint(records[0]) above). - # As we don't support that logical type yet in cudf, we need to convert to - # int64, then divide by 1000 to convert from nanoseconds to microseconds. - timestamps = expected["timestamp_col"].astype("int64") - timestamps //= 1000 - expected["timestamp_col"] = timestamps - - # Furthermore, we need to force the 'int_col' into an int32, per the schema - # definition. (It ends up as an int64 due to cudf.DataFrame() defaulting - # all Python int values to int64 sans a dtype= override.) - expected["int_col"] = expected["int_col"].astype("int32") - - assert_eq(actual, expected) - - -def multiblock_testname_ids(param): - (total_rows, num_rows, skip_rows, sync_interval) = param - return f"{total_rows=}-{num_rows=}-{skip_rows=}-{sync_interval=}" - - -# The following values are used to test various boundary conditions associated -# with multiblock avro files. 
Each tuple consists of four values: total number -# of rows to generate, number of rows to limit the result set to, number of -# rows to skip, and number of rows per block. If the total number of rows and -# number of rows (i.e. first and second tuple elements) are equal, it means -# that all rows will be returned. If the rows per block also equals the first -# two numbers, it means that a single block will be used. -@pytest.fixture( - ids=multiblock_testname_ids, - params=[ - (10, 10, 9, 9), - (10, 10, 9, 5), - (10, 10, 9, 3), - (10, 10, 9, 2), - (10, 10, 9, 10), - (10, 10, 8, 2), - (10, 10, 5, 5), - (10, 10, 2, 9), - (10, 10, 2, 2), - (10, 10, 1, 9), - (10, 10, 1, 5), - (10, 10, 1, 2), - (10, 10, 1, 10), - (10, 10, 10, 9), - (10, 10, 10, 5), - (10, 10, 10, 2), - (10, 10, 10, 10), - (10, 10, 0, 9), - (10, 10, 0, 5), - (10, 10, 0, 2), - (10, 10, 0, 10), - (100, 100, 99, 10), - (100, 100, 90, 90), - (100, 100, 90, 89), - (100, 100, 90, 88), - (100, 100, 90, 87), - (100, 100, 90, 5), - (100, 100, 89, 90), - (100, 100, 87, 90), - (100, 100, 50, 7), - (100, 100, 50, 31), - (10, 1, 8, 9), - (100, 1, 99, 10), - (100, 1, 98, 10), - (100, 1, 97, 10), - (100, 3, 90, 87), - (100, 4, 90, 5), - (100, 2, 89, 90), - (100, 9, 87, 90), - (100, 20, 50, 7), - (100, 10, 50, 31), - (100, 20, 50, 31), - (100, 30, 50, 31), - (256, 256, 0, 256), - (256, 256, 0, 32), - (256, 256, 0, 31), - (256, 256, 0, 33), - (256, 256, 31, 32), - (256, 256, 32, 31), - (256, 256, 31, 33), - (512, 512, 0, 32), - (512, 512, 0, 31), - (512, 512, 0, 33), - (512, 512, 31, 32), - (512, 512, 32, 31), - (512, 512, 31, 33), - (1024, 1024, 0, 1), - (1024, 1024, 0, 3), - (1024, 1024, 0, 7), - (1024, 1024, 0, 8), - (1024, 1024, 0, 9), - (1024, 1024, 0, 15), - (1024, 1024, 0, 16), - (1024, 1024, 0, 17), - (1024, 1024, 0, 32), - (1024, 1024, 0, 31), - (1024, 1024, 0, 33), - (1024, 1024, 31, 32), - (1024, 1024, 32, 31), - (1024, 1024, 31, 33), - (2048, 2048, 0, 31), - (2048, 2048, 0, 32), - (2048, 2048, 0, 33), - (2048, 2048, 0, 2048), - ], -) -def total_rows_and_num_rows_and_skip_rows_and_rows_per_block(request): - return request.param - - -# N.B. The float32 and float64 types are chosen specifically to exercise -# the only path in the avro reader GPU code that can process multiple -# rows in parallel (via warp-level parallelism). See the logic around -# the line `if (cur + min_row_size * rows_remaining == end)` in -# gpuDecodeAvroColumnData(). -@pytest.mark.parametrize("dtype", ["str", "float32", "float64"]) -@pytest.mark.parametrize( - "use_sync_interval", - [True, False], - ids=["use_sync_interval", "ignore_sync_interval"], -) -@pytest.mark.parametrize("codec", ["null", "deflate", "snappy"]) -def test_avro_reader_multiblock( - dtype, - codec, - use_sync_interval, - total_rows_and_num_rows_and_skip_rows_and_rows_per_block, -): - ( - total_rows, - num_rows, - skip_rows, - rows_per_block, - ) = total_rows_and_num_rows_and_skip_rows_and_rows_per_block - - assert total_rows >= num_rows - assert rows_per_block <= total_rows - - limit_rows = num_rows != total_rows - if limit_rows: - assert total_rows >= num_rows + skip_rows - - if dtype == "str": - avro_type = "string" - - # Generate a list of strings, each of which is a 6-digit number, padded - # with leading zeros. This data set was very useful during development - # of the multiblock avro reader logic, as you get implicit feedback as - # to what may have gone wrong when the test fails, based on the - # expected vs actual values. 
- values = [f"{i:0>6}" for i in range(0, total_rows)] - - # Strings are encoded in avro with a zigzag-encoded length prefix, and - # then the string data. As all of our strings are fixed at length 6, - # we only need one byte to encode the length prefix (0xc). Thus, our - # bytes per row is 6 + 1 = 7. - bytes_per_row = len(values[0]) + 1 - assert bytes_per_row == 7, bytes_per_row - else: - assert dtype in ("float32", "float64") - avro_type = "float" if dtype == "float32" else "double" - rng = np.random.default_rng(seed=0) - # We don't use rand_dataframe() here, because it increases the - # execution time of each test by a factor of 10 or more (it appears - # to use a very costly approach to generating random data). - # See also: https://github.com/rapidsai/cudf/issues/13128 - values = rng.random(total_rows).astype(dtype) - bytes_per_row = values.dtype.itemsize - - # The sync_interval is the number of bytes between sync blocks. We know - # how many bytes we need per row, so we can calculate the number of bytes - # per block by multiplying the number of rows per block by the bytes per - # row. This is the sync interval. - total_bytes_per_block = rows_per_block * bytes_per_row - sync_interval = total_bytes_per_block - - source_df = cudf.DataFrame({"0": pd.Series(values)}) - - if limit_rows: - expected_df = source_df[skip_rows : skip_rows + num_rows].reset_index( - drop=True - ) - else: - expected_df = source_df[skip_rows:].reset_index(drop=True) - - records = source_df.to_pandas().to_dict(orient="records") - - schema = { - "name": "root", - "type": "record", - "fields": [ - {"name": "0", "type": avro_type}, - ], - } - - if use_sync_interval: - kwds = {"sync_interval": sync_interval} - else: - kwds = {} - - kwds["codec"] = codec - - buffer = io.BytesIO() - fastavro.writer(buffer, schema, records, **kwds) - buffer.seek(0) - - if not limit_rows: - # Explicitly set num_rows to None if we want to read all rows. This - # ensures we exercise the logic behind a read_avro() call where the - # caller doesn't specify the number of rows desired (which will be the - # most common use case). - num_rows = None - actual_df = cudf.read_avro(buffer, skiprows=skip_rows, num_rows=num_rows) - - assert_eq(expected_df, actual_df) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index be1e278eea6..2ee6904ef4d 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -1,20 +1,14 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. 
-import string import numpy as np import pandas as pd import pytest -from cudf import DataFrame, Series, option_context -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.core.column import NumericalColumn +from cudf import Series from cudf.testing import assert_eq from cudf.testing._utils import ( - DATETIME_TYPES, - NUMERIC_TYPES, assert_exceptions_equal, - expect_warning_if, ) @@ -42,52 +36,6 @@ def sliceobj(request): return request.param -def test_dataframe_sort_values(nelem, dtype): - rng = np.random.default_rng(seed=0) - df = DataFrame() - df["a"] = aa = (100 * rng.random(nelem)).astype(dtype) - df["b"] = bb = (100 * rng.random(nelem)).astype(dtype) - sorted_df = df.sort_values(by="a") - # Check - sorted_index = np.argsort(aa, kind="mergesort") - assert_eq(sorted_df.index.values, sorted_index) - assert_eq(sorted_df["a"].values, aa[sorted_index]) - assert_eq(sorted_df["b"].values, bb[sorted_index]) - - -def test_sort_values_nans_pandas_compat(): - data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} - with option_context("mode.pandas_compatible", True): - result = DataFrame(data).sort_values("b", na_position="first") - expected = pd.DataFrame(data).sort_values("b", na_position="first") - assert_eq(result, expected) - - -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize("index", ["a", "b", ["a", "b"]]) -def test_dataframe_sort_values_ignore_index(index, ignore_index): - if ( - PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION - and isinstance(index, list) - and not ignore_index - ): - pytest.skip( - reason="Unstable sorting by pandas(numpy): https://github.com/pandas-dev/pandas/issues/57531" - ) - - gdf = DataFrame( - {"a": [1, 3, 5, 2, 4], "b": [1, 1, 2, 2, 3], "c": [9, 7, 7, 7, 1]} - ) - gdf = gdf.set_index(index) - - pdf = gdf.to_pandas() - - expect = pdf.sort_values(list(pdf.columns), ignore_index=ignore_index) - got = gdf.sort_values((gdf.columns), ignore_index=ignore_index) - - assert_eq(expect, got) - - @pytest.mark.parametrize("ignore_index", [True, False]) def test_series_sort_values_ignore_index(ignore_index): gsr = Series([1, 3, 5, 2, 4]) @@ -98,18 +46,6 @@ def test_series_sort_values_ignore_index(ignore_index): assert_eq(expect, got) -@pytest.mark.parametrize("nelem", [10, 100]) -def test_dataframe_sort_values_sliced(nelem, sliceobj): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame() - df["a"] = rng.random(nelem) - - expect = df[sliceobj]["a"].sort_values() - gdf = DataFrame.from_pandas(df) - got = gdf[sliceobj]["a"].sort_values() - assert (got.to_pandas() == expect).all() - - @pytest.mark.parametrize("asc", [True, False]) def test_series_argsort(nelem, dtype, asc): rng = np.random.default_rng(seed=0) @@ -171,262 +107,3 @@ def test_series_nsmallest(data, n): lfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), rfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), ) - - -@pytest.mark.parametrize("nelem,n", [(1, 1), (100, 100), (10, 5), (100, 10)]) -@pytest.mark.parametrize("op", ["nsmallest", "nlargest"]) -@pytest.mark.parametrize("columns", ["a", ["b", "a"]]) -def test_dataframe_nlargest_nsmallest(nelem, n, op, columns): - rng = np.random.default_rng(seed=0) - aa = rng.random(nelem) - bb = rng.random(nelem) - - df = DataFrame({"a": aa, "b": bb}) - pdf = df.to_pandas() - assert_eq(getattr(df, op)(n, columns), getattr(pdf, op)(n, columns)) - - -@pytest.mark.parametrize("counts", [(10, 5), (100, 10)]) -def test_dataframe_nlargest_sliced(counts, sliceobj): - nelem, n = counts - rng = 
np.random.default_rng(seed=0) - df = pd.DataFrame() - df["a"] = rng.random(nelem) - df["b"] = rng.random(nelem) - - expect = df[sliceobj].nlargest(n, "a") - gdf = DataFrame.from_pandas(df) - got = gdf[sliceobj].nlargest(n, "a") - assert (got.to_pandas() == expect).all().all() - - -@pytest.mark.parametrize("counts", [(10, 5), (100, 10)]) -def test_dataframe_nsmallest_sliced(counts, sliceobj): - nelem, n = counts - rng = np.random.default_rng(seed=0) - df = pd.DataFrame() - df["a"] = rng.random(nelem) - df["b"] = rng.random(nelem) - - expect = df[sliceobj].nsmallest(n, "a") - gdf = DataFrame.from_pandas(df) - got = gdf[sliceobj].nsmallest(n, "a") - assert (got.to_pandas() == expect).all().all() - - -@pytest.mark.parametrize("num_cols", [1, 2, 3, 5]) -@pytest.mark.parametrize("num_rows", [0, 1, 2, 1000]) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_dataframe_multi_column( - num_cols, num_rows, dtype, ascending, na_position -): - rng = np.random.default_rng(seed=0) - by = list(string.ascii_lowercase[:num_cols]) - pdf = pd.DataFrame() - - for i in range(5): - colname = string.ascii_lowercase[i] - data = rng.integers(0, 26, num_rows).astype(dtype) - pdf[colname] = data - - gdf = DataFrame.from_pandas(pdf) - - got = gdf.sort_values(by, ascending=ascending, na_position=na_position) - expect = pdf.sort_values(by, ascending=ascending, na_position=na_position) - - assert_eq( - got[by].reset_index(drop=True), expect[by].reset_index(drop=True) - ) - - -@pytest.mark.parametrize("num_cols", [1, 2, 3]) -@pytest.mark.parametrize("num_rows", [0, 1, 2, 3, 5]) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -@pytest.mark.parametrize("nulls", ["some", "all"]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_dataframe_multi_column_nulls( - num_cols, num_rows, dtype, nulls, ascending, na_position -): - rng = np.random.default_rng(seed=0) - by = list(string.ascii_lowercase[:num_cols]) - pdf = pd.DataFrame() - - for i in range(3): - colname = string.ascii_lowercase[i] - data = rng.integers(0, 26, num_rows).astype(dtype) - if nulls == "some": - idx = np.array([], dtype="int64") - if num_rows > 0: - idx = rng.choice( - num_rows, size=int(num_rows / 4), replace=False - ) - data[idx] = np.nan - elif nulls == "all": - data[:] = np.nan - pdf[colname] = data - - gdf = DataFrame.from_pandas(pdf) - - got = gdf.sort_values(by, ascending=ascending, na_position=na_position) - expect = pdf.sort_values(by, ascending=ascending, na_position=na_position) - - assert_eq( - got[by].reset_index(drop=True), expect[by].reset_index(drop=True) - ) - - -@pytest.mark.parametrize("ascending1", [True, False]) -@pytest.mark.parametrize("ascending2", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_dataframe_multi_column_nulls_multiple_ascending( - ascending1, ascending2, na_position -): - ascending = (ascending1, ascending2) - pdf = pd.DataFrame( - {"a": [3, 1, None, 2, 2, None, 1], "b": [1, 2, 3, 4, 5, 6, 7]} - ) - gdf = DataFrame.from_pandas(pdf) - expect = pdf.sort_values( - by=["a", "b"], ascending=ascending, na_position=na_position - ) - actual = gdf.sort_values( - by=["a", "b"], ascending=ascending, na_position=na_position - ) - - assert_eq(actual, expect) - - -@pytest.mark.parametrize("nelem", [1, 100]) -def test_series_nlargest_nelem(nelem): - rng = 
np.random.default_rng(seed=0) - elems = rng.random(nelem) - gds = Series(elems).nlargest(nelem) - pds = pd.Series(elems).nlargest(nelem) - - assert (pds == gds.to_pandas()).all().all() - - -@pytest.mark.parametrize("map_size", [1, 2, 8]) -@pytest.mark.parametrize("nelem", [1, 10, 100]) -@pytest.mark.parametrize("keep", [True, False]) -def test_dataframe_scatter_by_map(map_size, nelem, keep): - strlist = ["dog", "cat", "fish", "bird", "pig", "fox", "cow", "goat"] - rng = np.random.default_rng(seed=0) - df = DataFrame( - { - "a": rng.choice(strlist[:map_size], nelem), - "b": rng.uniform(low=0, high=map_size, size=nelem), - "c": rng.integers(map_size, size=nelem), - } - ) - df["d"] = df["a"].astype("category") - - def _check_scatter_by_map(dfs, col): - assert len(dfs) == map_size - nrows = 0 - name = col.name - for i, df in enumerate(dfs): - nrows += len(df) - if len(df) > 0: - # Make sure the column types were preserved - assert isinstance(df[name]._column, type(col._column)) - try: - sr = df[name].astype(np.int32) - except ValueError: - sr = df[name] - assert sr.nunique() <= 1 - if sr.nunique() == 1: - if isinstance(df[name]._column, NumericalColumn): - assert sr.iloc[0] == i - assert nrows == nelem - - with pytest.warns(UserWarning): - _check_scatter_by_map( - df.scatter_by_map("a", map_size, keep_index=keep), df["a"] - ) - _check_scatter_by_map( - df.scatter_by_map("b", map_size, keep_index=keep), df["b"] - ) - _check_scatter_by_map( - df.scatter_by_map("c", map_size, keep_index=keep), df["c"] - ) - with pytest.warns(UserWarning): - _check_scatter_by_map( - df.scatter_by_map("d", map_size, keep_index=keep), df["d"] - ) - - if map_size == 2 and nelem == 100: - with pytest.warns(UserWarning): - df.scatter_by_map("a") # Auto-detect map_size - with pytest.raises(ValueError): - with pytest.warns(UserWarning): - df.scatter_by_map("a", map_size=1, debug=True) # Bad map_size - - # Test Index - df2 = df.set_index("c") - generic_result = df2.scatter_by_map("b", map_size, keep_index=keep) - _check_scatter_by_map(generic_result, df2["b"]) - if keep: - for frame in generic_result: - isinstance(frame.index, type(df2.index)) - - # Test MultiIndex - df2 = df.set_index(["a", "c"]) - multiindex_result = df2.scatter_by_map("b", map_size, keep_index=keep) - _check_scatter_by_map(multiindex_result, df2["b"]) - if keep: - for frame in multiindex_result: - isinstance(frame.index, type(df2.index)) - - -@pytest.mark.parametrize( - "kind", ["quicksort", "mergesort", "heapsort", "stable"] -) -def test_dataframe_sort_values_kind(nelem, dtype, kind): - rng = np.random.default_rng(seed=0) - df = DataFrame() - df["a"] = aa = (100 * rng.random(nelem)).astype(dtype) - df["b"] = bb = (100 * rng.random(nelem)).astype(dtype) - with expect_warning_if(kind != "quicksort", UserWarning): - sorted_df = df.sort_values(by="a", kind=kind) - # Check - sorted_index = np.argsort(aa, kind="mergesort") - assert_eq(sorted_df.index.values, sorted_index) - assert_eq(sorted_df["a"].values, aa[sorted_index]) - assert_eq(sorted_df["b"].values, bb[sorted_index]) - - -@pytest.mark.parametrize("ids", [[-1, 0, 1, 0], [0, 2, 3, 0]]) -def test_dataframe_scatter_by_map_7513(ids): - df = DataFrame({"id": ids, "val": [0, 1, 2, 3]}) - with pytest.raises(ValueError): - df.scatter_by_map(df["id"]) - - -def test_dataframe_scatter_by_map_empty(): - df = DataFrame({"a": [], "b": []}, dtype="float64") - scattered = df.scatter_by_map(df["a"]) - assert len(scattered) == 0 - - -def test_sort_values_by_index_level(): - df = pd.DataFrame({"a": [1, 3, 2]}, 
index=pd.Index([1, 3, 2], name="b")) - cudf_df = DataFrame.from_pandas(df) - result = cudf_df.sort_values("b") - expected = df.sort_values("b") - assert_eq(result, expected) - - -def test_sort_values_by_ambiguous(): - df = pd.DataFrame({"a": [1, 3, 2]}, index=pd.Index([1, 3, 2], name="a")) - cudf_df = DataFrame.from_pandas(df) - - assert_exceptions_equal( - lfunc=df.sort_values, - rfunc=cudf_df.sort_values, - lfunc_args_and_kwargs=(["a"], {}), - rfunc_args_and_kwargs=(["a"], {}), - ) From 4956cd75529ded809eb6408e977e4fa9a44b5451 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 30 Jul 2025 21:56:20 -0400 Subject: [PATCH 034/366] Make dividing a boolean column return f64 dtype in cudf-polars (#19443) Closes https://github.com/rapidsai/cudf/issues/19408 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19443 --- python/cudf_polars/cudf_polars/dsl/translate.py | 2 ++ python/cudf_polars/cudf_polars/testing/plugin.py | 8 -------- .../cudf_polars/tests/expressions/test_numeric_binops.py | 9 +++++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 6db8085fab4..5c5537be43e 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -864,6 +864,8 @@ def _( dtype: DataType, schema: Schema, ) -> expr.Expr: + if plc.traits.is_boolean(dtype.plc) and node.op == pl_expr.Operator.TrueDivide: + dtype = DataType(pl.Float64()) return expr.BinOp( dtype, expr.BinOp._MAPPING[node.op], diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index f1d7820c809..e205a7299d3 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -127,14 +127,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/lazyframe/test_lazyframe.py::test_lazy_cache_hit": "Debug output on stderr doesn't match", "tests/unit/operations/aggregation/test_aggregations.py::test_binary_op_agg_context_no_simplify_expr_12423": "groupby-agg of just literals should not produce collect_list", "tests/unit/operations/aggregation/test_aggregations.py::test_nan_inf_aggregation": "treatment of nans and nulls together is different in libcudf and polars in groupby-agg context", - "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "https://github.com/rapidsai/cudf/issues/19408", - 
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "https://github.com/rapidsai/cudf/issues/19408", - "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "https://github.com/rapidsai/cudf/issues/19408", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_shorthand_quantile": "libcudf quantiles are round to nearest ties to even, polars quantiles are round to nearest ties away from zero", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py index 62e14a0f5f3..690b4173b8c 100644 --- a/python/cudf_polars/tests/expressions/test_numeric_binops.py +++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py @@ -97,3 +97,12 @@ def test_floor_div_binop_by_zero(zero, ltype): q = df.select(pl.col("a") // zero) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("divisor", [1, 2.0]) +def test_true_div_boolean_column(divisor): + df = pl.LazyFrame({"a": [True, False]}) + + q = df.select(pl.col("a") / divisor) + + assert_gpu_result_equal(q) From dfba7d3fced0063914a837d77f4c1d3fa77a338a Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Jul 2025 09:39:36 -0400 Subject: [PATCH 035/366] Fix contiguous-split nvbench cmake build (#19534) Fixes the cmake configure line for `CONTIGUOUS_SPLIT_NVBENCH` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19534 --- cpp/benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 12ebdf8ef2b..c8d2cae9fd2 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -151,7 +151,7 @@ ConfigureNVBench( # ################################################################################################## # * contiguous_split benchmark ------------------------------------------------------------------- -ConfigureBench(CONTIGUOUS_SPLIT_NVBENCH contiguous_split/contiguous_split.cpp) +ConfigureNVBench(CONTIGUOUS_SPLIT_NVBENCH contiguous_split/contiguous_split.cpp) # ################################################################################################## # * lists scatter benchmark ----------------------------------------------------------------------- From 2741661ea039b4277af14b129eae5e7361a1e0f7 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 31 Jul 2025 09:39:57 -0400 Subject: [PATCH 036/366] Remove c++ stringview interop example (#19516) Removes the Arrow stringview interop example since libcudf supports this conversion in its interop APIs now. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Muhammad Haseeb (https://github.com/mhaseeb123) - Basit Ayantunde (https://github.com/lamarrr) URL: https://github.com/rapidsai/cudf/pull/19516 --- cpp/examples/build.sh | 1 - cpp/examples/interop/CMakeLists.txt | 34 ------ cpp/examples/interop/interop.cpp | 177 ---------------------------- 3 files changed, 212 deletions(-) delete mode 100644 cpp/examples/interop/CMakeLists.txt delete mode 100644 cpp/examples/interop/interop.cpp diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index e6ceaf5b6e6..7296a1afd04 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -63,4 +63,3 @@ build_example string_transforms build_example nested_types build_example parquet_io build_example billion_rows -build_example interop diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt deleted file mode 100644 index f974735b979..00000000000 --- a/cpp/examples/interop/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2024-2025, NVIDIA CORPORATION. - -cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) - -include(../set_cuda_architecture.cmake) - -rapids_cuda_init_architectures(interop_example) - -project( - interop_example - VERSION 0.0.1 - LANGUAGES CXX CUDA -) - -include(../fetch_dependencies.cmake) - -include(rapids-cmake) -rapids_cmake_build_type("Release") - -# For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the -# version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with -# gcc>=14. We can remove this once we upgrade to a newer sccache version. -set(CMAKE_CXX_SCAN_FOR_MODULES OFF) - -# The Arrow CMake is currently broken if the build type is not set -set(CMAKE_BUILD_TYPE Release) -# No need to install Arrow libs when only the final example executable is shipped. -set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) -include(../../cmake/thirdparty/get_arrow.cmake) - -add_executable(interop interop.cpp) -target_link_libraries(interop PRIVATE cudf::cudf) -target_compile_features(interop PRIVATE cxx_std_20) -target_link_libraries(interop PRIVATE ${ARROW_LIBRARIES}) diff --git a/cpp/examples/interop/interop.cpp b/cpp/examples/interop/interop.cpp deleted file mode 100644 index b01b04489a6..00000000000 --- a/cpp/examples/interop/interop.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include - -#include -#include - -// Helper functions to create StringViews -inline arrow::StringViewType::c_type to_inline_string_view(const void* data, int32_t const& size) -{ - arrow::StringViewType::c_type out; - out.inlined = {size, {}}; - memcpy(&out.inlined.data, data, size); - return out; -} -inline arrow::StringViewType::c_type to_inline_string_view(std::string_view const& v) -{ - return to_inline_string_view(v.data(), static_cast(v.size())); -} -inline arrow::StringViewType::c_type to_string_view(const void* data, - int32_t const& size, - int32_t const& buffer_index, - int32_t const& offset) -{ - if (size <= arrow::StringViewType::kInlineSize) { return to_inline_string_view(data, size); } - arrow::StringViewType::c_type out; - out.ref = {size, {}, buffer_index, offset}; - memcpy(&out.ref.prefix, data, sizeof(out.ref.prefix)); - return out; -} -inline arrow::StringViewType::c_type to_string_view(std::string_view const& v, - int32_t const& buffer_index, - int32_t const& offset) -{ - return to_string_view(v.data(), static_cast(v.size()), buffer_index, offset); -} - -/** - * @brief Create a StringViewArray - * - * @param data_buffers The data buffers - * @param views The string views - * @param validate Whether to validate the array - */ -arrow::Result> make_string_view_array( - arrow::BufferVector const& data_buffers, - std::vector const& views, - bool validate = true) -{ - auto const length = static_cast(views.size()); - auto const arr = std::make_shared( - arrow::utf8_view(), length, arrow::Buffer::FromVector(views), std::move(data_buffers)); - if (validate) { RETURN_NOT_OK(arr->ValidateFull()); } - return arr; -} - -/** - * @brief Convert a vector of strings into a vector of the - * constituent chars and a vector of offsets. 
- * - * @param strings The vector of strings - */ -auto make_chars_and_offsets(std::vector const& strings) -{ - std::vector chars{}; - std::vector offsets(1, 0); - for (auto& str : strings) { - chars.insert(chars.end(), std::cbegin(str), std::cend(str)); - auto const last_offset = static_cast(offsets.back()); - auto const next_offset = last_offset + str.length(); - CUDF_EXPECTS( - next_offset < static_cast(std::numeric_limits::max()), - "Cannot use arrow_string_view_to_cudf_column to build a large strings column"); - offsets.push_back(static_cast(next_offset)); - } - return std::make_tuple(std::move(chars), std::move(offsets)); -}; - -/** - * @brief Convert an Arrow StringViewArray to a cudf::column - * - * @param array The Arrow StringViewArray - * @param stream The CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - */ -std::unique_ptr arrow_string_view_to_cudf_column( - std::shared_ptr const& array, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) -{ - // Convert the string views into chars and offsets - std::vector strings; - for (auto i = 0; i < array->length(); i++) { - strings.push_back(array->GetString(i)); - } - auto const [chars, offsets] = make_chars_and_offsets(strings); - - // Copy the chars vector to the device - rmm::device_uvector d_chars(chars.size(), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync( - d_chars.data(), chars.data(), chars.size() * sizeof(char), cudaMemcpyDefault, stream.value())); - - // Copy the offsets vector to the device - // and wrap it in a cudf::column - rmm::device_uvector d_offsets(offsets.size(), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_offsets.data(), - offsets.data(), - offsets.size() * sizeof(cudf::size_type), - cudaMemcpyDefault, - stream.value())); - auto offsets_col = - std::make_unique(std::move(d_offsets), rmm::device_buffer{0, stream, mr}, 0); - - // Create a string column out of the chars and offsets - return cudf::make_strings_column(array->length(), - std::move(offsets_col), - d_chars.release(), - 0, - rmm::device_buffer{0, stream, mr}); -} - -int main(int argc, char** argv) -{ - std::vector> data_buffers; - std::vector views; - - // Define the data buffers and string views - auto const buffer_a = - arrow::Buffer::FromString("hello rapids teamapache arrow interopnvidiacudf"); - data_buffers.push_back(buffer_a); - views.push_back(to_string_view("hello rapid steam", 0, 0)); - views.push_back(to_string_view("apache arrow interop", 0, 17)); - views.push_back(to_inline_string_view("nvidia")); - views.push_back(to_inline_string_view("cudf")); - - // Create a StringViewArray - auto const string_view_col = make_string_view_array(data_buffers, views, true).ValueOrDie(); - std::cout << string_view_col->ToString() << std::endl; - - // Convert the StringViewArray to a cudf::column - auto const cudf_col = arrow_string_view_to_cudf_column(string_view_col); - - // Write the cudf::column as CSV - auto const tbl_view = cudf::table_view({cudf_col->view()}); - std::vector const names = {"col_a"}; - - std::vector h_buffer; - cudf::io::csv_writer_options writer_options = - cudf::io::csv_writer_options::builder(cudf::io::sink_info(&h_buffer), tbl_view) - .include_header(not names.empty()) - .names(names); - - cudf::io::write_csv(writer_options); - auto const result = std::string(h_buffer.data(), h_buffer.size()); - std::cout << result << std::endl; - - return 0; 
-}

From 0b5a8949de6b4dcf80cee2166cba680676049b1d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Thu, 31 Jul 2025 10:48:29 -0700
Subject: [PATCH 037/366] Simplify clang dependency spec (#19546)

This is a slight simplification on top of
https://github.com/rapidsai/cudf/pull/19529/ by having clang versions
specified in only one place and splitting out include-what-you-use.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cudf/pull/19546
---
 dependencies.yaml | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/dependencies.yaml b/dependencies.yaml
index a03f5ac2c3f..0153304a4e2 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,8 +10,7 @@ files:
       - build_all
       - build_cpp
       - build_python_common
-      - clang_format
-      - clang_tidy
+      - clang
       - cuda
       - cuda_version
       - depends_on_cupy
@@ -22,6 +21,7 @@ files:
       - depends_on_rmm
       - develop
       - docs
+      - iwyu
       - notebooks
       - py_version
       - pyarrow_run
@@ -124,10 +124,11 @@ files:
     includes:
       - build_all
       - build_base
-      - clang_tidy
+      - clang
       - cuda
       - cuda_version
       - develop
+      - iwyu
       - py_version
   docs:
     output: none
@@ -558,18 +559,16 @@ dependencies:
       - output_types: conda
         packages:
           - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version.
-  clang_format:
+  clang:
     common:
       - output_types: conda
         packages:
          - clang==20.1.4
          - clang-tools==20.1.4
-  clang_tidy:
+  iwyu:
     common:
       - output_types: conda
         packages:
-          - clang==20.1.4
-          - clang-tools==20.1.4
          - include-what-you-use==0.24.0
   docs:
     common:

From de8b114f81cca754e8d188ccf9ff6f8543af553a Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Thu, 31 Jul 2025 14:46:22 -0700
Subject: [PATCH 038/366] Separate row mask and page mask computation and
 usage (#19537)

Contributes to #19526

This PR separates the computation of the row mask and the data page mask for
filter columns, providing more control to `materialize_filter_columns()`.
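A condensed sketch of the decoupled flow, adapted from the updated documentation in this PR; the reader, options, column chunk buffers, stream, and memory resource are assumed to be set up as in the existing examples:

```cpp
#include <cudf/io/experimental/hybrid_scan.hpp>
#include <cudf/io/parquet.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/resource_ref.hpp>

#include <utility>
#include <vector>

namespace hs = cudf::io::parquet::experimental;

// First reader pass: the row mask is now computed separately, and the data
// page mask is derived from it inside materialize_filter_columns() only when
// requested via use_data_page_mask.
cudf::io::table_with_metadata first_pass(hs::hybrid_scan_reader const& reader,
                                         cudf::io::parquet_reader_options const& options,
                                         std::vector<rmm::device_buffer> chunk_buffers,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr)
{
  auto const row_groups = reader.all_row_groups(options);

  // Step 1: build the row mask on its own (here from page index statistics).
  auto row_mask =
    reader.build_row_mask_with_page_index_stats(row_groups, options, stream, mr);

  // Step 2: materialize filter columns, opting in to data page masking.
  return reader.materialize_filter_columns(row_groups,
                                           std::move(chunk_buffers),
                                           row_mask->mutable_view(),
                                           hs::use_data_page_mask::YES,
                                           options,
                                           stream);
}
```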
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19537 --- .../cudf/io/experimental/hybrid_scan.hpp | 106 +++++++------ .../io/parquet/experimental/hybrid_scan.cpp | 27 ++-- .../experimental/hybrid_scan_helpers.hpp | 7 +- .../parquet/experimental/hybrid_scan_impl.cpp | 44 +++--- .../parquet/experimental/hybrid_scan_impl.hpp | 15 +- .../parquet/experimental/page_index_filter.cu | 2 +- .../experimental/hybrid_scan_filters_test.cpp | 140 ++++++++---------- .../io/experimental/hybrid_scan_test.cpp | 23 ++- 8 files changed, 184 insertions(+), 180 deletions(-) diff --git a/cpp/include/cudf/io/experimental/hybrid_scan.hpp b/cpp/include/cudf/io/experimental/hybrid_scan.hpp index f7f31874d5e..c6a8038d3eb 100644 --- a/cpp/include/cudf/io/experimental/hybrid_scan.hpp +++ b/cpp/include/cudf/io/experimental/hybrid_scan.hpp @@ -50,6 +50,15 @@ namespace io::parquet::experimental { * @file */ +/** + * @brief Whether to compute and use a page mask using the row mask to skip decompression and + * decoding of the masked pages + */ +enum class use_data_page_mask : bool { + YES = true, ///< Compute and use a data page mask + NO = false ///< Do not compute or use a data page mask +}; + /** * @brief The experimental parquet reader class to optimally read parquet files subject to * highly selective filters, called a Hybrid Scan operation @@ -171,15 +180,15 @@ namespace io::parquet::experimental { * } * @endcode * - * Filter column page pruning (OPTIONAL): Once the row groups are filtered, the next step is to - * optionally prune the data pages within the current span of row groups subject to the same filter - * expression using page statistics contained in the page index of the parquet file. To get started, - * first set up the page index using the `setup_page_index()` function if not previously done and - * then filter the data pages using the `filter_data_pages_with_stats()` function. This function - * returns a row mask. i.e. BOOL8 column indicating which rows may survive in the materialized table - * of filter columns (first reader pass), and a data page mask. i.e. a vector of boolean host - * vectors indicating which data pages for each filter column need to be processed to materialize - * the table filter columns (first reader pass). + * Build an initial row mask: Once the row groups are filtered, the next step is to build an + * initial row mask column to indicate which rows in the current span of row groups will survive in + * the read table. This initial row mask may be a BOOL8 cudf column of size equal to the + * total number of rows in the current span of row groups (computed by `total_rows_in_row_groups()`) + * containing all `true` values. Alternatively, the row mask may be built with + * the `build_row_mask_with_page_index_stats()` function and contain a `true` value for only the + * rows that survive the page-level statistics from the page index subject to the same filter as row + * groups. Note that this step requires the page index to be set up using the `setup_page_index()` + * function. 
* @code{.cpp} * // If not already done, get the page index byte range * auto page_index_byte_range = reader->page_index_byte_range(); @@ -190,24 +199,28 @@ namespace io::parquet::experimental { * // If not already done, Set up the page index now * reader->setup_page_index(page_index_bytes); * - * // Optional: Prune filter column data pages using statistics in page index - * auto [row_mask, data_page_mask] = - * reader->filter_data_pages_with_stats(current_row_group_indices, options, stream, mr); + * // Build a row mask column containing all `true` values + * auto const num_rows = reader->total_rows_in_row_groups(current_row_group_indices); + * auto row_mask = cudf::make_numeric_column( + * cudf::data_type{cudf::type_id::BOOL8}, num_rows, rmm::device_buffer{}, 0, stream, mr); + * + * // Alternatively, build a row mask column indicating only the rows that survive the page-level + * statistics in the page index + * row_mask = reader->build_row_mask_with_page_index_stats(current_row_group_indices, options, + * stream, mr); * @endcode * - * Materialize filter columns: Once we are finished with pruning row groups and filter column data - * pages, the next step is to materialize filter columns into a table (first reader pass). This is + * Materialize filter columns: Once we are done with pruning row groups and constructing the row + * mask, the next step is to materialize filter columns into a table (first reader pass). This is * done using the `materialize_filter_columns()` function. This function requires a vector of device - * buffers containing column chunk data for the current list of row groups, and the data page and - * row masks obtained from the page pruning step. The function returns a table of materialized - * filter columns and also updates the row mask column to only the valid rows that satisfy the - * filter expression. If no row group pruning is needed, pass a span of all row group indices from - * `all_row_groups()` function as the current list of row groups. Similarly, if no page pruning is - * desired, pass an empty span as data page mask and a mutable view of a BOOL8 column of size equal - * to total number of rows in the current row groups list (computed by `total_rows_in_row_groups()`) - * containing all `true` values as row mask. Further, the byte ranges for the required column chunk - * data may be obtained using the `filter_column_chunks_byte_ranges()` function and read into a - * corresponding vector of vectors of device buffers. + * buffers containing column chunk data for the current list of row groups, and a mutable view of + * the current row mask. The function optionally builds a mask for the current data pages using the + * input row mask to skip decompression and decoding of the pruned pages based on the + * `mask_data_pages` argument. The filter columns are then read into a table and filtered based on + * the filter expression and the row mask is updated to only indicate the rows that survive in the + * read table. The final table is returned. The byte ranges for the required column chunk data may + * be obtained using the `filter_column_chunks_byte_ranges()` function and read into a corresponding + * vector of vectors of device buffers. 
* @code{.cpp} * // Get byte ranges of column chunk byte ranges from the reader * auto const filter_column_chunk_byte_ranges = @@ -219,24 +232,21 @@ namespace io::parquet::experimental { * * // Materialize the table with only the filter columns * auto [filter_table, filter_metadata] = - * reader->materialize_filter_columns(data_page_mask, - * current_row_group_indices, + * reader->materialize_filter_columns(current_row_group_indices, * std::move(filter_column_chunk_buffers), * row_mask->mutable_view(), + * use_data_page_mask::YES/NO, * options, * stream); * @endcode * * Materialize payload columns: Once the filter columns are materialized, the final step is to * materialize the payload columns into another table (second reader pass). This is done using the - * `materialize_payload_columns()` function. This function requires a vector of device buffers - * containing column chunk data for the current list of row groups, and the updated row mask from - * the `materialize_filter_columns()`. The function uses the row mask - may be a BOOL8 column of - * size equal to total number of rows in the current row groups list containing all `true` values if - * no pruning is desired - to internally prune payload column data pages and mask the materialized - * payload columns to the desired rows. Similar to the first reader pass, the byte ranges for the - * required column chunk data may be obtained using the `payload_column_chunks_byte_ranges()` - * function and read into a corresponding vector of vectors of device buffers. + * `materialize_payload_columns()` function which is identical to the `materialize_filter_columns()` + * in terms of functionality except that it accepts an immutable view of the row mask and uses it to + * filter the read output table before returning it. The byte ranges for the required column chunk + * data may be obtained using the `payload_column_chunks_byte_ranges()` function and read into a + * corresponding vector of vectors of device buffers. * @code{.cpp} * // Get column chunk byte ranges from the reader * auto const payload_column_chunk_byte_ranges = @@ -251,6 +261,7 @@ namespace io::parquet::experimental { * reader->materialize_payload_columns(current_row_group_indices, * std::move(payload_column_chunk_buffers), * row_mask->view(), + * use_data_page_mask::YES/NO, * options, * stream); * @endcode @@ -258,7 +269,7 @@ namespace io::parquet::experimental { * Once both reader passes are complete, the filter and payload column tables may be trivially * combined by releasing the columns from both tables and moving them into a new cudf table. * - * @note The performance advantage of this reader is most pronounced when the filter expression + * @note The performance advantage of this reader is most prominent when the filter expression * is highly selective, i.e. when the data in filter columns are at least partially ordered and the * number of rows that survive the filter is small compared to the total number of rows in the * parquet file. Otherwise, the performance is identical to the `cudf::io::read_parquet()` function. 
@@ -390,21 +401,21 @@ class hybrid_scan_reader { rmm::cuda_stream_view stream) const; /** - * @brief Filter data pages of filter columns using page statistics from page index metadata + * @brief Builds a boolean column indicating which rows survive the page statistics in the page + * index * * @param row_group_indices Input row groups indices * @param options Parquet reader options * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return A pair of boolean column indicating rows corresponding to data pages after - * page-pruning, and a list of boolean vectors indicating which data pages are not pruned, - * one per filter column. + * @return A boolean column indicating which filter column rows survive the statistics in the page + * index */ - [[nodiscard]] std::pair, std::vector>> - filter_data_pages_with_stats(cudf::host_span row_group_indices, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const; + [[nodiscard]] std::unique_ptr build_row_mask_with_page_index_stats( + cudf::host_span row_group_indices, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; /** * @brief Get byte ranges of column chunks of filter columns @@ -421,20 +432,19 @@ class hybrid_scan_reader { * @brief Materializes filter columns and updates the input row mask to only the rows * that exist in the output table * - * @param page_mask Boolean vectors indicating which data pages are not pruned, one per filter - * column. All data pages considered not pruned if empty * @param row_group_indices Input row groups indices * @param column_chunk_buffers Device buffers containing column chunk data of filter columns * @param[in,out] row_mask Mutable boolean column indicating surviving rows from page pruning + * @param mask_data_pages Whether to build and use a data page mask using the row mask * @param options Parquet reader options * @param stream CUDA stream used for device memory operations and kernel launches * @return Table of materialized filter columns and metadata */ [[nodiscard]] table_with_metadata materialize_filter_columns( - cudf::host_span const> page_mask, cudf::host_span row_group_indices, std::vector column_chunk_buffers, cudf::mutable_column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) const; @@ -455,6 +465,7 @@ class hybrid_scan_reader { * @param row_group_indices Input row groups indices * @param column_chunk_buffers Device buffers containing column chunk data of payload columns * @param row_mask Boolean column indicating which rows need to be read. 
All rows read if empty + * @param mask_data_pages Whether to build and use a data page mask using the row mask * @param options Parquet reader options * @param stream CUDA stream used for device memory operations and kernel launches * @return Table of materialized payload columns and metadata @@ -463,6 +474,7 @@ class hybrid_scan_reader { cudf::host_span row_group_indices, std::vector column_chunk_buffers, cudf::column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) const; diff --git a/cpp/src/io/parquet/experimental/hybrid_scan.cpp b/cpp/src/io/parquet/experimental/hybrid_scan.cpp index fc8ef765a50..f14ff508561 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan.cpp @@ -123,17 +123,17 @@ std::vector hybrid_scan_reader::filter_row_groups_with_bloom_fi .front(); } -std::pair, std::vector>> -hybrid_scan_reader::filter_data_pages_with_stats(cudf::host_span row_group_indices, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +std::unique_ptr hybrid_scan_reader::build_row_mask_with_page_index_stats( + cudf::host_span row_group_indices, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; - return _impl->filter_data_pages_with_stats(input_row_group_indices, options, stream, mr); + return _impl->build_row_mask_with_page_index_stats(input_row_group_indices, options, stream, mr); } [[nodiscard]] std::vector @@ -148,10 +148,10 @@ hybrid_scan_reader::filter_column_chunks_byte_ranges( } table_with_metadata hybrid_scan_reader::materialize_filter_columns( - cudf::host_span const> data_page_mask, cudf::host_span row_group_indices, std::vector column_chunk_buffers, cudf::mutable_column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) const { @@ -159,10 +159,10 @@ table_with_metadata hybrid_scan_reader::materialize_filter_columns( auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; - return _impl->materialize_filter_columns(data_page_mask, - input_row_group_indices, + return _impl->materialize_filter_columns(input_row_group_indices, std::move(column_chunk_buffers), row_mask, + mask_data_pages, options, stream); } @@ -181,6 +181,7 @@ table_with_metadata hybrid_scan_reader::materialize_payload_columns( cudf::host_span row_group_indices, std::vector column_chunk_buffers, cudf::column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) const { @@ -188,8 +189,12 @@ table_with_metadata hybrid_scan_reader::materialize_payload_columns( auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; - return _impl->materialize_payload_columns( - input_row_group_indices, std::move(column_chunk_buffers), row_mask, options, stream); + return _impl->materialize_payload_columns(input_row_group_indices, + std::move(column_chunk_buffers), + row_mask, + mask_data_pages, + options, + stream); } } // namespace cudf::io::parquet::experimental diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.hpp b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.hpp 
index 4372da85451..a60ad2a79ba 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.hpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.hpp @@ -237,19 +237,20 @@ class aggregate_reader_metadata : public aggregate_reader_metadata_base { rmm::cuda_stream_view stream) const; /** - * @brief Filter data pages using statistics page-level statistics based on predicate filter + * @brief Builds a row mask based on the data pages that survive page-level statistics based on + * predicate filter * * @param row_group_indices Input row groups indices * @param output_dtypes Datatypes of output columns * @param output_column_schemas schema indices of output columns - * @param filter AST expression to filter data pages based on `PageIndex` statistics + * @param filter AST expression to filter data pages based on page index statistics * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * * @return A boolean column representing a mask of rows surviving the predicate filter at * page-level */ - [[nodiscard]] std::unique_ptr filter_data_pages_with_stats( + [[nodiscard]] std::unique_ptr build_row_mask_with_page_index_stats( cudf::host_span const> row_group_indices, cudf::host_span output_dtypes, cudf::host_span output_column_schemas, diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp index 48aa1d6c40c..354eb2adde1 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp @@ -322,8 +322,7 @@ std::vector> hybrid_scan_reader_impl::filter_row_groups_w stream); } -std::pair, std::vector>> -hybrid_scan_reader_impl::filter_data_pages_with_stats( +std::unique_ptr hybrid_scan_reader_impl::build_row_mask_with_page_index_stats( cudf::host_span const> row_group_indices, parquet_reader_options const& options, rmm::cuda_stream_view stream, @@ -343,18 +342,13 @@ hybrid_scan_reader_impl::filter_data_pages_with_stats( "Columns names in filter expression must be convertible to index references"); auto output_dtypes = get_output_types(_output_buffers_template); - auto row_mask = - _extended_metadata->filter_data_pages_with_stats(row_group_indices, - output_dtypes, - _output_column_schemas, - expr_conv.get_converted_expr().value(), - stream, - mr); - - auto data_page_mask = _extended_metadata->compute_data_page_mask( - row_mask->view(), row_group_indices, output_dtypes, _output_column_schemas, stream); - - return {std::move(row_mask), std::move(data_page_mask)}; + return _extended_metadata->build_row_mask_with_page_index_stats( + row_group_indices, + output_dtypes, + _output_column_schemas, + expr_conv.get_converted_expr().value(), + stream, + mr); } std::pair, std::vector> @@ -434,10 +428,10 @@ hybrid_scan_reader_impl::payload_column_chunks_byte_ranges( } table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( - cudf::host_span const> data_page_mask, cudf::host_span const> row_group_indices, std::vector column_chunk_buffers, cudf::mutable_column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) { @@ -458,11 +452,13 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( select_columns(read_columns_mode::FILTER_COLUMNS, options); - // If the data page mask is empty, fill the row mask with all true values - if (data_page_mask.empty()) { - auto const 
value = cudf::numeric_scalar(true, true, stream); - cudf::fill_in_place(row_mask, 0, row_mask.size(), value, stream); - } + auto output_dtypes = get_output_types(_output_buffers_template); + + auto data_page_mask = + (mask_data_pages == use_data_page_mask::YES) + ? _extended_metadata->compute_data_page_mask( + row_mask, row_group_indices, output_dtypes, _output_column_schemas, stream) + : std::vector>{}; prepare_data(row_group_indices, std::move(column_chunk_buffers), data_page_mask, options); @@ -473,6 +469,7 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( cudf::host_span const> row_group_indices, std::vector column_chunk_buffers, cudf::column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream) { @@ -490,8 +487,11 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( auto output_dtypes = get_output_types(_output_buffers_template); - auto data_page_mask = _extended_metadata->compute_data_page_mask( - row_mask, row_group_indices, output_dtypes, _output_column_schemas, stream); + auto data_page_mask = + (mask_data_pages == use_data_page_mask::YES) + ? _extended_metadata->compute_data_page_mask( + row_mask, row_group_indices, output_dtypes, _output_column_schemas, stream) + : std::vector>{}; prepare_data(row_group_indices, std::move(column_chunk_buffers), data_page_mask, options); diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp index b2c0d40a7d1..feca87aeef4 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.hpp @@ -120,13 +120,13 @@ class hybrid_scan_reader_impl : public parquet::detail::reader_impl { rmm::cuda_stream_view stream); /** - * @copydoc cudf::io::experimental::hybrid_scan::filter_data_pages_with_stats + * @copydoc cudf::io::experimental::hybrid_scan::build_row_mask_with_page_index_stats */ - [[nodiscard]] std::pair, std::vector>> - filter_data_pages_with_stats(cudf::host_span const> row_group_indices, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + [[nodiscard]] std::unique_ptr build_row_mask_with_page_index_stats( + cudf::host_span const> row_group_indices, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); /** * @brief Fetches byte ranges of column chunks of filter columns @@ -144,10 +144,10 @@ class hybrid_scan_reader_impl : public parquet::detail::reader_impl { * @copydoc cudf::io::experimental::hybrid_scan::materialize_filter_columns */ [[nodiscard]] table_with_metadata materialize_filter_columns( - cudf::host_span const> data_page_pask, cudf::host_span const> row_group_indices, std::vector column_chunk_buffers, cudf::mutable_column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream); @@ -170,6 +170,7 @@ class hybrid_scan_reader_impl : public parquet::detail::reader_impl { cudf::host_span const> row_group_indices, std::vector column_chunk_buffers, cudf::column_view row_mask, + use_data_page_mask mask_data_pages, parquet_reader_options const& options, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/parquet/experimental/page_index_filter.cu b/cpp/src/io/parquet/experimental/page_index_filter.cu index 42a27e3af48..91311322c8d 100644 --- a/cpp/src/io/parquet/experimental/page_index_filter.cu +++ 
b/cpp/src/io/parquet/experimental/page_index_filter.cu @@ -603,7 +603,7 @@ struct is_row_required_fn { } // namespace -std::unique_ptr aggregate_reader_metadata::filter_data_pages_with_stats( +std::unique_ptr aggregate_reader_metadata::build_row_mask_with_page_index_stats( cudf::host_span const> row_group_indices, cudf::host_span output_dtypes, cudf::host_span output_column_schemas, diff --git a/cpp/tests/io/experimental/hybrid_scan_filters_test.cpp b/cpp/tests/io/experimental/hybrid_scan_filters_test.cpp index 1870adc8dea..6f47a0e2351 100644 --- a/cpp/tests/io/experimental/hybrid_scan_filters_test.cpp +++ b/cpp/tests/io/experimental/hybrid_scan_filters_test.cpp @@ -248,63 +248,55 @@ TYPED_TEST(PageFilteringWithPageIndexStats, FilterPagesWithPageIndexStats) reinterpret_cast(file_buffer.data()), file_buffer.size()); // Helper function to test data page filteration using page index stats - auto const test_filter_data_pages_with_stats = - [&](cudf::ast::operation const& filter_expression, - cudf::size_type const num_filter_columns, - cudf::size_type const expected_num_pages_after_page_index_filter, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { - // Create reader options with empty source info - cudf::io::parquet_reader_options options = - cudf::io::parquet_reader_options::builder().filter(filter_expression); - - // Fetch footer and page index bytes from the buffer. - auto const footer_buffer = fetch_footer_bytes(file_buffer_span); - - // Create hybrid scan reader with footer bytes - auto const reader = std::make_unique( - footer_buffer, options); - - // Get all row groups from the reader - auto input_row_group_indices = reader->all_row_groups(options); - - // Span to track current row group indices - auto current_row_group_indices = cudf::host_span(input_row_group_indices); - - // Calling `filter_data_pages_with_stats` before setting up the page index should raise an - // error - EXPECT_THROW(std::ignore = reader->filter_data_pages_with_stats( - current_row_group_indices, options, stream, mr), - std::runtime_error); - - // Set up the page index - auto const page_index_byte_range = reader->page_index_byte_range(); - auto const page_index_buffer = - fetch_page_index_bytes(file_buffer_span, page_index_byte_range); - reader->setup_page_index(page_index_buffer); - - // Filter the data pages with page index stats - auto const [row_mask, data_page_mask] = - reader->filter_data_pages_with_stats(current_row_group_indices, options, stream, mr); - EXPECT_EQ(data_page_mask.size(), num_filter_columns); - - auto const expected_num_rows = reader->total_rows_in_row_groups(current_row_group_indices); - EXPECT_EQ(row_mask->type().id(), cudf::type_id::BOOL8); - EXPECT_EQ(row_mask->size(), expected_num_rows); - EXPECT_EQ(row_mask->null_count(), 0); - - // Half the pages should survive the page index filter - - // Count the number of pages that survive the page index filter - auto const num_pages_after_page_index_filter = - std::accumulate(data_page_mask.begin(), - data_page_mask.end(), - cudf::size_type{0}, - [](auto sum, auto const& page_mask) { - return sum + std::count(page_mask.cbegin(), page_mask.cend(), true); - }); - EXPECT_EQ(num_pages_after_page_index_filter, expected_num_pages_after_page_index_filter); - }; + auto const test_filter_data_pages_with_stats = [&](cudf::ast::operation const& filter_expression, + cudf::size_type const expected_surviving_rows, + rmm::cuda_stream_view stream = + 
cudf::get_default_stream(),
+                                                     rmm::device_async_resource_ref mr =
+                                                       cudf::get_current_device_resource_ref()) {
+    // Create reader options with empty source info
+    cudf::io::parquet_reader_options options =
+      cudf::io::parquet_reader_options::builder().filter(filter_expression);
+
+    // Fetch footer and page index bytes from the buffer.
+    auto const footer_buffer = fetch_footer_bytes(file_buffer_span);
+
+    // Create hybrid scan reader with footer bytes
+    auto const reader =
+      std::make_unique(footer_buffer, options);
+
+    // Get all row groups from the reader
+    auto input_row_group_indices = reader->all_row_groups(options);
+
+    // Span to track current row group indices
+    auto current_row_group_indices = cudf::host_span(input_row_group_indices);
+
+    // Calling `build_row_mask_with_page_index_stats` before setting up the page index should
+    // raise an error
+    EXPECT_THROW(std::ignore = reader->build_row_mask_with_page_index_stats(
+                   current_row_group_indices, options, stream, mr),
+                 std::runtime_error);
+
+    // Set up the page index
+    auto const page_index_byte_range = reader->page_index_byte_range();
+    auto const page_index_buffer = fetch_page_index_bytes(file_buffer_span, page_index_byte_range);
+    reader->setup_page_index(page_index_buffer);
+
+    // Build the row mask with page index stats
+    auto const row_mask =
+      reader->build_row_mask_with_page_index_stats(current_row_group_indices, options, stream, mr);
+
+    auto const expected_num_rows = reader->total_rows_in_row_groups(current_row_group_indices);
+    EXPECT_EQ(row_mask->type().id(), cudf::type_id::BOOL8);
+    EXPECT_EQ(row_mask->size(), expected_num_rows);
+    EXPECT_EQ(row_mask->null_count(), 0);
+
+    // Copy the row mask to the host and count the number of surviving rows
+    auto const host_row_mask = cudf::detail::make_host_vector(
+      {row_mask->view().data(), static_cast(row_mask->view().size())}, stream);
+    EXPECT_EQ(std::count(host_row_mask.begin(), host_row_mask.end(), true),
+              expected_surviving_rows);
+  };
 
   // Filtering AST - table[0] < 100
   {
@@ -312,12 +304,11 @@ TYPED_TEST(PageFilteringWithPageIndexStats, FilterPagesWithPageIndexStats)
     auto const literal = cudf::ast::literal(literal_value);
     auto const col_ref = cudf::ast::column_name_reference("col0");
     auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::LESS, col_ref, literal);
-    auto constexpr num_filter_columns = 1;
-    // Half the pages should be filtered out by the page index filter
-    auto constexpr expected_num_pages_after_page_index_filter =
-      num_concat * (num_ordered_rows / page_size_for_ordered_tests) / 2;
-    test_filter_data_pages_with_stats(
-      filter_expression, num_filter_columns, expected_num_pages_after_page_index_filter);
+    // Half the pages (unsigned) or 3/4th the pages (signed) should be filtered out by the page
+    // index filter
+    auto constexpr expected_surviving_rows =
+      (num_concat * num_ordered_rows) / (std::is_signed_v ? 
4 : 2); + test_filter_data_pages_with_stats(filter_expression, expected_surviving_rows); } // Filtering AST - table[2] >= 10000 @@ -327,12 +318,11 @@ TYPED_TEST(PageFilteringWithPageIndexStats, FilterPagesWithPageIndexStats) auto col_ref = cudf::ast::column_name_reference("col2"); auto filter_expression = cudf::ast::operation(cudf::ast::ast_operator::GREATER_EQUAL, col_ref, literal); - auto constexpr num_filter_columns = 1; - // Half the pages should be filtered out by the page index filter - auto constexpr expected_num_pages_after_page_index_filter = - num_concat * (num_ordered_rows / page_size_for_ordered_tests) / 2; - test_filter_data_pages_with_stats( - filter_expression, num_filter_columns, expected_num_pages_after_page_index_filter); + // Half the pages (unsigned) or 3/4th the pages (signed) should be filtered out by the page + // index filter + auto constexpr expected_surviving_rows = + (num_concat * num_ordered_rows) / (std::is_signed_v ? 4 : 2); + test_filter_data_pages_with_stats(filter_expression, expected_surviving_rows); } // Filtering AST - table[0] < 50 AND table[2] < "000010000" @@ -351,11 +341,9 @@ TYPED_TEST(PageFilteringWithPageIndexStats, FilterPagesWithPageIndexStats) auto filter_expression = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_AND, filter_expression1, filter_expression2); - auto constexpr num_filter_columns = 2; // Only one page per num_concat per filter column should survive - auto constexpr expected_num_pages_after_page_index_filter = 1 * num_concat * num_filter_columns; - test_filter_data_pages_with_stats( - filter_expression, num_filter_columns, expected_num_pages_after_page_index_filter); + auto constexpr expected_surviving_rows = num_concat * page_size_for_ordered_tests; + test_filter_data_pages_with_stats(filter_expression, expected_surviving_rows); } // Filtering AST - table[0] > 150 OR table[2] < "000005000" @@ -374,12 +362,10 @@ TYPED_TEST(PageFilteringWithPageIndexStats, FilterPagesWithPageIndexStats) auto filter_expression = cudf::ast::operation( cudf::ast::ast_operator::LOGICAL_OR, filter_expression1, filter_expression2); - auto constexpr num_filter_columns = 2; // Two pages (3rd and 0th from respective conditions) per num_concat per filter column should // survive - auto constexpr expected_num_pages_after_page_index_filter = 2 * num_concat * num_filter_columns; - test_filter_data_pages_with_stats( - filter_expression, num_filter_columns, expected_num_pages_after_page_index_filter); + auto constexpr expected_surviving_rows = 2 * num_concat * page_size_for_ordered_tests; + test_filter_data_pages_with_stats(filter_expression, expected_surviving_rows); } } diff --git a/cpp/tests/io/experimental/hybrid_scan_test.cpp b/cpp/tests/io/experimental/hybrid_scan_test.cpp index 316e55a02eb..6ec572d1e49 100644 --- a/cpp/tests/io/experimental/hybrid_scan_test.cpp +++ b/cpp/tests/io/experimental/hybrid_scan_test.cpp @@ -141,10 +141,8 @@ auto hybrid_scan(std::vector& buffer, } // Filter data pages with page index stats - auto [row_mask, data_page_mask] = - reader->filter_data_pages_with_stats(current_row_group_indices, options, stream, mr); - - EXPECT_EQ(data_page_mask.size(), num_filter_columns); + auto row_mask = + reader->build_row_mask_with_page_index_stats(current_row_group_indices, options, stream, mr); // Get column chunk byte ranges from the reader auto const filter_column_chunk_byte_ranges = @@ -156,10 +154,10 @@ auto hybrid_scan(std::vector& buffer, // Materialize the table with only the filter columns auto [filter_table, filter_metadata] = - 
reader->materialize_filter_columns(data_page_mask, - current_row_group_indices, + reader->materialize_filter_columns(current_row_group_indices, std::move(filter_column_chunk_buffers), row_mask->mutable_view(), + cudf::io::parquet::experimental::use_data_page_mask::YES, options, stream); @@ -168,14 +166,15 @@ auto hybrid_scan(std::vector& buffer, reader->payload_column_chunks_byte_ranges(current_row_group_indices, options); // Fetch column chunk device buffers from the input buffer - [[maybe_unused]] auto payload_column_chunk_buffers = + auto payload_column_chunk_buffers = fetch_byte_ranges(file_buffer_span, payload_column_chunk_byte_ranges, stream, mr); // Materialize the table with only the payload columns - [[maybe_unused]] auto [payload_table, payload_metadata] = + auto [payload_table, payload_metadata] = reader->materialize_payload_columns(current_row_group_indices, std::move(payload_column_chunk_buffers), row_mask->view(), + cudf::io::parquet::experimental::use_data_page_mask::YES, options, stream); @@ -228,7 +227,7 @@ TEST_F(HybridScanTest, PruneRowGroupsOnlyAndScanAllColumns) cudf::io::parquet_reader_options::builder( cudf::io::source_info(cudf::host_span(parquet_buffer.data(), parquet_buffer.size()))) .filter(filter_expression); - auto [expected_tbl, expected_meta] = cudf::io::read_parquet(options, stream); + auto [expected_tbl, expected_meta] = read_parquet(options, stream); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({0}), read_filter_table->view()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({1, 2}), read_payload_table->view()); } @@ -275,7 +274,7 @@ TEST_F(HybridScanTest, PruneRowGroupsOnlyAndScanSelectColumns) cudf::io::parquet_reader_options::builder( cudf::io::source_info(cudf::host_span(parquet_buffer.data(), parquet_buffer.size()))) .filter(filter_expression); - auto [expected_tbl, expected_meta] = cudf::io::read_parquet(options, stream); + auto [expected_tbl, expected_meta] = read_parquet(options, stream); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({0}), read_filter_table->view()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({2}), read_payload_table->view()); } @@ -298,7 +297,7 @@ TEST_F(HybridScanTest, PruneRowGroupsOnlyAndScanSelectColumns) cudf::io::parquet_reader_options::builder( cudf::io::source_info(cudf::host_span(parquet_buffer.data(), parquet_buffer.size()))) .filter(filter_expression); - auto [expected_tbl, expected_meta] = cudf::io::read_parquet(options, stream); + auto [expected_tbl, expected_meta] = read_parquet(options, stream); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({0}), read_filter_table->view()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({2, 1}), read_payload_table->view()); } @@ -341,7 +340,7 @@ TEST_F(HybridScanTest, PruneDataPagesOnlyAndScanAllColumns) cudf::io::parquet_reader_options::builder( cudf::io::source_info(cudf::host_span(buffer.data(), buffer.size()))) .filter(filter_expression); - auto [expected_tbl, expected_meta] = cudf::io::read_parquet(options, stream); + auto [expected_tbl, expected_meta] = read_parquet(options, stream); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({0}), read_filter_table->view()); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_tbl->select({1, 2}), read_payload_table->view()); } From f16d1d007bb3cb3d423d4f7f43fadf0b71261bc8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 31 Jul 2025 18:44:49 -0700 Subject: [PATCH 039/366] Add support for streams to all copying APIs. 
(#19553) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19553 --- python/pylibcudf/pylibcudf/column.pyx | 3 +- python/pylibcudf/pylibcudf/copying.pxd | 31 +++-- python/pylibcudf/pylibcudf/copying.pyi | 48 +++++-- python/pylibcudf/pylibcudf/copying.pyx | 124 +++++++++++++----- .../pylibcudf/pylibcudf/libcudf/copying.pxd | 56 +++++--- .../libcudf/utilities/default_stream.pxd | 3 + 6 files changed, 196 insertions(+), 69 deletions(-) diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index eb0ed33f1e8..40ca7b0e4c3 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -32,6 +32,7 @@ from pylibcudf.libcudf.strings.strings_column_view cimport strings_column_view from pylibcudf.libcudf.types cimport size_type, size_of as cpp_size_of, bitmask_type from pylibcudf.libcudf.utilities.traits cimport is_fixed_width from pylibcudf.libcudf.copying cimport get_element +from pylibcudf.libcudf.utilities.default_stream cimport get_default_stream from rmm.pylibrmm.device_buffer cimport DeviceBuffer @@ -719,7 +720,7 @@ cdef class Column: cdef unique_ptr[scalar] result with nogil: - result = get_element(cv, 0) + result = get_element(cv, 0, get_default_stream()) return Scalar.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/copying.pxd b/python/pylibcudf/pylibcudf/copying.pxd index 7dfed437673..892769582c0 100644 --- a/python/pylibcudf/pylibcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/copying.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
from libcpp cimport bool as cbool from pylibcudf.libcudf.copying cimport ( @@ -7,6 +7,8 @@ from pylibcudf.libcudf.copying cimport ( ) from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .scalar cimport Scalar from .table cimport Table @@ -35,14 +37,19 @@ ctypedef fused RightCopyIfElseOperand: cpdef Table gather( Table source_table, Column gather_map, - out_of_bounds_policy bounds_policy + out_of_bounds_policy bounds_policy, + Stream stream=* ) -cpdef Table scatter(TableOrListOfScalars source, Column scatter_map, Table target_table) +cpdef Table scatter( + TableOrListOfScalars source, Column scatter_map, Table target_table, Stream stream=* +) cpdef ColumnOrTable empty_like(ColumnOrTable input) -cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) +cpdef Column allocate_like( + Column input_column, mask_allocation_policy policy, size=*, Stream stream=* +) cpdef Column copy_range_in_place( Column input_column, @@ -50,6 +57,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, + Stream stream=* ) cpdef Column copy_range( @@ -58,24 +66,27 @@ cpdef Column copy_range( size_type input_begin, size_type input_end, size_type target_begin, + Stream stream=* ) -cpdef Column shift(Column input, size_type offset, Scalar fill_value) +cpdef Column shift(Column input, size_type offset, Scalar fill_value, Stream stream=*) -cpdef list slice(ColumnOrTable input, list indices) +cpdef list slice(ColumnOrTable input, list indices, Stream stream=*) -cpdef list split(ColumnOrTable input, list splits) +cpdef list split(ColumnOrTable input, list splits, Stream stream=*) cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, - Column boolean_mask + Column boolean_mask, + Stream stream=* ) cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, - Column boolean_mask + Column boolean_mask, + Stream stream=* ) -cpdef Scalar get_element(Column input_column, size_type index) +cpdef Scalar get_element(Column input_column, size_type index, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi index 6cf4ed48724..864f1170993 100644 --- a/python/pylibcudf/pylibcudf/copying.pyi +++ b/python/pylibcudf/pylibcudf/copying.pyi @@ -3,6 +3,8 @@ from enum import IntEnum from typing import TypeVar +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table @@ -19,14 +21,23 @@ class OutOfBoundsPolicy(IntEnum): ColumnOrTable = TypeVar("ColumnOrTable", Column, Table) def gather( - source_table: Table, gather_map: Column, bounds_policy: OutOfBoundsPolicy + source_table: Table, + gather_map: Column, + bounds_policy: OutOfBoundsPolicy, + stream: Stream | None = None, ) -> Table: ... def scatter( - source: Table | list[Scalar], scatter_map: Column, target_table: Table + source: Table | list[Scalar], + scatter_map: Column, + target_table: Table, + stream: Stream | None = None, ) -> Table: ... def empty_like(input: ColumnOrTable) -> ColumnOrTable: ... def allocate_like( - input_column: Column, policy: MaskAllocationPolicy, size: int | None = None + input_column: Column, + policy: MaskAllocationPolicy, + size: int | None = None, + stream: Stream | None = None, ) -> Column: ... 
def copy_range_in_place( input_column: Column, @@ -34,6 +45,7 @@ def copy_range_in_place( input_begin: int, input_end: int, target_begin: int, + stream: Stream | None = None, ) -> Column: ... def copy_range( input_column: Column, @@ -41,14 +53,32 @@ def copy_range( input_begin: int, input_end: int, target_begin: int, + stream: Stream | None = None, +) -> Column: ... +def shift( + input: Column, + offset: int, + fill_value: Scalar, + stream: Stream | None = None, ) -> Column: ... -def shift(input: Column, offset: int, fill_value: Scalar) -> Column: ... -def slice(input: ColumnOrTable, indices: list[int]) -> list[ColumnOrTable]: ... -def split(input: ColumnOrTable, splits: list[int]) -> list[ColumnOrTable]: ... +def slice( + input: ColumnOrTable, indices: list[int], stream: Stream | None = None +) -> list[ColumnOrTable]: ... +def split( + input: ColumnOrTable, splits: list[int], stream: Stream | None = None +) -> list[ColumnOrTable]: ... def copy_if_else( - lhs: Column | Scalar, rhs: Column | Scalar, boolean_mask: Column + lhs: Column | Scalar, + rhs: Column | Scalar, + boolean_mask: Column, + stream: Stream | None = None, ) -> Column: ... def boolean_mask_scatter( - input: Table | list[Scalar], target: Table, boolean_mask: Column + input: Table | list[Scalar], + target: Table, + boolean_mask: Column, + stream: Stream | None = None, ) -> Table: ... -def get_element(input_column: Column, index: int) -> Scalar: ... +def get_element( + input_column: Column, index: int, stream: Stream | None = None +) -> Scalar: ... diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx index 3b0ba0d9555..d7f7fa14068 100644 --- a/python/pylibcudf/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -32,10 +32,12 @@ from pylibcudf.libcudf.copying import \ from pylibcudf.libcudf.copying import \ sample_with_replacement as SampleWithReplacement # no-cython-lint +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .scalar cimport Scalar from .table cimport Table -from .utils cimport _as_vector +from .utils cimport _as_vector, _get_stream __all__ = [ @@ -59,7 +61,8 @@ __all__ = [ cpdef Table gather( Table source_table, Column gather_map, - out_of_bounds_policy bounds_policy + out_of_bounds_policy bounds_policy, + Stream stream=None ): """Select rows from source_table according to the provided gather_map. @@ -74,6 +77,8 @@ cpdef Table gather( bounds_policy : out_of_bounds_policy Controls whether out of bounds indices are checked and nullified in the output or if indices are assumed to be in bounds. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -86,20 +91,24 @@ cpdef Table gather( If the gather_map contains nulls. """ cdef unique_ptr[table] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_copying.gather( source_table.view(), gather_map.view(), - bounds_policy + bounds_policy, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table scatter( TableOrListOfScalars source, Column scatter_map, - Table target_table + Table target_table, + Stream stream=None ): """Scatter from source into target_table according to scatter_map. @@ -116,6 +125,8 @@ cpdef Table scatter( A mapping from rows in source to rows in target_table. target_table : Table The table object into which to scatter data. + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -138,12 +149,15 @@ cpdef Table scatter( """ cdef unique_ptr[table] c_result cdef vector[reference_wrapper[const scalar]] source_scalars + stream = _get_stream(stream) + if TableOrListOfScalars is Table: with nogil: c_result = cpp_copying.scatter( source.view(), scatter_map.view(), target_table.view(), + stream.view() ) else: source_scalars = _as_vector(source) @@ -152,8 +166,9 @@ cpdef Table scatter( source_scalars, scatter_map.view(), target_table.view(), + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef ColumnOrTable empty_like(ColumnOrTable input): @@ -184,7 +199,7 @@ cpdef ColumnOrTable empty_like(ColumnOrTable input): cpdef Column allocate_like( - Column input_column, mask_allocation_policy policy, size=None + Column input_column, mask_allocation_policy policy, size=None, Stream stream=None ): """Allocate a column with the same type as input_column. @@ -199,6 +214,8 @@ cpdef Column allocate_like( size : int, optional The number of elements to allocate in the output column. If not specified, the size of the input column is used. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -208,15 +225,17 @@ cpdef Column allocate_like( cdef unique_ptr[column] c_result cdef size_type c_size = size if size is not None else input_column.size() + stream = _get_stream(stream) with nogil: c_result = cpp_copying.allocate_like( input_column.view(), c_size, policy, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column copy_range_in_place( @@ -225,6 +244,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, + Stream stream=None ): """Copy a range of elements from input_column to target_column. @@ -244,6 +264,8 @@ cpdef Column copy_range_in_place( The index of the last element in input_column to copy. target_begin : int The index of the first element in target_column to overwrite. + stream : Stream | None + CUDA stream on which to perform the operation. Raises ------ @@ -261,13 +283,16 @@ cpdef Column copy_range_in_place( # try and pass a temporary that decays to an rvalue reference in where the # function requires an lvalue reference. cdef mutable_column_view target_view = target_column.mutable_view() + stream = _get_stream(stream) + with nogil: cpp_copying.copy_range_in_place( input_column.view(), target_view, input_begin, input_end, - target_begin + target_begin, + stream.view() ) @@ -277,6 +302,7 @@ cpdef Column copy_range( size_type input_begin, size_type input_end, size_type target_begin, + Stream stream=None ): """Copy a range of elements from input_column to target_column. @@ -294,6 +320,8 @@ cpdef Column copy_range( The index of the last element in input_column to copy. target_begin : int The index of the first element in target_column to overwrite. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -309,6 +337,7 @@ cpdef Column copy_range( If target and source have different types. 
""" cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = cpp_copying.copy_range( @@ -316,13 +345,16 @@ cpdef Column copy_range( target_column.view(), input_begin, input_end, - target_begin + target_begin, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column shift(Column input, size_type offset, Scalar fill_value): +cpdef Column shift( + Column input, size_type offset, Scalar fill_value, Stream stream=None +): """Shift the elements of input by offset. For details on the implementation, see :cpp:func:`shift`. @@ -336,6 +368,8 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_value): fill_values : Scalar The value to use for elements that are shifted in from outside the bounds of the input column. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -349,16 +383,19 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_value): of fixed width or string type. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_copying.shift( input.view(), offset, - dereference(fill_value.c_obj) + dereference(fill_value.c_obj), + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef list slice(ColumnOrTable input, list indices): +cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): """Slice input according to indices. For details on the implementation, see :cpp:func:`slice`. @@ -369,6 +406,8 @@ cpdef list slice(ColumnOrTable input, list indices): The column or table to slice. indices : List[int] The indices to select from input. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -387,9 +426,11 @@ cpdef list slice(ColumnOrTable input, list indices): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i + stream = _get_stream(stream) + if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.slice(input.view(), c_indices) + c_col_result = cpp_copying.slice(input.view(), c_indices, stream.view()) return [ Column.from_column_view(c_col_result[i], input) @@ -397,7 +438,7 @@ cpdef list slice(ColumnOrTable input, list indices): ] else: with nogil: - c_tbl_result = cpp_copying.slice(input.view(), c_indices) + c_tbl_result = cpp_copying.slice(input.view(), c_indices, stream.view()) return [ Table.from_table_view(c_tbl_result[i], input) @@ -405,7 +446,7 @@ cpdef list slice(ColumnOrTable input, list indices): ] -cpdef list split(ColumnOrTable input, list splits): +cpdef list split(ColumnOrTable input, list splits, Stream stream=None): """Split input into multiple. For details on the implementation, see :cpp:func:`split`. @@ -416,6 +457,8 @@ cpdef list split(ColumnOrTable input, list splits): The column to split. splits : List[int] The indices at which to split the column. + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -426,10 +469,11 @@ cpdef list split(ColumnOrTable input, list splits): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i + stream = _get_stream(stream) if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.split(input.view(), c_splits) + c_col_result = cpp_copying.split(input.view(), c_splits, stream.view()) return [ Column.from_column_view(c_col_result[i], input) @@ -437,7 +481,7 @@ cpdef list split(ColumnOrTable input, list splits): ] else: with nogil: - c_tbl_result = cpp_copying.split(input.view(), c_splits) + c_tbl_result = cpp_copying.split(input.view(), c_splits, stream.view()) return [ Table.from_table_view(c_tbl_result[i], input) @@ -448,7 +492,8 @@ cpdef list split(ColumnOrTable input, list splits): cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, - Column boolean_mask + Column boolean_mask, + Stream stream=None ): """Copy elements from lhs or rhs into a new column according to boolean_mask. @@ -464,6 +509,8 @@ cpdef Column copy_if_else( boolean_mask is False. boolean_mask : Column The boolean mask to use to select elements from lhs and rhs. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -480,37 +527,43 @@ cpdef Column copy_if_else( columns), or if lhs and rhs are not of the same length (if both are columns). """ cdef unique_ptr[column] result + stream = _get_stream(stream) if LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Column: with nogil: result = cpp_copying.copy_if_else( lhs.view(), rhs.view(), - boolean_mask.view() + boolean_mask.view(), + stream.view() ) elif LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Scalar: with nogil: result = cpp_copying.copy_if_else( - lhs.view(), dereference(rhs.c_obj), boolean_mask.view() + lhs.view(), dereference(rhs.c_obj), boolean_mask.view(), stream.view() ) elif LeftCopyIfElseOperand is Scalar and RightCopyIfElseOperand is Column: with nogil: result = cpp_copying.copy_if_else( - dereference(lhs.c_obj), rhs.view(), boolean_mask.view() + dereference(lhs.c_obj), rhs.view(), boolean_mask.view(), stream.view() ) else: with nogil: result = cpp_copying.copy_if_else( - dereference(lhs.c_obj), dereference(rhs.c_obj), boolean_mask.view() + dereference(lhs.c_obj), + dereference(rhs.c_obj), + boolean_mask.view(), + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, - Column boolean_mask + Column boolean_mask, + Stream stream=None ): """Scatter rows from input into target according to boolean_mask. @@ -527,6 +580,8 @@ cpdef Table boolean_mask_scatter( The table object into which to scatter data. boolean_mask : Column A mapping from rows in input to rows in target. + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -545,13 +600,15 @@ cpdef Table boolean_mask_scatter( """ cdef unique_ptr[table] result cdef vector[reference_wrapper[const scalar]] source_scalars + stream = _get_stream(stream) if TableOrListOfScalars is Table: with nogil: result = cpp_copying.boolean_mask_scatter( input.view(), target.view(), - boolean_mask.view() + boolean_mask.view(), + stream.view() ) else: source_scalars = _as_vector(input) @@ -560,12 +617,13 @@ cpdef Table boolean_mask_scatter( source_scalars, target.view(), boolean_mask.view(), + stream.view() ) - return Table.from_libcudf(move(result)) + return Table.from_libcudf(move(result), stream) -cpdef Scalar get_element(Column input_column, size_type index): +cpdef Scalar get_element(Column input_column, size_type index, Stream stream=None): """Get the element at index from input_column. For details on the implementation, see :cpp:func:`get_element`. @@ -576,6 +634,8 @@ cpdef Scalar get_element(Column input_column, size_type index): The column from which to get the element. index : int The index of the element to get. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -588,8 +648,10 @@ cpdef Scalar get_element(Column input_column, size_type index): If index is out of bounds. """ cdef unique_ptr[scalar] c_output + stream = _get_stream(stream) + with nogil: - c_output = cpp_copying.get_element(input_column.view(), index) + c_output = cpp_copying.get_element(input_column.view(), index, stream.view()) return Scalar.from_libcudf(move(c_output)) diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index 5a05284e86a..5ee4b9879eb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t, int64_t, uint8_t from libcpp cimport bool from libcpp.functional cimport reference_wrapper @@ -16,6 +16,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_view ctypedef const scalar constscalar @@ -27,25 +28,29 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: cdef unique_ptr[table] gather ( const table_view& source_table, const column_view& gather_map, - out_of_bounds_policy policy + out_of_bounds_policy policy, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] shift( const column_view& input, size_type offset, - const scalar& fill_values + const scalar& fill_values, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] scatter ( const table_view& source_table, const column_view& scatter_map, const table_view& target_table, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] scatter ( const vector[reference_wrapper[constscalar]]& source_scalars, const column_view& indices, const table_view& target, + cuda_stream_view stream ) except +libcudf_exception_handler cpdef enum class mask_allocation_policy(int32_t): @@ -59,13 +64,15 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: cdef unique_ptr[column] allocate_like ( const column_view& input_column, - mask_allocation_policy policy + mask_allocation_policy policy, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] allocate_like ( const column_view& input_column, size_type size, - mask_allocation_policy policy + mask_allocation_policy policy, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] empty_like ( @@ -77,7 +84,8 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: mutable_column_view& target_column, size_type input_begin, size_type input_end, - size_type target_begin + size_type target_begin, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_range ( @@ -85,68 +93,80 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& target_column, size_type input_begin, size_type input_end, - size_type target_begin + size_type target_begin, + cuda_stream_view stream ) except +libcudf_exception_handler cdef vector[column_view] slice ( const column_view& input_column, - vector[size_type] indices + vector[size_type] indices, + cuda_stream_view stream ) except +libcudf_exception_handler cdef vector[table_view] slice ( const table_view& input_table, - vector[size_type] indices + vector[size_type] indices, + cuda_stream_view stream ) except +libcudf_exception_handler cdef vector[column_view] split ( const column_view& input_column, - vector[size_type] splits + vector[size_type] splits, + cuda_stream_view stream ) except +libcudf_exception_handler cdef vector[table_view] split ( const table_view& input_table, - vector[size_type] splits + vector[size_type] splits, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_if_else ( const column_view& lhs, const column_view& rhs, - const column_view& boolean_mask + const column_view& boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_if_else ( const scalar& lhs, const column_view& rhs, - const column_view& boolean_mask + const column_view& boolean_mask, + 
cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_if_else ( const column_view& lhs, const scalar& rhs, - const column_view boolean_mask + const column_view boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_if_else ( const scalar& lhs, const scalar& rhs, - const column_view boolean_mask + const column_view boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] boolean_mask_scatter ( const table_view& input, const table_view& target, - const column_view& boolean_mask + const column_view& boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] boolean_mask_scatter ( const vector[reference_wrapper[constscalar]]& input, const table_view& target, - const column_view& boolean_mask + const column_view& boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] get_element ( const column_view& input, - size_type index + size_type index, + cuda_stream_view stream ) except +libcudf_exception_handler cpdef enum class sample_with_replacement(bool): diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd index d94915a419f..538d233b63d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd @@ -1,6 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION. from libcpp cimport bool +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/utilities/default_stream.hpp" namespace "cudf" nogil: cdef bool is_ptds_enabled() + cdef cuda_stream_view get_default_stream() From c80d49c5fbf821bd11a3d997be3aadfd7a002ab9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 1 Aug 2025 10:59:24 -0500 Subject: [PATCH 040/366] Capture commit hashes in pdsh benchmarks (#19548) This updates the software versions we record in the pdsh benchmarks to include the commit hash for cudf-polars and rapidsmpf. Note that this changes the serialized representation. 
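Downstream readers of the benchmark JSON should be prepared for both the old and the new shape shown below, since only cudf-polars and rapidsmpf gain the nested form. A minimal sketch of a tolerant reader (the helper name is illustrative, not part of this change):

```python
def software_version(entry):
    # Older records store a bare version string; newer ones store a
    # {"version": ..., "commit": ...} object for cudf_polars/rapidsmpf.
    if isinstance(entry, dict):
        return entry["version"]
    return entry
```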
Instead of ```json "software": { "cudf_polars": "{version}" } ``` it will now be: ```json "software": { "cudf_polars": { "version": "{version}", "commit": "{commit}" } } ``` Authors: - Tom Augspurger (https://github.com/TomAugspurger) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19548 --- .../experimental/benchmarks/utils.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index a63f1025d60..7e56e3c57d3 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -54,14 +54,22 @@ class Record: duration: float +@dataclasses.dataclass +class VersionInfo: + """Information about the commit of the software used to run the query.""" + + version: str + commit: str + + @dataclasses.dataclass class PackageVersions: """Information about the versions of the software used to run the query.""" - cudf_polars: str + cudf_polars: str | VersionInfo polars: str python: str - rapidsmpf: str | None + rapidsmpf: str | VersionInfo | None @classmethod def collect(cls) -> PackageVersions: @@ -71,15 +79,24 @@ def collect(cls) -> PackageVersions: "polars", "rapidsmpf", ] - versions = {} + versions: dict[str, str | VersionInfo | None] = {} for name in packages: try: package = importlib.import_module(name) - versions[name] = package.__version__ except (AttributeError, ImportError): # noqa: PERF203 versions[name] = None + else: + if name in ("cudf_polars", "rapidsmpf"): + versions[name] = VersionInfo( + version=package.__version__, + commit=package.__git_commit__, + ) + else: + versions[name] = package.__version__ + versions["python"] = ".".join(str(v) for v in sys.version_info[:3]) - return cls(**versions) + # we manually ensure that only cudf-polars and rapidsmpf have a VersionInfo + return cls(**versions) # type: ignore[arg-type] @dataclasses.dataclass From 37a496796dc056a8801075bb16112823f00aadcf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 1 Aug 2025 09:53:46 -0700 Subject: [PATCH 041/366] Move str accessor tests in test_string.py to new cudf classic test directory structure (#19557) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19557 --- .../cudf/tests/series/accessors/__init__.py | 0 .../cudf/tests/series/accessors/test_str.py | 2847 ++++++++++++++++ python/cudf/cudf/tests/test_string.py | 2874 +---------------- 3 files changed, 2871 insertions(+), 2850 deletions(-) create mode 100644 python/cudf/cudf/tests/series/accessors/__init__.py create mode 100644 python/cudf/cudf/tests/series/accessors/test_str.py diff --git a/python/cudf/cudf/tests/series/accessors/__init__.py b/python/cudf/cudf/tests/series/accessors/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/series/accessors/test_str.py b/python/cudf/cudf/tests/series/accessors/test_str.py new file mode 100644 index 00000000000..509ade12ce1 --- /dev/null +++ b/python/cudf/cudf/tests/series/accessors/test_str.py @@ -0,0 +1,2847 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
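+#
+# Tests for the cudf ``Series``/``Index`` ``.str`` accessor, moved here from
+# test_string.py as part of the classic test directory restructuring. Each
+# test builds matching pandas and cudf objects and compares results with
+# ``assert_eq``.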
+ +import json +import re +import urllib.parse +from contextlib import nullcontext as does_not_raise + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf import concat +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +def raise_builder(flags, exceptions): + if any(flags): + return pytest.raises(exceptions) + else: + return does_not_raise() + + +@pytest.fixture( + params=[ + ["AbC", "de", "FGHI", "j", "kLm"], + ["nOPq", None, "RsT", None, "uVw"], + [None, None, None, None, None], + ], + ids=["no_nulls", "some_nulls", "all_nulls"], +) +def data(request): + return request.param + + +@pytest.fixture( + params=[None, [10, 11, 12, 13, 14]], ids=["None_index", "Set_index"] +) +def index(request): + return request.param + + +@pytest.fixture +def ps_gs(data, index): + ps = pd.Series(data, index=index, dtype="str", name="nice name") + gs = cudf.Series(data, index=index, dtype="str", name="nice name") + return (ps, gs) + + +def test_getitem_out_of_bounds(): + data = ["123", "12", "1"] + pd_ser = pd.Series(data) + cudf_ser = cudf.Series(data) + expected = pd_ser.str[2] + result = cudf_ser.str[2] + assert_eq(result, expected) + + expected = pd_ser.str[-2] + result = cudf_ser.str[-2] + assert_eq(result, expected) + + +@pytest.mark.parametrize("method", ["startswith", "endswith"]) +@pytest.mark.parametrize("pat", [None, (1, 2), pd.Series([1])]) +def test_startsendwith_invalid_pat(method, pat): + ser = cudf.Series(["1"]) + with pytest.raises(TypeError): + getattr(ser.str, method)(pat) + + +@pytest.mark.parametrize("method", ["rindex", "index"]) +def test_index_int64_pandas_compat(method): + data = ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"] + with cudf.option_context("mode.pandas_compatible", True): + result = getattr(cudf.Series(data).str, method)("E", 4, 8) + expected = getattr(pd.Series(data).str, method)("E", 4, 8) + assert_eq(result, expected) + + +def test_replace_invalid_scalar_repl(): + ser = cudf.Series(["1"]) + with pytest.raises(TypeError): + ser.str.replace("1", 2) + + +def test_string_methods_setattr(): + ser = cudf.Series(["ab", "cd", "ef"]) + pser = ser.to_pandas() + + assert_exceptions_equal( + lfunc=ser.str.__setattr__, + rfunc=pser.str.__setattr__, + lfunc_args_and_kwargs=(("a", "b"),), + rfunc_args_and_kwargs=(("a", "b"),), + ) + + +@pytest.mark.parametrize( + "data", + [ + [ + """ + { + "store":{ + "book":[ + { + "category":"reference", + "author":"Nigel Rees", + "title":"Sayings of the Century", + "price":8.95 + }, + { + "category":"fiction", + "author":"Evelyn Waugh", + "title":"Sword of Honour", + "price":12.99 + } + ] + } + } + """ + ], + [ + """ + { + "store":{ + "book":[ + { + "category":"reference", + "author":"Nigel Rees", + "title":"Sayings of the Century", + "price":8.95 + } + ] + } + } + """, + """ + { + "store":{ + "book":[ + { + "category":"fiction", + "author":"Evelyn Waugh", + "title":"Sword of Honour", + "price":12.99 + } + ] + } + } + """, + ], + ], +) +def test_string_get_json_object_n(data): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + json.loads(gs.str.get_json_object("$.store")[0]), + ps.apply(lambda x: json.loads(x)["store"])[0], + ) + assert_eq( + json.loads(gs.str.get_json_object("$.store.book")[0]), + ps.apply(lambda x: json.loads(x)["store"]["book"])[0], + ) + assert_eq( + gs.str.get_json_object("$.store.book[0].category"), + ps.apply(lambda x: json.loads(x)["store"]["book"][0]["category"]), + ) + + 
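+# A brief usage sketch for get_json_object (illustrative data, not part of
+# the test suite): the JSONPath expression is applied to each element and
+# the matching JSON value is returned as a string.
+#
+#     >>> cudf.Series(['{"a": {"b": 1}}']).str.get_json_object("$.a.b")
+#     0    1
+#     dtype: object
+
+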
+@pytest.mark.parametrize( + "json_path", ["$.store", "$.store.book", "$.store.book[*].category", " "] +) +def test_string_get_json_object_empty_json_strings(json_path): + gs = cudf.Series( + [ + """ + { + "":{ + "":[ + { + "":"", + "":"", + "":"" + }, + { + "":"fiction", + "":"", + "title":"" + } + ] + } + } + """ + ] + ) + + got = gs.str.get_json_object(json_path) + expect = cudf.Series([None], dtype="object") + + assert_eq(got, expect) + + +@pytest.mark.parametrize("json_path", ["a", ".", "/.store"]) +def test_string_get_json_object_invalid_JSONPath(json_path): + gs = cudf.Series( + [ + """ + { + "store":{ + "book":[ + { + "category":"reference", + "author":"Nigel Rees", + "title":"Sayings of the Century", + "price":8.95 + }, + { + "category":"fiction", + "author":"Evelyn Waugh", + "title":"Sword of Honour", + "price":12.99 + } + ] + } + } + """ + ] + ) + + with pytest.raises(ValueError): + gs.str.get_json_object(json_path) + + +def test_string_get_json_object_allow_single_quotes(): + gs = cudf.Series( + [ + """ + { + "store":{ + "book":[ + { + 'author':"Nigel Rees", + "title":'Sayings of the Century', + "price":8.95 + }, + { + "category":"fiction", + "author":"Evelyn Waugh", + 'title':"Sword of Honour", + "price":12.99 + } + ] + } + } + """ + ] + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[0].author", allow_single_quotes=True + ), + cudf.Series(["Nigel Rees"]), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].title", allow_single_quotes=True + ), + cudf.Series(["['Sayings of the Century',\"Sword of Honour\"]"]), + ) + + assert_eq( + gs.str.get_json_object( + "$.store.book[0].author", allow_single_quotes=False + ), + cudf.Series([None]), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].title", allow_single_quotes=False + ), + cudf.Series([None]), + ) + + +def test_string_get_json_object_strip_quotes_from_single_strings(): + gs = cudf.Series( + [ + """ + { + "store":{ + "book":[ + { + "author":"Nigel Rees", + "title":"Sayings of the Century", + "price":8.95 + }, + { + "category":"fiction", + "author":"Evelyn Waugh", + "title":"Sword of Honour", + "price":12.99 + } + ] + } + } + """ + ] + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[0].author", strip_quotes_from_single_strings=True + ), + cudf.Series(["Nigel Rees"]), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].title", strip_quotes_from_single_strings=True + ), + cudf.Series(['["Sayings of the Century","Sword of Honour"]']), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[0].author", strip_quotes_from_single_strings=False + ), + cudf.Series(['"Nigel Rees"']), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].title", strip_quotes_from_single_strings=False + ), + cudf.Series(['["Sayings of the Century","Sword of Honour"]']), + ) + + +def test_string_get_json_object_missing_fields_as_nulls(): + gs = cudf.Series( + [ + """ + { + "store":{ + "book":[ + { + "author":"Nigel Rees", + "title":"Sayings of the Century", + "price":8.95 + }, + { + "category":"fiction", + "author":"Evelyn Waugh", + "title":"Sword of Honour", + "price":12.99 + } + ] + } + } + """ + ] + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[0].category", missing_fields_as_nulls=True + ), + cudf.Series(["null"]), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].category", missing_fields_as_nulls=True + ), + cudf.Series(['[null,"fiction"]']), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[0].category", missing_fields_as_nulls=False + ), + 
cudf.Series([None]), + ) + assert_eq( + gs.str.get_json_object( + "$.store.book[*].category", missing_fields_as_nulls=False + ), + cudf.Series(['["fiction"]']), + ) + + +def test_str_join_lists_error(): + sr = cudf.Series([["a", "a"], ["b"], ["c"]]) + + with pytest.raises( + ValueError, match="sep_na_rep cannot be defined when `sep` is scalar." + ): + sr.str.join(sep="-", sep_na_rep="-") + + with pytest.raises( + TypeError, + match=re.escape( + "string_na_rep should be a string scalar, got [10, 20] of type " + ": " + ), + ): + sr.str.join(string_na_rep=[10, 20]) + + with pytest.raises( + ValueError, + match=re.escape( + "sep should be of similar size to the series, got: 2, expected: 3" + ), + ): + sr.str.join(sep=["=", "-"]) + + with pytest.raises( + TypeError, + match=re.escape( + "sep_na_rep should be a string scalar, got " + "['na'] of type: " + ), + ): + sr.str.join(sep=["-", "+", "."], sep_na_rep=["na"]) + + with pytest.raises( + TypeError, + match=re.escape( + "sep should be an str, array-like or Series object, " + "found " + ), + ): + sr.str.join(sep=cudf.DataFrame()) + + +@pytest.mark.parametrize( + "sr,sep,string_na_rep,sep_na_rep,expected", + [ + ( + [["a", "a"], ["b"], ["c"]], + "-", + None, + None, + ["a-a", "b", "c"], + ), + ( + [["a", "b"], [None], [None, "hello", None, "world"]], + "__", + "=", + None, + ["a__b", None, "=__hello__=__world"], + ), + ( + [ + ["a", None, "b"], + [None], + [None, "hello", None, "world"], + None, + ], + ["-", "_", "**", "!"], + None, + None, + ["a--b", None, "**hello****world", None], + ), + ( + [ + ["a", None, "b"], + [None], + [None, "hello", None, "world"], + None, + ], + ["-", "_", "**", None], + "rep_str", + "sep_str", + ["a-rep_str-b", None, "rep_str**hello**rep_str**world", None], + ), + ( + [[None, "a"], [None], None], + ["-", "_", None], + "rep_str", + None, + ["rep_str-a", None, None], + ), + ( + [[None, "a"], [None], None], + ["-", "_", None], + None, + "sep_str", + ["-a", None, None], + ), + ], +) +def test_str_join_lists(sr, sep, string_na_rep, sep_na_rep, expected): + sr = cudf.Series(sr) + actual = sr.str.join( + sep=sep, string_na_rep=string_na_rep, sep_na_rep=sep_na_rep + ) + expected = cudf.Series(expected) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "patterns, expected", + [ + ( + lambda: ["a", "s", "g", "i", "o", "r"], + [ + [-1, 0, 5, 3, -1, 2], + [-1, -1, -1, -1, 1, -1], + [2, 0, -1, -1, -1, 3], + [-1, -1, -1, 0, -1, -1], + ], + ), + ( + lambda: cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"]), + [ + [-1, 0, 5, -1, -1, 2, -1], + [-1, -1, -1, -1, 1, -1, -1], + [2, -1, -1, -1, -1, 3, 0], + [-1, -1, -1, -1, -1, -1, -1], + ], + ), + ], +) +def test_str_find_multiple(patterns, expected): + s = cudf.Series(["strings", "to", "search", "in"]) + t = patterns() + + expected = cudf.Series(expected) + + # We convert to pandas because find_multiple returns ListDtype(int32) + # and expected is ListDtype(int64). + # Currently there is no easy way to type-cast these to match. 
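+    # Each row of the result lists, for every pattern, the character
+    # position of its first occurrence in that string (-1 when absent).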
+ assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas()) + + s = cudf.Index(s) + t = cudf.Index(t) + + expected.index = s + + assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas()) + + +def test_str_find_multiple_error(): + s = cudf.Series(["strings", "to", "search", "in"]) + with pytest.raises( + TypeError, + match=re.escape( + "patterns should be an array-like or a Series object, found " + "" + ), + ): + s.str.find_multiple("a") + + t = cudf.Series([1, 2, 3]) + with pytest.raises( + TypeError, + match=re.escape("patterns can only be of 'string' dtype, got: int64"), + ): + s.str.find_multiple(t) + + +def test_str_iterate_error(): + s = cudf.Series(["abc", "xyz"]) + with pytest.raises(TypeError): + iter(s.str) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "pqr", "tuv"], + ["aaaaaaaaaaaa", None], + ], +) +@pytest.mark.parametrize( + "index", + [ + 0, + 1, + 2, + slice(0, 1, 2), + slice(0, 5, 2), + slice(-1, -2, 1), + slice(-1, -2, -1), + slice(-2, -1, -1), + slice(-2, -1, 1), + slice(0), + slice(None), + ], +) +def test_string_str_subscriptable(data, index): + psr = pd.Series(data) + gsr = cudf.Series(data) + + assert_eq(psr.str[index], gsr.str[index]) + + psi = pd.Index(data) + gsi = cudf.Index(data) + + assert_eq(psi.str[index], gsi.str[index]) + + +@pytest.mark.parametrize( + "data,expected", + [ + (["aaaaaaaaaaaa"], [12]), + (["abc", "d", "ef"], [3, 1, 2]), + (["Hello", "Bye", "Thanks 😊"], [5, 3, 11]), + (["\n\t", "Bye", "Thanks 😊"], [2, 3, 11]), + ], +) +def test_string_str_byte_count(data, expected): + sr = cudf.Series(data) + expected = cudf.Series(expected, dtype="int32") + actual = sr.str.byte_count() + assert_eq(expected, actual) + + si = cudf.Index(data) + expected = cudf.Index(expected, dtype="int32") + actual = si.str.byte_count() + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data,expected", + [ + (["1", "2", "3", "4", "5"], [True, True, True, True, True]), + ( + ["1.1", "2.0", "3.2", "4.3", "5."], + [False, False, False, False, False], + ), + ( + [".12312", "213123.", ".3223.", "323423.."], + [False, False, False, False], + ), + ([""], [False]), + ( + ["1..1", "+2", "++3", "4++", "-5"], + [False, True, False, False, True], + ), + ( + [ + "24313345435345 ", + "+2632726478", + "++367293674326", + "4382493264392746.237649274692++", + "-578239479238469264", + ], + [False, True, False, False, True], + ), + ( + ["2a2b", "a+b", "++a", "a.b++", "-b"], + [False, False, False, False, False], + ), + ( + ["2a2b", "1+3", "9.0++a", "+", "-"], + [False, False, False, False, False], + ), + ], +) +def test_str_isinteger(data, expected): + sr = cudf.Series(data, dtype="str") + expected = cudf.Series(expected) + actual = sr.str.isinteger() + assert_eq(expected, actual) + + sr = cudf.Index(data) + expected = cudf.Index(expected) + actual = sr.str.isinteger() + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data,expected", + [ + (["1", "2", "3", "4", "5"], [True, True, True, True, True]), + (["1.1", "2.0", "3.2", "4.3", "5."], [True, True, True, True, True]), + ([""], [False]), + ( + [".12312", "213123.", ".3223.", "323423.."], + [True, True, False, False], + ), + ( + ["1.00.323.1", "+2.1", "++3.30", "4.9991++", "-5.3"], + [False, True, False, False, True], + ), + ( + [ + "24313345435345 ", + "+2632726478", + "++367293674326", + "4382493264392746.237649274692++", + "-578239479238469264", + ], + [False, True, False, False, True], + ), + ( + [ + "24313345435345.32732 ", + "+2632726478.3627638276", + 
"++0.326294632367293674326", + "4382493264392746.237649274692++", + "-57823947923.8469264", + ], + [False, True, False, False, True], + ), + ( + ["2a2b", "a+b", "++a", "a.b++", "-b"], + [False, False, False, False, False], + ), + ( + ["2a2b", "1+3", "9.0++a", "+", "-"], + [False, False, False, False, False], + ), + ], +) +def test_str_isfloat(data, expected): + sr = cudf.Series(data, dtype="str") + expected = cudf.Series(expected) + actual = sr.str.isfloat() + assert_eq(expected, actual) + + sr = cudf.Index(data) + expected = cudf.Index(expected) + actual = sr.str.isfloat() + assert_eq(expected, actual) + + +def test_string_isipv4(): + gsr = cudf.Series( + [ + "", + None, + "1...1", + "141.168.0.1", + "127.0.0.1", + "1.255.0.1", + "256.27.28.26", + "25.257.28.26", + "25.27.258.26", + "25.27.28.256", + "-1.0.0.0", + ] + ) + got = gsr.str.isipv4() + expected = cudf.Series( + [ + False, + None, + False, + True, + True, + True, + False, + False, + False, + False, + False, + ] + ) + assert_eq(expected, got) + + +def test_string_ip4_to_int(): + gsr = cudf.Series( + ["", None, "hello", "41.168.0.1", "127.0.0.1", "41.197.0.1"] + ) + expected = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]) + + got = gsr.str.ip2int() + assert_eq(expected, got) + + got = gsr.str.ip_to_int() # alias + assert_eq(expected, got) + + +def test_string_istimestamp(): + gsr = cudf.Series( + [ + "", + None, + "20201009 123456.987654AM+0100", + "1920111 012345.000001", + "18201235 012345.1", + "20201009 250001.2", + "20201009 129901.3", + "20201009 123499.4", + "20201009 000000.500000PM-0130", + "20201009:000000.600000", + "20201009 010203.700000PM-2500", + "20201009 010203.800000AM+0590", + "20201009 010203.900000AP-0000", + ] + ) + got = gsr.str.istimestamp(r"%Y%m%d %H%M%S.%f%p%z") + expected = cudf.Series( + [ + False, + None, + True, + False, + False, + False, + False, + False, + True, + False, + False, + False, + False, + ] + ) + assert_eq(expected, got) + + +def test_istimestamp_empty(): + gsr = cudf.Series([], dtype="object") + result = gsr.str.istimestamp("%Y%m%d") + expected = cudf.Series([], dtype="bool") + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + ["f0:18:98:22:c2:e4", "00:00:00:00:00:00", "ff:ff:ff:ff:ff:ff"], + ["f0189822c2e4", "000000000000", "ffffffffffff"], + ["0xf0189822c2e4", "0x000000000000", "0xffffffffffff"], + ["0Xf0189822c2e4", "0X000000000000", "0Xffffffffffff"], + ], +) +def test_string_hex_to_int(data): + gsr = cudf.Series(data) + + expected = cudf.Series([263988422296292, 0, 281474976710655]) + + got = gsr.str.htoi() + assert_eq(expected, got) + + got = gsr.str.hex_to_int() # alias + assert_eq(expected, got) + + +def test_string_ishex(): + gsr = cudf.Series(["", None, "0x01a2b3c4d5e6f", "0789", "ABCDEF0"]) + got = gsr.str.ishex() + expected = cudf.Series([False, None, True, True, True]) + assert_eq(expected, got) + + +def test_string_str_code_points(): + data = [ + "abc", + "Def", + None, + "jLl", + "dog and cat", + "accénted", + "", + " 1234 ", + "XYZ", + ] + gs = cudf.Series(data) + expected = [ + 97, + 98, + 99, + 68, + 101, + 102, + 106, + 76, + 108, + 100, + 111, + 103, + 32, + 97, + 110, + 100, + 32, + 99, + 97, + 116, + 97, + 99, + 99, + 50089, + 110, + 116, + 101, + 100, + 32, + 49, + 50, + 51, + 52, + 32, + 88, + 89, + 90, + ] + expected = cudf.Series(expected) + + assert_eq(expected, gs.str.code_points(), check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + ["http://www.hellow.com", "/home/nvidia/nfs", "123.45 ~ABCDEF"], + ["23", 
"³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ], +) +def test_string_str_url_encode(data): + gs = cudf.Series(data) + + got = gs.str.url_encode() + expected = pd.Series([urllib.parse.quote(url, safe="~") for url in data]) + assert_eq(expected, got) + + +def test_string_str_decode_url(): + data = [ + "http://www.hellow.com?k1=acc%C3%A9nted&k2=a%2F/b.c", + "%2Fhome%2fnfs", + "987%20ZYX", + ] + gs = cudf.Series(data) + + got = gs.str.url_decode() + expected = pd.Series([urllib.parse.unquote(url) for url in data]) + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ["line to be wrapped", "another line to be wrapped"], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ], +) +def test_string_str_translate(data): + ps = pd.Series(data) + gs = cudf.Series(data) + + assert_eq( + ps.str.translate(str.maketrans({"a": "z"})), + gs.str.translate(str.maketrans({"a": "z"})), + ) + assert_eq( + pd.Index(ps).str.translate(str.maketrans({"a": "z"})), + cudf.Index(gs).str.translate(str.maketrans({"a": "z"})), + ) + assert_eq( + ps.str.translate(str.maketrans({"a": "z", "i": "$", "z": "1"})), + gs.str.translate(str.maketrans({"a": "z", "i": "$", "z": "1"})), + ) + assert_eq( + pd.Index(ps).str.translate( + str.maketrans({"a": "z", "i": "$", "z": "1"}) + ), + cudf.Index(gs).str.translate( + str.maketrans({"a": "z", "i": "$", "z": "1"}) + ), + ) + assert_eq( + ps.str.translate( + str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) + ), + gs.str.translate( + str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) + ), + ) + assert_eq( + pd.Index(ps).str.translate( + str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) + ), + cudf.Index(gs).str.translate( + str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) + ), + ) + assert_eq( + ps.str.translate(str.maketrans({"é": "É"})), + gs.str.translate(str.maketrans({"é": "É"})), + ) + + +def test_string_str_filter_characters(): + data = [ + "hello world", + "A+B+C+D", + "?!@#$%^&*()", + "accént", + None, + "$1.50", + "", + ] + gs = cudf.Series(data) + expected = cudf.Series( + ["helloworld", "ABCD", "", "accnt", None, "150", ""] + ) + filter = {"a": "z", "A": "Z", "0": "9"} + assert_eq(expected, gs.str.filter_characters(filter)) + + expected = cudf.Series([" ", "+++", "?!@#$%^&*()", "é", None, "$.", ""]) + assert_eq(expected, gs.str.filter_characters(filter, False)) + + expected = cudf.Series( + ["hello world", "A B C D", " ", "acc nt", None, " 1 50", ""] + ) + assert_eq(expected, gs.str.filter_characters(filter, True, " ")) + + with pytest.raises(TypeError): + gs.str.filter_characters(filter, True, ["a"]) + + +@pytest.mark.parametrize( + "data,sub,er", + [ + (["abc", "xyz", "a", "ab", "123", "097"], "a", ValueError), + (["A B", "1.5", "3,000"], "abc", ValueError), + (["23", "³", "⅕", ""], "⅕", ValueError), + ([" ", "\t\r\n ", ""], "\n", ValueError), + (["$", "B", "Aab$", "$$ca", "C$B$", "cat"], "$", ValueError), + (["line to be wrapped", "another line to be wrapped"], " ", None), + ( + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + "+", + ValueError, + ), + (["line to be wrapped", "another line to be wrapped"], "", None), + ], +) +def test_string_str_rindex(data, sub, er): + ps = pd.Series(data) + gs = cudf.Series(data) + + if er is None: + 
assert_eq(ps.str.rindex(sub), gs.str.rindex(sub), check_dtype=False) + assert_eq( + pd.Index(ps).str.rindex(sub), + cudf.Index(gs).str.rindex(sub), + exact=False, + ) + + try: + ps.str.rindex(sub) + except er: + pass + else: + assert not er + + try: + gs.str.rindex(sub) + except er: + pass + else: + assert not er + + +@pytest.mark.parametrize( + "data,sub,expect", + [ + ( + ["abc", "xyz", "a", "ab", "123", "097"], + ["b", "y", "a", "c", "4", "8"], + [True, True, True, False, False, False], + ), + ( + ["A B", "1.5", "3,000", "23", "³", "⅕"], + ["A B", ".", ",", "1", " ", " "], + [True, True, True, False, False, False], + ), + ( + [" ", "\t", "\r", "\f ", "\n", ""], + ["", "\t", "\r", "xx", "yy", "zz"], + [True, True, True, False, False, False], + ), + ( + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ["$", "B", "ab", "*", "@", "dog"], + [True, True, True, False, False, False], + ), + ( + ["hello", "there", "world", "-1234", None, "accént"], + ["lo", "e", "o", "+1234", " ", "e"], + [True, True, True, False, None, False], + ), + ( + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", "", "x", None], + ["A", "B", "C", " ", "y", "e"], + [True, True, True, False, False, None], + ), + ], +) +def test_string_contains_multi(data, sub, expect): + gs = cudf.Series(data) + sub = cudf.Series(sub) + got = gs.str.contains(sub) + expect = cudf.Series(expect) + assert_eq(expect, got, check_dtype=False) + + +# Pandas does not allow 'case' or 'flags' if 'pat' is re.Pattern +# This covers contains, match, count, and replace +@pytest.mark.parametrize( + "pat", + [re.compile("[n-z]"), re.compile("[A-Z]"), re.compile("de"), "A"], +) +@pytest.mark.parametrize("repl", ["xyz", "", " "]) +def test_string_compiled_re(ps_gs, pat, repl): + ps, gs = ps_gs + + expect = ps.str.contains(pat, regex=True) + got = gs.str.contains(pat, regex=True) + assert_eq(expect, got) + + expect = ps.str.match(pat) + got = gs.str.match(pat) + assert_eq(expect, got) + + expect = ps.str.count(pat) + got = gs.str.count(pat) + assert_eq(expect, got, check_dtype=False) + + expect = ps.str.replace(pat, repl, regex=True) + got = gs.str.replace(pat, repl, regex=True) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ["line to be wrapped", "another line to be wrapped"], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ["1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", None], + ], +) +@pytest.mark.parametrize("pat", ["", " ", "a", "abc", "cat", "$", "\n"]) +def test_string_str_match(data, pat): + ps = pd.Series(data) + gs = cudf.Series(data) + + assert_eq(ps.str.match(pat), gs.str.match(pat)) + assert_eq( + pd.Index(pd.Index(ps).str.match(pat)), cudf.Index(gs).str.match(pat) + ) + + +@pytest.mark.parametrize( + "data,sub,er", + [ + (["abc", "xyz", "a", "ab", "123", "097"], "a", ValueError), + (["A B", "1.5", "3,000"], "abc", ValueError), + (["23", "³", "⅕", ""], "⅕", ValueError), + ([" ", "\t\r\n ", ""], "\n", ValueError), + (["$", "B", "Aab$", "$$ca", "C$B$", "cat"], "$", ValueError), + (["line to be wrapped", "another line to be wrapped"], " ", None), + ( + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + "+", + ValueError, + ), + (["line to be wrapped", "another line to be wrapped"], "", None), + ], +) +def test_string_str_index(data, sub, er): + ps = pd.Series(data) + gs = cudf.Series(data) + + if er is None: + assert_eq(ps.str.index(sub), gs.str.index(sub), check_dtype=False) + + try: + ps.str.index(sub) + except er: + pass + else: + assert not er + + try: + gs.str.index(sub) + except er: + pass + else: + assert not er + + +@pytest.mark.parametrize( + "data", + [ + ["str_foo", "str_bar", "no_prefix", "", None], + ["foo_str", "bar_str", "no_suffix", "", None], + ], +) +def test_string_remove_suffix_prefix(data): + ps = pd.Series(data) + gs = cudf.Series(data) + + got = gs.str.removeprefix("str_") + expect = ps.str.removeprefix("str_") + assert_eq( + expect, + got, + check_dtype=False, + ) + got = gs.str.removesuffix("_str") + expect = ps.str.removesuffix("_str") + assert_eq( + expect, + got, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ["line to be wrapped", "another line to be wrapped"], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ["1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", None], + ], +) +@pytest.mark.parametrize( + "sub", + ["", " ", "a", "abc", "cat", "$", "\n"], +) +def test_string_find(data, sub): + ps = pd.Series(data) + gs = cudf.Series(data) + + got = gs.str.find(sub) + expect = ps.str.find(sub) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.find(sub, start=1) + expect = ps.str.find(sub, start=1) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.find(sub, end=10) + expect = ps.str.find(sub, end=10) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.find(sub, start=2, end=10) + expect = ps.str.find(sub, start=2, end=10) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.rfind(sub) + expect = ps.str.rfind(sub) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.rfind(sub, start=1) + expect = ps.str.rfind(sub, start=1) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.rfind(sub, end=10) + expect = ps.str.rfind(sub, end=10) + assert_eq( + expect, + got, + check_dtype=False, + ) + + got = gs.str.rfind(sub, start=2, end=10) + expect = ps.str.rfind(sub, start=2, end=10) + assert_eq( + expect, + got, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ["line to be wrapped", "another line to be wrapped"], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ], +) +@pytest.mark.parametrize( + "pat", + ["", None, " ", "a", "abc", "cat", "$", "\n"], +) +def test_string_starts_ends(data, pat): + ps = pd.Series(data) + gs = cudf.Series(data) + + if pat is None: + assert_exceptions_equal( + lfunc=ps.str.startswith, + rfunc=gs.str.startswith, + lfunc_args_and_kwargs=([pat],), + rfunc_args_and_kwargs=([pat],), + ) + assert_exceptions_equal( + lfunc=ps.str.endswith, + rfunc=gs.str.endswith, + lfunc_args_and_kwargs=([pat],), + rfunc_args_and_kwargs=([pat],), + ) + else: + assert_eq( + ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False + ) + assert_eq( + ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False + ) + + +@pytest.mark.parametrize( + "data,pat", + [ + ( + ["abc", "xyz", "a", "ab", "123", "097"], + ("abc", "x", "a", "b", "3", "7"), + ), + (["A B", "1.5", "3,000"], ("A ", ".", ",")), + (["23", "³", "⅕", ""], ("23", "³", "⅕", "")), + ([" ", "\t\r\n ", ""], ("d", "\n ", "")), + ( + ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], + ("$", "$", "a", "<", "(", "#"), + ), + ( + ["line to be wrapped", "another line to be wrapped"], + ("another", "wrapped"), + ), + ( + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ("hsdjfk", "", "ll", "+", "-", "w", "-", "én"), + ), + ( + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ("1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", ""), + ), + ], +) +def test_string_starts_ends_list_like_pat(data, pat): + gs = cudf.Series(data) + + starts_expected = [] + ends_expected = [] + for i in range(len(pat)): + if data[i] is None: + starts_expected.append(None) + ends_expected.append(None) + else: + if pat[i] is None: + starts_expected.append(False) + ends_expected.append(False) + else: + starts_expected.append(data[i].startswith(pat[i])) + ends_expected.append(data[i].endswith(pat[i])) + starts_expected = pd.Series(starts_expected) + ends_expected = pd.Series(ends_expected) + assert_eq(starts_expected, gs.str.startswith(pat), check_dtype=False) + assert_eq(ends_expected, gs.str.endswith(pat), check_dtype=False) + + +@pytest.mark.parametrize( + "find", + [ + "(\\d)(\\d)", + "(\\d)(\\d)", + "(\\d)(\\d)", + "(\\d)(\\d)", + "([a-z])-([a-z])", + "([a-z])-([a-zé])", + "([a-z])-([a-z])", + "([a-z])-([a-zé])", + re.compile("([A-Z])(\\d)"), + ], +) +@pytest.mark.parametrize( + "replace", + ["\\1-\\2", "V\\2-\\1", "\\1 \\2", "\\2 \\1", "X\\1+\\2Z", "X\\1+\\2Z"], +) +def test_string_replace_with_backrefs(find, replace): + s = [ + "A543", + "Z756", + "", + None, + "tést-string", + "two-thréé four-fivé", + "abcd-éfgh", + "tést-string-again", + ] + ps = pd.Series(s) + gs = cudf.Series(s) + got = gs.str.replace_with_backrefs(find, replace) + expected = ps.str.replace(find, replace, regex=True) + assert_eq(got, expected) + + got = cudf.Index(gs).str.replace_with_backrefs(find, replace) + expected = pd.Index(ps).str.replace(find, replace, regex=True) + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat", "cat\ndog"], + ["line\nto be wrapped", "another\nline\nto be wrapped"], + ], +) +@pytest.mark.parametrize( + "pat", + ["a", " ", "\t", "another", "0", r"\$", "^line$", "line.*be", "cat$"], +) +@pytest.mark.parametrize("flags", [0, re.MULTILINE, re.DOTALL]) +def test_string_count(data, pat, flags): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + gs.str.count(pat=pat, flags=flags), + ps.str.count(pat=pat, flags=flags), + check_dtype=False, + ) + assert_eq( + cudf.Index(gs).str.count(pat=pat), + pd.Index(ps).str.count(pat=pat), + exact=False, + ) + + +@pytest.mark.parametrize( + "pat, flags", + [ + ("Monkey", 0), + ("on", 0), + ("b", 0), + ("on$", 0), + ("on$", re.MULTILINE), + ("o.*k", re.DOTALL), + ], +) +def test_string_findall(pat, flags): + test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"] + ps = pd.Series(test_data) + gs = cudf.Series(test_data) + + expected = ps.str.findall(pat, flags) + actual = gs.str.findall(pat, flags) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "pat, flags, pos", + [ + ("Monkey", 0, [-1, 0, -1, -1]), + ("on", 0, [2, 1, -1, 1]), + ("bit", 0, [-1, -1, 3, -1]), + ("on$", 0, [2, -1, -1, -1]), + ("on$", re.MULTILINE, [2, -1, -1, 1]), + ("o.*k", re.DOTALL, [-1, 1, -1, 1]), + ], +) +def test_string_find_re(pat, flags, pos): + test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"] + gs = cudf.Series(test_data) + + expected = pd.Series(pos, dtype=np.int32) + actual = gs.str.find_re(pat, flags) + assert_eq(expected, actual) + + +def test_string_replace_multi(): + ps = pd.Series(["hello", "goodbye"]) + gs = cudf.Series(["hello", "goodbye"]) + expect = ps.str.replace("e", "E").str.replace("o", "O") + got = gs.str.replace(["e", "o"], ["E", "O"]) + + assert_eq(expect, got) + + ps = pd.Series(["foo", "fuz", 
np.nan]) + gs = cudf.Series.from_pandas(ps) + + expect = ps.str.replace("f.", "ba", regex=True) + got = gs.str.replace(["f."], ["ba"], regex=True) + assert_eq(expect, got) + + ps = pd.Series(["f.o", "fuz", np.nan]) + gs = cudf.Series.from_pandas(ps) + + expect = ps.str.replace("f.", "ba", regex=False) + got = gs.str.replace(["f."], ["ba"], regex=False) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["+23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ], +) +@pytest.mark.parametrize("width", [0, 1, 25]) +@pytest.mark.parametrize("side", ["left", "right", "both"]) +@pytest.mark.parametrize("fillchar", [" ", ".", "\n", "+", "\t"]) +def test_strings_pad_tests(data, width, side, fillchar): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + ps.str.pad(width=width, side=side, fillchar=fillchar), + gs.str.pad(width=width, side=side, fillchar=fillchar), + ) + + gi = cudf.Index(data) + pi = pd.Index(data) + + assert_eq( + pi.str.pad(width=width, side=side, fillchar=fillchar), + gi.str.pad(width=width, side=side, fillchar=fillchar), + ) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + pytest.param([" ", "\t\r\n ", ""], marks=pytest.mark.xfail), + ["leopard", "Golden Eagle", "SNAKE", ""], + ["line to be wrapped", "another line to be wrapped"], + ], +) +@pytest.mark.parametrize("width", [1, 20]) +def test_string_wrap(data, width): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + gs.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), + ps.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), + ) + + gi = cudf.Index(data) + pi = pd.Index(data) + + assert_eq( + gi.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), + pi.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), + ) + + +@pytest.mark.parametrize( + "data", + [ + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["³", "⅕", ""], + ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], + [" ", "\t\r\n ", ""], + ["1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", None], + ], +) +@pytest.mark.parametrize("width", [0, 20]) +def test_strings_zfill_tests(data, width): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq(ps.str.zfill(width=width), gs.str.zfill(width=width)) + + gi = cudf.Index(data) + pi = pd.Index(data) + + assert_eq(pi.str.zfill(width=width), gi.str.zfill(width=width)) + + +def test_string_strip_fail(): + gs = cudf.Series(["a", "aa", ""]) + with pytest.raises(TypeError): + gs.str.strip(["a"]) + with pytest.raises(TypeError): + gs.str.lstrip(["a"]) + with pytest.raises(TypeError): + gs.str.rstrip(["a"]) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + [ + "this is a regular sentence", + "https://docs.python.org/3/tutorial/index.html", + None, + ], + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ], +) +@pytest.mark.parametrize("width", [0, 20]) +@pytest.mark.parametrize("fillchar", ["⅕", "1", ".", "t", " ", ","]) +def test_strings_filling_tests(data, width, fillchar): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + ps.str.center(width=width, fillchar=fillchar), + gs.str.center(width=width, fillchar=fillchar), + ) + assert_eq( + ps.str.ljust(width=width, fillchar=fillchar), + gs.str.ljust(width=width, fillchar=fillchar), + ) + assert_eq( + ps.str.rjust(width=width, fillchar=fillchar), + gs.str.rjust(width=width, fillchar=fillchar), + ) + + gi = cudf.Index(data) + pi = pd.Index(data) + + assert_eq( + pi.str.center(width=width, fillchar=fillchar), + gi.str.center(width=width, fillchar=fillchar), + ) + assert_eq( + pi.str.ljust(width=width, fillchar=fillchar), + gi.str.ljust(width=width, fillchar=fillchar), + ) + assert_eq( + pi.str.rjust(width=width, fillchar=fillchar), + gi.str.rjust(width=width, fillchar=fillchar), + ) + + +@pytest.mark.parametrize("n", [-1, 0, 1, 4]) +@pytest.mark.parametrize("expand", [True, False]) +def test_string_rsplit_re(n, expand): + data = ["a b", " c ", " d", "e ", "f"] + ps = pd.Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") + + # Pandas does not yet support the regex parameter for rsplit + import inspect + + assert ( + "regex" + not in inspect.signature(pd.Series.str.rsplit).parameters.keys() + ) + + expect = ps.str.rsplit(pat=" ", n=n, expand=expand) + got = gs.str.rsplit(pat="\\s", n=n, expand=expand, regex=True) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + [ + "this is a regular sentence", + "https://docs.python.org/3/tutorial/index.html", + None, + ], + ], +) +@pytest.mark.parametrize("n", [-1, 0, 1, 4]) +@pytest.mark.parametrize("expand", [True, False]) +def test_strings_split(data, n, expand): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + ps.str.split(n=n, expand=expand).reset_index(), + gs.str.split(n=n, expand=expand).reset_index(), + check_index_type=False, + ) + + assert_eq( + ps.str.split(",", n=n, expand=expand), + gs.str.split(",", n=n, expand=expand), + ) + assert_eq( + ps.str.split("-", n=n, expand=expand), + gs.str.split("-", n=n, expand=expand), + ) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + [ + "this is a regular sentence", 
+ "https://docs.python.org/3/tutorial/index.html", + None, + ], + ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], + ], +) +@pytest.mark.parametrize( + "to_strip", ["⅕", None, "123.", ".!? \n\t", "123.!? \n\t", " ", ".", ","] +) +def test_strings_strip_tests(data, to_strip): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq(ps.str.strip(to_strip=to_strip), gs.str.strip(to_strip=to_strip)) + assert_eq( + ps.str.rstrip(to_strip=to_strip), gs.str.rstrip(to_strip=to_strip) + ) + assert_eq( + ps.str.lstrip(to_strip=to_strip), gs.str.lstrip(to_strip=to_strip) + ) + + gi = cudf.Index(data) + pi = pd.Index(data) + + assert_eq(pi.str.strip(to_strip=to_strip), gi.str.strip(to_strip=to_strip)) + assert_eq( + pi.str.rstrip(to_strip=to_strip), gi.str.rstrip(to_strip=to_strip) + ) + assert_eq( + pi.str.lstrip(to_strip=to_strip), gi.str.lstrip(to_strip=to_strip) + ) + + +def test_string_is_title(): + data = [ + "leopard", + "Golden Eagle", + "SNAKE", + "", + "!A", + "hello World", + "A B C", + "#", + "AƻB", + "Ⓑⓖ", + "Art of War", + ] + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq(gs.str.istitle(), ps.str.istitle()) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ], +) +def test_strings_rpartition(data): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq(ps.str.rpartition(), gs.str.rpartition()) + assert_eq(ps.str.rpartition("-"), gs.str.rpartition("-")) + assert_eq(ps.str.rpartition(","), gs.str.rpartition(",")) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ], +) +def test_strings_partition(data): + gs = cudf.Series(data, name="str_name") + ps = pd.Series(data, name="str_name") + + assert_eq(ps.str.partition(), gs.str.partition()) + assert_eq(ps.str.partition(","), gs.str.partition(",")) + assert_eq(ps.str.partition("-"), gs.str.partition("-")) + + gi = cudf.Index(data, name="new name") + pi = pd.Index(data, name="new name") + assert_eq(pi.str.partition(), gi.str.partition()) + assert_eq(pi.str.partition(","), gi.str.partition(",")) + assert_eq(pi.str.partition("-"), gi.str.partition("-")) + + +def test_string_partition_fail(): + gs = cudf.Series(["abc", "aa", "cba"]) + with pytest.raises(TypeError): + gs.str.partition(["a"]) + with pytest.raises(TypeError): + gs.str.rpartition(["a"]) + + +@pytest.mark.parametrize( + "data", + [ + ["koala", "fox", "chameleon"], + ["A,,B", "1,,5", "3,00,0"], + ["Linda van der Berg", "George Pitt-Rivers"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + [ + "this is a regular sentence", + "https://docs.python.org/3/tutorial/index.html", + None, + ], + ], +) +@pytest.mark.parametrize("n", [-1, 2, 9]) +@pytest.mark.parametrize("expand", [True, False]) +def test_strings_rsplit(data, n, expand): + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq( + ps.str.rsplit(n=n, expand=expand).reset_index(), + gs.str.rsplit(n=n, expand=expand).reset_index(), + check_index_type=False, + ) + assert_eq( + ps.str.rsplit(",", n=n, expand=expand), + gs.str.rsplit(",", n=n, expand=expand), + ) + assert_eq( + ps.str.rsplit("-", n=n, expand=expand), + gs.str.rsplit("-", n=n, expand=expand), + ) + + +@pytest.fixture( + params=[ + ["abc", "xyz", "a", "ab", "123", "097"], + ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], + 
["koala", "fox", "chameleon"], + [ + "1234567890", + "de", + "1.75", + "-34", + "+9.8", + "7¼", + "x³", + "2³", + "12⅝", + "", + "\t\r\n ", + ], + ["one", "one1", "1", ""], + ["A B", "1.5", "3,000"], + ["23", "³", "⅕", ""], + [" ", "\t\r\n ", ""], + ["leopard", "Golden Eagle", "SNAKE", ""], + [r"¯\_(ツ)_/¯", "(╯°□°)╯︵ ┻━┻", "┬─┬ノ( º _ ºノ)"], + ["a1", "A1", "a!", "A!", "!1", "aA"], + [ + None, + "The quick bRoWn fox juMps over the laze DOG", + '123nr98nv9rev!$#INF4390v03n1243<>?}{:-"', + "accénted", + ], + ] +) +def data_char_types(request): + return request.param + + +@pytest.mark.parametrize( + "type_op", + [ + "isdecimal", + "isalnum", + "isalpha", + "isdigit", + "isnumeric", + "isupper", + "islower", + ], +) +def test_string_char_types(type_op, data_char_types): + gs = cudf.Series(data_char_types) + ps = pd.Series(data_char_types) + + assert_eq(getattr(gs.str, type_op)(), getattr(ps.str, type_op)()) + + +def test_string_filter_alphanum(): + data = ["1234567890", "!@#$%^&*()", ",./<>?;:[]}{|+=", "abc DEF"] + expected = [] + for st in data: + rs = "" + for c in st: + if str.isalnum(c): + rs = rs + c + expected.append(rs) + + gs = cudf.Series(data) + assert_eq(gs.str.filter_alphanum(), cudf.Series(expected)) + + expected = [] + for st in data: + rs = "" + for c in st: + if not str.isalnum(c): + rs = rs + c + expected.append(rs) + assert_eq(gs.str.filter_alphanum(keep=False), cudf.Series(expected)) + + expected = [] + for st in data: + rs = "" + for c in st: + if str.isalnum(c): + rs = rs + c + else: + rs = rs + "*" + expected.append(rs) + assert_eq(gs.str.filter_alphanum("*"), cudf.Series(expected)) + + expected = [] + for st in data: + rs = "" + for c in st: + if not str.isalnum(c): + rs = rs + c + else: + rs = rs + "*" + expected.append(rs) + assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected)) + + with pytest.raises(TypeError): + gs.str.filter_alphanum(["a"]) + + +@pytest.mark.parametrize( + "case_op", + [ + "title", + "capitalize", + "lower", + "upper", + "swapcase", + "isdecimal", + "isalnum", + "isalpha", + "isdigit", + "isnumeric", + "isspace", + ], +) +def test_string_char_case(case_op, data_char_types): + gs = cudf.Series(data_char_types) + ps = pd.Series(data_char_types) + assert_eq(getattr(gs.str, case_op)(), getattr(ps.str, case_op)()) + + +def test_string_isempty(data_char_types): + gs = cudf.Series(data_char_types) + ps = pd.Series(data_char_types) + assert_eq(gs.str.isempty(), ps == "") + + +@pytest.mark.parametrize( + "string", + [ + ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], + ["abc", "xyz", "a", "ab", "123", "097"], + ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], + ], +) +@pytest.mark.parametrize("index", [-100, -3, -1, 0, 1, 4, 50]) +def test_string_get(string, index): + pds = pd.Series(string) + gds = cudf.Series(string) + + assert_eq( + pds.str.get(index).fillna(""), + gds.str.get(index).fillna(""), + ) + + +@pytest.mark.parametrize( + "string", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], + ["koala", "fox", "chameleon"], + ], +) +@pytest.mark.parametrize("number", [-10, 0, 1, 3, 10]) +@pytest.mark.parametrize("diff", [0, 3]) +def test_string_slice_str(string, number, diff): + pds = pd.Series(string) + gds = cudf.Series(string) + + assert_eq(pds.str.slice(start=number), gds.str.slice(start=number)) + assert_eq(pds.str.slice(stop=number), gds.str.slice(stop=number)) + assert_eq(pds.str.slice(), gds.str.slice()) + assert_eq( + pds.str.slice(start=number, 
stop=number + diff), + gds.str.slice(start=number, stop=number + diff), + ) + if diff != 0: + assert_eq(pds.str.slice(step=diff), gds.str.slice(step=diff)) + assert_eq( + pds.str.slice(start=number, stop=number + diff, step=diff), + gds.str.slice(start=number, stop=number + diff, step=diff), + ) + + +def test_string_slice_from(): + gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) + d_starts = cudf.Series([2, 3, 0, -1, -1], dtype=np.int32) + d_stops = cudf.Series([-1, -1, 0, -1, -1], dtype=np.int32) + got = gs.str.slice_from(starts=d_starts, stops=d_stops) + expected = cudf.Series(["llo world", "y accéntéd", "", None, ""]) + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "string", + [ + ["abc", "xyz", "a", "ab", "123", "097"], + ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], + ["koala", "fox", "chameleon"], + ], +) +@pytest.mark.parametrize("number", [0, 1, 10]) +@pytest.mark.parametrize("diff", [0, 3]) +@pytest.mark.parametrize("repl", ["2", "!!"]) +def test_string_slice_replace(string, number, diff, repl): + pds = pd.Series(string) + gds = cudf.Series(string) + + assert_eq( + pds.str.slice_replace(start=number, repl=repl), + gds.str.slice_replace(start=number, repl=repl), + check_dtype=False, + ) + assert_eq( + pds.str.slice_replace(stop=number, repl=repl), + gds.str.slice_replace(stop=number, repl=repl), + ) + assert_eq(pds.str.slice_replace(), gds.str.slice_replace()) + assert_eq( + pds.str.slice_replace(start=number, stop=number + diff), + gds.str.slice_replace(start=number, stop=number + diff), + ) + assert_eq( + pds.str.slice_replace(start=number, stop=number + diff, repl=repl), + gds.str.slice_replace(start=number, stop=number + diff, repl=repl), + check_dtype=False, + ) + + +def test_string_slice_replace_fail(): + gs = cudf.Series(["abc", "xyz", ""]) + with pytest.raises(TypeError): + gs.str.slice_replace(0, 1, ["_"]) + + +def test_string_insert(): + gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) + + ps = pd.Series(["hello world", "holy accéntéd", "batman", None, ""]) + + assert_eq(gs.str.insert(0, ""), gs) + assert_eq(gs.str.insert(0, "+"), "+" + ps) + assert_eq(gs.str.insert(-1, "---"), ps + "---") + assert_eq( + gs.str.insert(5, "---"), + ps.str.slice(stop=5) + "---" + ps.str.slice(start=5), + ) + + with pytest.raises(TypeError): + gs.str.insert(0, ["+"]) + + +def test_string_slice(): + df = cudf.DataFrame({"a": ["hello", "world"]}) + pdf = pd.DataFrame({"a": ["hello", "world"]}) + a_slice_got = df.a.str.slice(0, 2) + a_slice_expected = pdf.a.str.slice(0, 2) + + assert isinstance(a_slice_got, cudf.Series) + assert_eq(a_slice_expected, a_slice_got) + + +@pytest.mark.parametrize("pat", [None, "\\s+"]) +@pytest.mark.parametrize("regex", [False, True]) +@pytest.mark.parametrize("expand", [False, True]) +def test_string_split_all_empty(pat, regex, expand): + ps = pd.Series(["", "", "", ""], dtype="str") + gs = cudf.Series(["", "", "", ""], dtype="str") + + expect = ps.str.split(pat=pat, expand=expand, regex=regex) + got = gs.str.split(pat=pat, expand=expand, regex=regex) + + if isinstance(got, cudf.DataFrame): + assert_eq(expect, got, check_column_type=False) + else: + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["a b", " c ", " d", "e ", "f"], + ["a-b", "-c-", "---d", "e---", "f"], + ["ab", "c", "d", "e", "f"], + [None, None, None, None, None], + ], +) +@pytest.mark.parametrize("pat", [None, " ", "\\-+", "\\s+"]) +@pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) 
+@pytest.mark.parametrize("expand", [True, False]) +def test_string_split_re(data, pat, n, expand): + ps = pd.Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") + + expect = ps.str.split(pat=pat, n=n, expand=expand, regex=True) + got = gs.str.split(pat=pat, n=n, expand=expand, regex=True) + + assert_eq(expect, got) + + +def test_string_lower(ps_gs): + ps, gs = ps_gs + + expect = ps.str.lower() + got = gs.str.lower() + + assert_eq(expect, got) + + +def test_string_upper(ps_gs): + ps, gs = ps_gs + + expect = ps.str.upper() + got = gs.str.upper() + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["a b", " c ", " d", "e ", "f"], + ["a-b", "-c-", "---d", "e---", "f"], + ["ab", "c", "d", "e", "f"], + [None, None, None, None, None], + ], +) +@pytest.mark.parametrize("pat", [None, " ", "-"]) +@pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) +@pytest.mark.parametrize("expand", [True, False]) +def test_string_split(data, pat, n, expand): + ps = pd.Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") + + expect = ps.str.split(pat=pat, n=n, expand=expand) + got = gs.str.split(pat=pat, n=n, expand=expand) + + assert_eq(expect, got) + + +# Pandas doesn't respect the `n` parameter so ignoring it in test parameters +@pytest.mark.parametrize( + "pat,regex", + [("a", False), ("f", False), (r"[a-z]", True), (r"[A-Z]", True)], +) +@pytest.mark.parametrize("repl", ["qwerty", "", " "]) +@pytest.mark.parametrize("case,case_raise", [(None, 0), (True, 1), (False, 1)]) +@pytest.mark.parametrize("flags,flags_raise", [(0, 0), (re.U, 1)]) +def test_string_replace( + ps_gs, pat, repl, case, case_raise, flags, flags_raise, regex +): + ps, gs = ps_gs + + expectation = raise_builder([case_raise, flags_raise], NotImplementedError) + + with expectation: + expect = ps.str.replace(pat, repl, case=case, flags=flags, regex=regex) + got = gs.str.replace(pat, repl, case=case, flags=flags, regex=regex) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("pat", ["A*", "F?H?"]) +def test_string_replace_zero_length(ps_gs, pat): + ps, gs = ps_gs + + expect = ps.str.replace(pat, "_", regex=True) + got = gs.str.replace(pat, "_", regex=True) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "pat,regex", + [ + ("a", False), + ("a", True), + ("f", False), + (r"[a-z]", True), + (r"[A-Z]", True), + ("hello", False), + ("FGHI", False), + ], +) +@pytest.mark.parametrize( + "flags,flags_raise", + [(0, 0), (re.MULTILINE | re.DOTALL, 0), (re.I, 1), (re.I | re.DOTALL, 1)], +) +@pytest.mark.parametrize("na,na_raise", [(np.nan, 0), (None, 1), ("", 1)]) +def test_string_contains(ps_gs, pat, regex, flags, flags_raise, na, na_raise): + ps, gs = ps_gs + + expectation = does_not_raise() + if flags_raise or na_raise: + expectation = pytest.raises(NotImplementedError) + + with expectation: + with expect_warning_if( + na == "" or (na is None and not (flags_raise or na_raise)), + match=( + "Allowing a non-bool 'na' in obj.str.contains is deprecated " + "and will raise in a future version." 
+ ),
+        ):
+            expect = ps.str.contains(pat, flags=flags, na=na, regex=regex)
+            got = gs.str.contains(pat, flags=flags, na=na, regex=regex)
+    assert_eq(expect, got)
+
+
+def test_string_contains_case(ps_gs):
+    ps, gs = ps_gs
+    with pytest.raises(NotImplementedError):
+        gs.str.contains("A", case=False)
+    expected = ps.str.contains("A", regex=False, case=False)
+    got = gs.str.contains("A", regex=False, case=False)
+    assert_eq(expected, got)
+    got = gs.str.contains("a", regex=False, case=False)
+    assert_eq(expected, got)
+
+
+@pytest.mark.parametrize(
+    "pat,esc,expect",
+    [
+        ("abc", "", [True, False, False, False, False, False]),
+        ("b%", "/", [False, True, False, False, False, False]),
+        ("%b", ":", [False, True, False, False, False, False]),
+        ("%b%", "*", [True, True, False, False, False, False]),
+        ("___", "", [True, True, True, False, False, False]),
+        ("__/%", "/", [False, False, True, False, False, False]),
+        ("55/____", "/", [False, False, False, True, False, False]),
+        ("%:%%", ":", [False, False, True, False, False, False]),
+        ("55*_100", "*", [False, False, False, True, False, False]),
+        ("abc", "abc", [True, False, False, False, False, False]),
+    ],
+)
+def test_string_like(pat, esc, expect):
+    expectation = does_not_raise()
+    if len(esc) > 1:
+        expectation = pytest.raises(ValueError)
+
+    with expectation:
+        gs = cudf.Series(["abc", "bab", "99%", "55_100", "", "556100"])
+        got = gs.str.like(pat, esc)
+        expect = cudf.Series(expect)
+        assert_eq(expect, got, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "repeats",
+    [
+        2,
+        0,
+        -3,
+        [5, 4, 3, 2, 6],
+        [5, None, 3, 2, 6],
+        [0, 0, 0, 0, 0],
+        [-1, -2, -3, -4, -5],
+        [None, None, None, None, None],
+    ],
+)
+def test_string_repeat(repeats):
+    ps = pd.Series(["hello", "world", None, "", "!"])
+    gs = cudf.from_pandas(ps)
+
+    expect = ps.str.repeat(repeats)
+    got = gs.str.repeat(repeats)
+
+    assert_eq(expect, got)
+
+
+def test_string_cat_str_error():
+    gs = cudf.Series(["a", "v", "s"])
+    # https://github.com/pandas-dev/pandas/issues/28277
+    # the ability to pass StringMethods is being removed in a future version.
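+    # The re.escape'd match text must reproduce the raised message exactly,
+    # so the "np.ndarrary" misspelling below is intentional.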
+ with pytest.raises( + TypeError, + match=re.escape( + "others must be Series, Index, DataFrame, np.ndarrary " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ), + ): + gs.str.cat(gs.str) + + +@pytest.mark.parametrize("sep", ["", " ", ",", "|||"]) +def test_string_join(ps_gs, sep): + ps, gs = ps_gs + + expect = ps.str.join(sep) + got = gs.str.join(sep) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("pat", [r"(a)", r"(f)", r"([a-z])", r"([A-Z])"]) +@pytest.mark.parametrize("expand", [True, False]) +@pytest.mark.parametrize( + "flags,flags_raise", [(0, 0), (re.M | re.S, 0), (re.I, 1)] +) +def test_string_extract(ps_gs, pat, expand, flags, flags_raise): + ps, gs = ps_gs + expectation = raise_builder([flags_raise], NotImplementedError) + + with expectation: + expect = ps.str.extract(pat, flags=flags, expand=expand) + got = gs.str.extract(pat, flags=flags, expand=expand) + + assert_eq(expect, got) + + +def test_string_invalid_regex(): + gs = cudf.Series(["a"]) + with pytest.raises(RuntimeError): + gs.str.extract(r"{\}") + + +def _cat_convert_seq_to_cudf(others): + pd_others = others + if isinstance(pd_others, (pd.Series, pd.Index)): + gd_others = cudf.from_pandas(pd_others) + else: + gd_others = pd_others + if isinstance(gd_others, (list, tuple)): + temp_tuple = [ + cudf.from_pandas(elem) + if isinstance(elem, (pd.Series, pd.Index)) + else elem + for elem in gd_others + ] + + if isinstance(gd_others, tuple): + gd_others = tuple(temp_tuple) + else: + gd_others = list(temp_tuple) + return gd_others + + +@pytest.mark.parametrize( + "data", + [["a", None, "c", None, "e"], ["a", "b", "c", "d", "a"]], +) +@pytest.mark.parametrize( + "others", + [ + None, + ["f", "g", "h", "i", "j"], + pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + [ + np.array(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ], + [ + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["f", "g", "h", "i", "j"]), + ], + pytest.param( + [ + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ], + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/5862" + ), + ), + pytest.param( + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ), + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/33436" + ), + ), + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["a", "b", "c", "d", "e"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["a", "b", "c", "d", "e"], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=[10, 11, 12, 13, 14], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=[10, 15, 11, 13, 14], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["1", "2", "3", "4", "5"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["1", "2", "3", "4", "5"], + ), + ], + ], +) +@pytest.mark.parametrize("sep", [None, "", " ", ",", "|||"]) +@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) +@pytest.mark.parametrize("name", [None, "This is the name"]) +def 
test_string_index_duplicate_str_cat(data, others, sep, na_rep, name): + pi, gi = pd.Index(data, name=name), cudf.Index(data, name=name) + + pd_others = others + gd_others = _cat_convert_seq_to_cudf(others) + + got = gi.str.cat(others=gd_others, sep=sep, na_rep=na_rep) + expect = pi.str.cat(others=pd_others, sep=sep, na_rep=na_rep) + + # TODO: Remove got.sort_values call once we have `join` param support + # in `.str.cat` + # https://github.com/rapidsai/cudf/issues/5862 + + assert_eq( + expect.sort_values() if not isinstance(expect, str) else expect, + got.sort_values() if not isinstance(got, str) else got, + exact=False, + ) + + +@pytest.mark.parametrize( + "others", + [ + None, + ["f", "g", "h", "i", "j"], + ("f", "g", "h", "i", "j"), + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + ( + np.array(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ), + [ + np.array(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ], + [ + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["f", "g", "h", "i", "j"]), + ], + ( + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["f", "g", "h", "i", "j"]), + ), + [ + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ], + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ), + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], + [ + pd.Series(["hello", "world", "abc", "xyz", "pqr"]), + pd.Series(["abc", "xyz", "hello", "pqr", "world"]), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=[10, 11, 12, 13, 14], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=[10, 15, 11, 13, 14], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["10", "11", "12", "13", "14"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["10", "11", "12", "13", "14"], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["10", "11", "12", "13", "14"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["10", "15", "11", "13", "14"], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["1", "2", "3", "4", "5"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["10", "11", "12", "13", "14"], + ), + ], + ], +) +@pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) +@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) +@pytest.mark.parametrize( + "index", + [["1", "2", "3", "4", "5"]], +) +def test_string_cat(ps_gs, others, sep, na_rep, index): + ps, gs = ps_gs + + pd_others = others + gd_others = _cat_convert_seq_to_cudf(others) + + expect = ps.str.cat(others=pd_others, sep=sep, na_rep=na_rep) + got = gs.str.cat(others=gd_others, sep=sep, na_rep=na_rep) + assert_eq(expect, got) + + ps.index = index 
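+    # Mirror the same custom index onto the cudf series so the following
+    # cat(others=<index>) calls compare pandas and cudf like-for-like.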
+ gs.index = index + + expect = ps.str.cat(others=ps.index, sep=sep, na_rep=na_rep) + got = gs.str.cat(others=gs.index, sep=sep, na_rep=na_rep) + + assert_eq(expect, got) + + expect = ps.str.cat(others=[ps.index, ps.index], sep=sep, na_rep=na_rep) + got = gs.str.cat(others=[gs.index, gs.index], sep=sep, na_rep=na_rep) + + assert_eq(expect, got) + + expect = ps.str.cat(others=(ps.index, ps.index), sep=sep, na_rep=na_rep) + got = gs.str.cat(others=(gs.index, gs.index), sep=sep, na_rep=na_rep) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ["1", "2", "3", "4", "5"], + ["a", "b", "c", "d", "e"], + ["a", "b", "c", None, "e"], + ], +) +@pytest.mark.parametrize( + "others", + [ + None, + ["f", "g", "h", "i", "j"], + ("f", "g", "h", "i", "j"), + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), + pd.Index(["f", "g", "h", "i", "j"]), + pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), + ( + np.array(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ), + [ + np.array(["f", "g", "h", "i", "j"]), + np.array(["f", "g", "h", "i", "j"]), + ], + [ + pd.Series(["f", "g", "h", "i", "j"]), + pd.Series(["f", "g", "h", "i", "j"]), + ], + ( + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["1", "2", "3", "4", "5"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ), + [ + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Series(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + np.array(["f", "a", "b", "f", "a"]), + pd.Index(["f", "g", "h", "i", "j"]), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["a", "b", "c", "d", "e"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["a", "b", "c", "d", "e"], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=[10, 11, 12, 13, 14], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=[10, 15, 11, 13, 14], + ), + ], + [ + pd.Series( + ["hello", "world", "abc", "xyz", "pqr"], + index=["1", "2", "3", "4", "5"], + ), + pd.Series( + ["abc", "xyz", "hello", "pqr", "world"], + index=["1", "2", "3", "4", "5"], + ), + ], + ], +) +@pytest.mark.parametrize("sep", [None, "", " ", "|", "|||"]) +@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) +@pytest.mark.parametrize("name", [None, "This is the name"]) +def test_string_index_str_cat(data, others, sep, na_rep, name): + pi, gi = pd.Index(data, name=name), cudf.Index(data, name=name) + + pd_others = others + gd_others = _cat_convert_seq_to_cudf(others) + + expect = pi.str.cat(others=pd_others, sep=sep, na_rep=na_rep) + got = gi.str.cat(others=gd_others, sep=sep, na_rep=na_rep) + + assert_eq( + expect, + got, + exact=False, + ) + + +def test_string_len(ps_gs): + ps, gs = ps_gs + + expect = ps.str.len() + got = gs.str.len() + + # Can't handle nulls in Pandas so use PyArrow instead + # Pandas will return as a float64 so need to typecast to int32 + expect = pa.array(expect, from_pandas=True).cast(pa.int32()) + got = got.to_arrow() + assert pa.Array.equals(expect, got) + + +def test_string_concat(): + data1 = ["a", "b", "c", "d", "e"] + data2 = ["f", "g", "h", "i", "j"] + index = [1, 2, 3, 4, 5] + + ps1 = pd.Series(data1, index=index) + ps2 = 
pd.Series(data2, index=index) + gs1 = cudf.Series(data1, index=index) + gs2 = cudf.Series(data2, index=index) + + expect = pd.concat([ps1, ps2]) + got = concat([gs1, gs2]) + + assert_eq(expect, got) + + expect = ps1.str.cat(ps2) + got = gs1.str.cat(gs2) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("name", [None, "new name", 123]) +def test_string_misc_name(ps_gs, name): + ps, gs = ps_gs + ps.name = name + gs.name = name + + expect = ps.str.slice(0, 1) + got = gs.str.slice(0, 1) + + assert_eq(expect, got) + assert_eq(ps + ps, gs + gs) + assert_eq(ps + "RAPIDS", gs + "RAPIDS") + assert_eq("RAPIDS" + ps, "RAPIDS" + gs) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 7a66b2bbfcf..4b9e47ed275 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1,9 +1,5 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. -import json -import re -import urllib.parse -from contextlib import nullcontext as does_not_raise from decimal import Decimal from sys import getsizeof @@ -16,7 +12,6 @@ import rmm import cudf -from cudf import concat from cudf.core.buffer import as_buffer from cudf.core.column.string import StringColumn from cudf.core.index import Index @@ -25,18 +20,10 @@ DATETIME_TYPES, NUMERIC_TYPES, assert_exceptions_equal, - expect_warning_if, ) from cudf.utils import dtypes as dtypeutils -def raise_builder(flags, exceptions): - if any(flags): - return pytest.raises(exceptions) - else: - return does_not_raise() - - @pytest.fixture( params=[ ["AbC", "de", "FGHI", "j", "kLm"], @@ -347,27 +334,6 @@ def test_string_empty_numeric_astype(dtype): assert_eq(expect, got) -def test_string_concat(): - data1 = ["a", "b", "c", "d", "e"] - data2 = ["f", "g", "h", "i", "j"] - index = [1, 2, 3, 4, 5] - - ps1 = pd.Series(data1, index=index) - ps2 = pd.Series(data2, index=index) - gs1 = cudf.Series(data1, index=index) - gs2 = cudf.Series(data2, index=index) - - expect = pd.concat([ps1, ps2]) - got = concat([gs1, gs2]) - - assert_eq(expect, got) - - expect = ps1.str.cat(ps2) - got = gs1.str.cat(gs2) - - assert_eq(expect, got) - - @pytest.mark.parametrize("ascending", [True, False]) def test_string_sort(ps_gs, ascending): ps, gs = ps_gs @@ -378,635 +344,6 @@ def test_string_sort(ps_gs, ascending): assert_eq(expect, got) -def test_string_len(ps_gs): - ps, gs = ps_gs - - expect = ps.str.len() - got = gs.str.len() - - # Can't handle nulls in Pandas so use PyArrow instead - # Pandas will return as a float64 so need to typecast to int32 - expect = pa.array(expect, from_pandas=True).cast(pa.int32()) - got = got.to_arrow() - assert pa.Array.equals(expect, got) - - -def _cat_convert_seq_to_cudf(others): - pd_others = others - if isinstance(pd_others, (pd.Series, pd.Index)): - gd_others = cudf.from_pandas(pd_others) - else: - gd_others = pd_others - if isinstance(gd_others, (list, tuple)): - temp_tuple = [ - cudf.from_pandas(elem) - if isinstance(elem, (pd.Series, pd.Index)) - else elem - for elem in gd_others - ] - - if isinstance(gd_others, tuple): - gd_others = tuple(temp_tuple) - else: - gd_others = list(temp_tuple) - return gd_others - - -@pytest.mark.parametrize( - "others", - [ - None, - ["f", "g", "h", "i", "j"], - ("f", "g", "h", "i", "j"), - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - ( - np.array(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ), - [ - np.array(["f", 
"g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ], - [ - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["f", "g", "h", "i", "j"]), - ], - ( - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["f", "g", "h", "i", "j"]), - ), - [ - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ], - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ), - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], - [ - pd.Series(["hello", "world", "abc", "xyz", "pqr"]), - pd.Series(["abc", "xyz", "hello", "pqr", "world"]), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=[10, 11, 12, 13, 14], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=[10, 15, 11, 13, 14], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["10", "11", "12", "13", "14"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["10", "11", "12", "13", "14"], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["10", "11", "12", "13", "14"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["10", "15", "11", "13", "14"], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["1", "2", "3", "4", "5"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["10", "11", "12", "13", "14"], - ), - ], - ], -) -@pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) -@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) -@pytest.mark.parametrize( - "index", - [["1", "2", "3", "4", "5"]], -) -def test_string_cat(ps_gs, others, sep, na_rep, index): - ps, gs = ps_gs - - pd_others = others - gd_others = _cat_convert_seq_to_cudf(others) - - expect = ps.str.cat(others=pd_others, sep=sep, na_rep=na_rep) - got = gs.str.cat(others=gd_others, sep=sep, na_rep=na_rep) - assert_eq(expect, got) - - ps.index = index - gs.index = index - - expect = ps.str.cat(others=ps.index, sep=sep, na_rep=na_rep) - got = gs.str.cat(others=gs.index, sep=sep, na_rep=na_rep) - - assert_eq(expect, got) - - expect = ps.str.cat(others=[ps.index, ps.index], sep=sep, na_rep=na_rep) - got = gs.str.cat(others=[gs.index, gs.index], sep=sep, na_rep=na_rep) - - assert_eq(expect, got) - - expect = ps.str.cat(others=(ps.index, ps.index), sep=sep, na_rep=na_rep) - got = gs.str.cat(others=(gs.index, gs.index), sep=sep, na_rep=na_rep) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["1", "2", "3", "4", "5"], - ["a", "b", "c", "d", "e"], - ["a", "b", "c", None, "e"], - ], -) -@pytest.mark.parametrize( - "others", - [ - None, - ["f", "g", "h", "i", "j"], - ("f", "g", "h", "i", "j"), - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - ( - np.array(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), 
- ), - [ - np.array(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ], - [ - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["f", "g", "h", "i", "j"]), - ], - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - [ - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["a", "b", "c", "d", "e"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["a", "b", "c", "d", "e"], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=[10, 11, 12, 13, 14], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=[10, 15, 11, 13, 14], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["1", "2", "3", "4", "5"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["1", "2", "3", "4", "5"], - ), - ], - ], -) -@pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) -@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) -@pytest.mark.parametrize("name", [None, "This is the name"]) -def test_string_index_str_cat(data, others, sep, na_rep, name): - pi, gi = pd.Index(data, name=name), cudf.Index(data, name=name) - - pd_others = others - gd_others = _cat_convert_seq_to_cudf(others) - - expect = pi.str.cat(others=pd_others, sep=sep, na_rep=na_rep) - got = gi.str.cat(others=gd_others, sep=sep, na_rep=na_rep) - - assert_eq( - expect, - got, - exact=False, - ) - - -@pytest.mark.parametrize( - "data", - [["a", None, "c", None, "e"], ["a", "b", "c", "d", "a"]], -) -@pytest.mark.parametrize( - "others", - [ - None, - ["f", "g", "h", "i", "j"], - pd.Series(["AbC", "de", "FGHI", "j", "kLm"]), - pd.Index(["f", "g", "h", "i", "j"]), - pd.Index(["AbC", "de", "FGHI", "j", "kLm"]), - [ - np.array(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ], - [ - pd.Series(["f", "g", "h", "i", "j"]), - pd.Series(["f", "g", "h", "i", "j"]), - ], - pytest.param( - [ - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "g", "h", "i", "j"]), - ], - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/5862" - ), - ), - pytest.param( - ( - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Series(["f", "g", "h", "i", "j"]), - np.array(["f", "a", "b", "f", "a"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["1", "2", "3", "4", "5"]), - np.array(["f", "a", "b", "f", "a"]), - pd.Index(["f", "g", "h", "i", "j"]), - ), - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/33436" - ), - ), - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=["a", "b", "c", "d", "e"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["a", "b", "c", "d", "e"], - ), - ], - [ - pd.Series( - ["hello", "world", "abc", "xyz", "pqr"], - index=[10, 11, 12, 13, 14], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=[10, 15, 11, 13, 14], - ), - ], - [ - pd.Series( - ["hello", "world", 
"abc", "xyz", "pqr"], - index=["1", "2", "3", "4", "5"], - ), - pd.Series( - ["abc", "xyz", "hello", "pqr", "world"], - index=["1", "2", "3", "4", "5"], - ), - ], - ], -) -@pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) -@pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) -@pytest.mark.parametrize("name", [None, "This is the name"]) -def test_string_index_duplicate_str_cat(data, others, sep, na_rep, name): - pi, gi = pd.Index(data, name=name), cudf.Index(data, name=name) - - pd_others = others - gd_others = _cat_convert_seq_to_cudf(others) - - got = gi.str.cat(others=gd_others, sep=sep, na_rep=na_rep) - expect = pi.str.cat(others=pd_others, sep=sep, na_rep=na_rep) - - # TODO: Remove got.sort_values call once we have `join` param support - # in `.str.cat` - # https://github.com/rapidsai/cudf/issues/5862 - - assert_eq( - expect.sort_values() if not isinstance(expect, str) else expect, - got.sort_values() if not isinstance(got, str) else got, - exact=False, - ) - - -def test_string_cat_str_error(): - gs = cudf.Series(["a", "v", "s"]) - # https://github.com/pandas-dev/pandas/issues/28277 - # ability to pass StringMethods is being removed in future. - with pytest.raises( - TypeError, - match=re.escape( - "others must be Series, Index, DataFrame, np.ndarrary " - "or list-like (either containing only strings or " - "containing only objects of type Series/Index/" - "np.ndarray[1-dim])" - ), - ): - gs.str.cat(gs.str) - - -@pytest.mark.parametrize("sep", ["", " ", "|", ",", "|||"]) -def test_string_join(ps_gs, sep): - ps, gs = ps_gs - - expect = ps.str.join(sep) - got = gs.str.join(sep) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("pat", [r"(a)", r"(f)", r"([a-z])", r"([A-Z])"]) -@pytest.mark.parametrize("expand", [True, False]) -@pytest.mark.parametrize( - "flags,flags_raise", [(0, 0), (re.M | re.S, 0), (re.I, 1)] -) -def test_string_extract(ps_gs, pat, expand, flags, flags_raise): - ps, gs = ps_gs - expectation = raise_builder([flags_raise], NotImplementedError) - - with expectation: - expect = ps.str.extract(pat, flags=flags, expand=expand) - got = gs.str.extract(pat, flags=flags, expand=expand) - - assert_eq(expect, got) - - -def test_string_invalid_regex(): - gs = cudf.Series(["a"]) - with pytest.raises(RuntimeError): - gs.str.extract(r"{\}") - - -@pytest.mark.parametrize( - "pat,regex", - [ - ("a", False), - ("a", True), - ("f", False), - (r"[a-z]", True), - (r"[A-Z]", True), - ("hello", False), - ("FGHI", False), - ], -) -@pytest.mark.parametrize( - "flags,flags_raise", - [(0, 0), (re.MULTILINE | re.DOTALL, 0), (re.I, 1), (re.I | re.DOTALL, 1)], -) -@pytest.mark.parametrize("na,na_raise", [(np.nan, 0), (None, 1), ("", 1)]) -def test_string_contains(ps_gs, pat, regex, flags, flags_raise, na, na_raise): - ps, gs = ps_gs - - expectation = does_not_raise() - if flags_raise or na_raise: - expectation = pytest.raises(NotImplementedError) - - with expectation: - with expect_warning_if( - na == "" or (na is None and not (flags_raise or na_raise)), - match=( - "Allowing a non-bool 'na' in obj.str.contains is deprecated " - "and will raise in a future version." 
- ), - ): - expect = ps.str.contains(pat, flags=flags, na=na, regex=regex) - got = gs.str.contains(pat, flags=flags, na=na, regex=regex) - assert_eq(expect, got) - - -def test_string_contains_case(ps_gs): - ps, gs = ps_gs - with pytest.raises(NotImplementedError): - gs.str.contains("A", case=False) - expected = ps.str.contains("A", regex=False, case=False) - got = gs.str.contains("A", regex=False, case=False) - assert_eq(expected, got) - got = gs.str.contains("a", regex=False, case=False) - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "pat,esc,expect", - [ - ("abc", "", [True, False, False, False, False, False]), - ("b%", "/", [False, True, False, False, False, False]), - ("%b", ":", [False, True, False, False, False, False]), - ("%b%", "*", [True, True, False, False, False, False]), - ("___", "", [True, True, True, False, False, False]), - ("__/%", "/", [False, False, True, False, False, False]), - ("55/____", "/", [False, False, False, True, False, False]), - ("%:%%", ":", [False, False, True, False, False, False]), - ("55*_100", "*", [False, False, False, True, False, False]), - ("abc", "abc", [True, False, False, False, False, False]), - ], -) -def test_string_like(pat, esc, expect): - expectation = does_not_raise() - if len(esc) > 1: - expectation = pytest.raises(ValueError) - - with expectation: - gs = cudf.Series(["abc", "bab", "99%", "55_100", "", "556100"]) - got = gs.str.like(pat, esc) - expect = cudf.Series(expect) - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [["hello", "world", None, "", "!"]], -) -@pytest.mark.parametrize( - "repeats", - [ - 2, - 0, - -3, - [5, 4, 3, 2, 6], - [5, None, 3, 2, 6], - [0, 0, 0, 0, 0], - [-1, -2, -3, -4, -5], - [None, None, None, None, None], - ], -) -def test_string_repeat(data, repeats): - ps = pd.Series(data) - gs = cudf.from_pandas(ps) - - expect = ps.str.repeat(repeats) - got = gs.str.repeat(repeats) - - assert_eq(expect, got) - - -# Pandas doesn't respect the `n` parameter so ignoring it in test parameters -@pytest.mark.parametrize( - "pat,regex", - [("a", False), ("f", False), (r"[a-z]", True), (r"[A-Z]", True)], -) -@pytest.mark.parametrize("repl", ["qwerty", "", " "]) -@pytest.mark.parametrize("case,case_raise", [(None, 0), (True, 1), (False, 1)]) -@pytest.mark.parametrize("flags,flags_raise", [(0, 0), (re.U, 1)]) -def test_string_replace( - ps_gs, pat, repl, case, case_raise, flags, flags_raise, regex -): - ps, gs = ps_gs - - expectation = raise_builder([case_raise, flags_raise], NotImplementedError) - - with expectation: - expect = ps.str.replace(pat, repl, case=case, flags=flags, regex=regex) - got = gs.str.replace(pat, repl, case=case, flags=flags, regex=regex) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("pat", ["A*", "F?H?"]) -def test_string_replace_zero_length(ps_gs, pat): - ps, gs = ps_gs - - expect = ps.str.replace(pat, "_", regex=True) - got = gs.str.replace(pat, "_", regex=True) - - assert_eq(expect, got) - - -def test_string_lower(ps_gs): - ps, gs = ps_gs - - expect = ps.str.lower() - got = gs.str.lower() - - assert_eq(expect, got) - - -def test_string_upper(ps_gs): - ps, gs = ps_gs - - expect = ps.str.upper() - got = gs.str.upper() - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["a b", " c ", " d", "e ", "f"], - ["a-b", "-c-", "---d", "e---", "f"], - ["ab", "c", "d", "e", "f"], - [None, None, None, None, None], - ], -) -@pytest.mark.parametrize("pat", [None, " ", "-"]) -@pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) 
-@pytest.mark.parametrize("expand", [True, False]) -def test_string_split(data, pat, n, expand): - ps = pd.Series(data, dtype="str") - gs = cudf.Series(data, dtype="str") - - expect = ps.str.split(pat=pat, n=n, expand=expand) - got = gs.str.split(pat=pat, n=n, expand=expand) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["a b", " c ", " d", "e ", "f"], - ["a-b", "-c-", "---d", "e---", "f"], - ["ab", "c", "d", "e", "f"], - [None, None, None, None, None], - ], -) -@pytest.mark.parametrize("pat", [None, " ", "\\-+", "\\s+"]) -@pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) -@pytest.mark.parametrize("expand", [True, False]) -def test_string_split_re(data, pat, n, expand): - ps = pd.Series(data, dtype="str") - gs = cudf.Series(data, dtype="str") - - expect = ps.str.split(pat=pat, n=n, expand=expand, regex=True) - got = gs.str.split(pat=pat, n=n, expand=expand, regex=True) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("pat", [None, "\\s+"]) -@pytest.mark.parametrize("regex", [False, True]) -@pytest.mark.parametrize("expand", [False, True]) -def test_string_split_all_empty(pat, regex, expand): - ps = pd.Series(["", "", "", ""], dtype="str") - gs = cudf.Series(["", "", "", ""], dtype="str") - - expect = ps.str.split(pat=pat, expand=expand, regex=regex) - got = gs.str.split(pat=pat, expand=expand, regex=regex) - - if isinstance(got, cudf.DataFrame): - assert_eq(expect, got, check_column_type=False) - else: - assert_eq(expect, got) - - @pytest.mark.parametrize( "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] ) @@ -1134,16 +471,6 @@ def test_string_unique(item): assert_eq(pres, gres) -def test_string_slice(): - df = cudf.DataFrame({"a": ["hello", "world"]}) - pdf = pd.DataFrame({"a": ["hello", "world"]}) - a_slice_got = df.a.str.slice(0, 2) - a_slice_expected = pdf.a.str.slice(0, 2) - - assert isinstance(a_slice_got, cudf.Series) - assert_eq(a_slice_expected, a_slice_got) - - def test_string_equality(): data1 = ["b", "c", "d", "a", "c"] data2 = ["a", None, "c", "a", "c"] @@ -1193,21 +520,6 @@ def test_string_binary_op_add(lhs, rhs): assert_eq(pds, gds) -@pytest.mark.parametrize("name", [None, "new name", 123]) -def test_string_misc_name(ps_gs, name): - ps, gs = ps_gs - ps.name = name - gs.name = name - - expect = ps.str.slice(0, 1) - got = gs.str.slice(0, 1) - - assert_eq(expect, got) - assert_eq(ps + ps, gs + gs) - assert_eq(ps + "RAPIDS", gs + "RAPIDS") - assert_eq("RAPIDS" + ps, "RAPIDS" + gs) - - def test_string_no_children_properties(): empty_col = StringColumn( as_buffer(rmm.DeviceBuffer(size=0)), @@ -1224,1378 +536,41 @@ def test_string_no_children_properties(): assert getsizeof(empty_col) >= 0 # Accounts for Python GC overhead -@pytest.mark.parametrize( - "string", - [ - ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], - ["abc", "xyz", "a", "ab", "123", "097"], - ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], - ], -) -@pytest.mark.parametrize( - "index", [-100, -5, -2, -6, -1, 0, 1, 2, 3, 9, 10, 100] -) -def test_string_get(string, index): - pds = pd.Series(string) - gds = cudf.Series(string) +def test_string_table_view_creation(): + data = ["hi"] * 25 + [None] * 2027 + psr = pd.Series(data) + gsr = cudf.Series.from_pandas(psr) - assert_eq( - pds.str.get(index).fillna(""), - gds.str.get(index).fillna(""), - ) + expect = psr[:1] + got = gsr[:1] + + assert_eq(expect, got) @pytest.mark.parametrize( - "string", + "data,dtype", [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["abcdefghij", "0123456789", "9876543210", 
None, "accénted", ""], - ["koala", "fox", "chameleon"], + (["0.1", "10.2", "10.876"], "float"), + (["-0.1", "10.2", "+10.876"], "float"), + (["1", "10.2", "10.876"], "float32"), + (["+123", "6344556789", "0"], "int"), + (["+123", "6344556789", "0"], "uint64"), + (["+123", "6344556789", "0"], "float"), + (["0.1", "-10.2", "10.876", None], "float"), ], ) -@pytest.mark.parametrize( - "number", - [-10, 0, 1, 3, 10], -) -@pytest.mark.parametrize( - "diff", - [0, 2, 5, 9], -) -def test_string_slice_str(string, number, diff): - pds = pd.Series(string) - gds = cudf.Series(string) - - assert_eq(pds.str.slice(start=number), gds.str.slice(start=number)) - assert_eq(pds.str.slice(stop=number), gds.str.slice(stop=number)) - assert_eq(pds.str.slice(), gds.str.slice()) - assert_eq( - pds.str.slice(start=number, stop=number + diff), - gds.str.slice(start=number, stop=number + diff), - ) - if diff != 0: - assert_eq(pds.str.slice(step=diff), gds.str.slice(step=diff)) - assert_eq( - pds.str.slice(start=number, stop=number + diff, step=diff), - gds.str.slice(start=number, stop=number + diff, step=diff), - ) - +@pytest.mark.parametrize("obj_type", [None, "str", "category"]) +def test_string_typecast(data, obj_type, dtype): + psr = pd.Series(data, dtype=obj_type) + gsr = cudf.Series(data, dtype=obj_type) -def test_string_slice_from(): - gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) - d_starts = cudf.Series([2, 3, 0, -1, -1], dtype=np.int32) - d_stops = cudf.Series([-1, -1, 0, -1, -1], dtype=np.int32) - got = gs.str.slice_from(starts=d_starts, stops=d_stops) - expected = cudf.Series(["llo world", "y accéntéd", "", None, ""]) - assert_eq(got, expected) + expect = psr.astype(dtype=dtype) + actual = gsr.astype(dtype=dtype) + assert_eq(expect, actual) @pytest.mark.parametrize( - "string", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], - ["koala", "fox", "chameleon"], - ], -) -@pytest.mark.parametrize("number", [0, 1, 10]) -@pytest.mark.parametrize("diff", [0, 2, 9]) -@pytest.mark.parametrize("repr", ["2", "!!"]) -def test_string_slice_replace(string, number, diff, repr): - pds = pd.Series(string) - gds = cudf.Series(string) - - assert_eq( - pds.str.slice_replace(start=number, repl=repr), - gds.str.slice_replace(start=number, repl=repr), - check_dtype=False, - ) - assert_eq( - pds.str.slice_replace(stop=number, repl=repr), - gds.str.slice_replace(stop=number, repl=repr), - ) - assert_eq(pds.str.slice_replace(), gds.str.slice_replace()) - assert_eq( - pds.str.slice_replace(start=number, stop=number + diff), - gds.str.slice_replace(start=number, stop=number + diff), - ) - assert_eq( - pds.str.slice_replace(start=number, stop=number + diff, repl=repr), - gds.str.slice_replace(start=number, stop=number + diff, repl=repr), - check_dtype=False, - ) - - -def test_string_slice_replace_fail(): - gs = cudf.Series(["abc", "xyz", ""]) - with pytest.raises(TypeError): - gs.str.slice_replace(0, 1, ["_"]) - - -def test_string_insert(): - gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) - - ps = pd.Series(["hello world", "holy accéntéd", "batman", None, ""]) - - assert_eq(gs.str.insert(0, ""), gs) - assert_eq(gs.str.insert(0, "+"), "+" + ps) - assert_eq(gs.str.insert(-1, "---"), ps + "---") - assert_eq( - gs.str.insert(5, "---"), - ps.str.slice(stop=5) + "---" + ps.str.slice(start=5), - ) - - with pytest.raises(TypeError): - gs.str.insert(0, ["+"]) - - -@pytest.fixture( - params=[ - ["abc", "xyz", "a", "ab", "123", 
"097"], - ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""], - ["koala", "fox", "chameleon"], - [ - "1234567890", - "de", - "1.75", - "-34", - "+9.8", - "7¼", - "x³", - "2³", - "12⅝", - "", - "\t\r\n ", - ], - ["one", "one1", "1", ""], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["leopard", "Golden Eagle", "SNAKE", ""], - [r"¯\_(ツ)_/¯", "(╯°□°)╯︵ ┻━┻", "┬─┬ノ( º _ ºノ)"], - ["a1", "A1", "a!", "A!", "!1", "aA"], - [ - None, - "The quick bRoWn fox juMps over the laze DOG", - '123nr98nv9rev!$#INF4390v03n1243<>?}{:-"', - "accénted", - ], - ] -) -def data_char_types(request): - return request.param - - -@pytest.mark.parametrize( - "type_op", - [ - "isdecimal", - "isalnum", - "isalpha", - "isdigit", - "isnumeric", - "isupper", - "islower", - ], -) -def test_string_char_types(type_op, data_char_types): - gs = cudf.Series(data_char_types) - ps = pd.Series(data_char_types) - - assert_eq(getattr(gs.str, type_op)(), getattr(ps.str, type_op)()) - - -def test_string_filter_alphanum(): - data = ["1234567890", "!@#$%^&*()", ",./<>?;:[]}{|+=", "abc DEF"] - expected = [] - for st in data: - rs = "" - for c in st: - if str.isalnum(c): - rs = rs + c - expected.append(rs) - - gs = cudf.Series(data) - assert_eq(gs.str.filter_alphanum(), cudf.Series(expected)) - - expected = [] - for st in data: - rs = "" - for c in st: - if not str.isalnum(c): - rs = rs + c - expected.append(rs) - assert_eq(gs.str.filter_alphanum(keep=False), cudf.Series(expected)) - - expected = [] - for st in data: - rs = "" - for c in st: - if str.isalnum(c): - rs = rs + c - else: - rs = rs + "*" - expected.append(rs) - assert_eq(gs.str.filter_alphanum("*"), cudf.Series(expected)) - - expected = [] - for st in data: - rs = "" - for c in st: - if not str.isalnum(c): - rs = rs + c - else: - rs = rs + "*" - expected.append(rs) - assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected)) - - with pytest.raises(TypeError): - gs.str.filter_alphanum(["a"]) - - -@pytest.mark.parametrize( - "case_op", ["title", "capitalize", "lower", "upper", "swapcase"] -) -def test_string_char_case(case_op, data_char_types): - gs = cudf.Series(data_char_types) - ps = pd.Series(data_char_types) - - s = gs.str - a = getattr(s, case_op) - - assert_eq(a(), getattr(ps.str, case_op)()) - - assert_eq(gs.str.capitalize(), ps.str.capitalize()) - assert_eq(gs.str.isdecimal(), ps.str.isdecimal()) - assert_eq(gs.str.isalnum(), ps.str.isalnum()) - assert_eq(gs.str.isalpha(), ps.str.isalpha()) - assert_eq(gs.str.isdigit(), ps.str.isdigit()) - assert_eq(gs.str.isnumeric(), ps.str.isnumeric()) - assert_eq(gs.str.isspace(), ps.str.isspace()) - - assert_eq(gs.str.isempty(), ps == "") - - -def test_string_is_title(): - data = [ - "leopard", - "Golden Eagle", - "SNAKE", - "", - "!A", - "hello World", - "A B C", - "#", - "AƻB", - "Ⓑⓖ", - "Art of War", - ] - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq(gs.str.istitle(), ps.str.istitle()) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ], -) -def test_strings_rpartition(data): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq(ps.str.rpartition(), gs.str.rpartition()) - assert_eq(ps.str.rpartition("-"), gs.str.rpartition("-")) - assert_eq(ps.str.rpartition(","), gs.str.rpartition(",")) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - 
["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ], -) -def test_strings_partition(data): - gs = cudf.Series(data, name="str_name") - ps = pd.Series(data, name="str_name") - - assert_eq(ps.str.partition(), gs.str.partition()) - assert_eq(ps.str.partition(","), gs.str.partition(",")) - assert_eq(ps.str.partition("-"), gs.str.partition("-")) - - gi = cudf.Index(data, name="new name") - pi = pd.Index(data, name="new name") - assert_eq(pi.str.partition(), gi.str.partition()) - assert_eq(pi.str.partition(","), gi.str.partition(",")) - assert_eq(pi.str.partition("-"), gi.str.partition("-")) - - -def test_string_partition_fail(): - gs = cudf.Series(["abc", "aa", "cba"]) - with pytest.raises(TypeError): - gs.str.partition(["a"]) - with pytest.raises(TypeError): - gs.str.rpartition(["a"]) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - [ - "this is a regular sentence", - "https://docs.python.org/3/tutorial/index.html", - None, - ], - ], -) -@pytest.mark.parametrize("n", [-1, 2, 1, 9]) -@pytest.mark.parametrize("expand", [True, False]) -def test_strings_rsplit(data, n, expand): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - ps.str.rsplit(n=n, expand=expand).reset_index(), - gs.str.rsplit(n=n, expand=expand).reset_index(), - check_index_type=False, - ) - assert_eq( - ps.str.rsplit(",", n=n, expand=expand), - gs.str.rsplit(",", n=n, expand=expand), - ) - assert_eq( - ps.str.rsplit("-", n=n, expand=expand), - gs.str.rsplit("-", n=n, expand=expand), - ) - - -@pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) -@pytest.mark.parametrize("expand", [True, False]) -def test_string_rsplit_re(n, expand): - data = ["a b", " c ", " d", "e ", "f"] - ps = pd.Series(data, dtype="str") - gs = cudf.Series(data, dtype="str") - - # Pandas does not yet support the regex parameter for rsplit - import inspect - - assert ( - "regex" - not in inspect.signature(pd.Series.str.rsplit).parameters.keys() - ) - - expect = ps.str.rsplit(pat=" ", n=n, expand=expand) - got = gs.str.rsplit(pat="\\s", n=n, expand=expand, regex=True) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - [ - "this is a regular sentence", - "https://docs.python.org/3/tutorial/index.html", - None, - ], - ], -) -@pytest.mark.parametrize("n", [-1, 2, 1, 9]) -@pytest.mark.parametrize("expand", [True, False]) -def test_strings_split(data, n, expand): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - ps.str.split(n=n, expand=expand).reset_index(), - gs.str.split(n=n, expand=expand).reset_index(), - check_index_type=False, - ) - - assert_eq( - ps.str.split(",", n=n, expand=expand), - gs.str.split(",", n=n, expand=expand), - ) - assert_eq( - ps.str.split("-", n=n, expand=expand), - gs.str.split("-", n=n, expand=expand), - ) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - [ - "this is a regular sentence", - "https://docs.python.org/3/tutorial/index.html", - None, - ], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -@pytest.mark.parametrize( - "to_strip", ["⅕", None, "123.", ".!? \n\t", "123.!? 
\n\t", " ", ".", ","] -) -def test_strings_strip_tests(data, to_strip): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq(ps.str.strip(to_strip=to_strip), gs.str.strip(to_strip=to_strip)) - assert_eq( - ps.str.rstrip(to_strip=to_strip), gs.str.rstrip(to_strip=to_strip) - ) - assert_eq( - ps.str.lstrip(to_strip=to_strip), gs.str.lstrip(to_strip=to_strip) - ) - - gi = cudf.Index(data) - pi = pd.Index(data) - - assert_eq(pi.str.strip(to_strip=to_strip), gi.str.strip(to_strip=to_strip)) - assert_eq( - pi.str.rstrip(to_strip=to_strip), gi.str.rstrip(to_strip=to_strip) - ) - assert_eq( - pi.str.lstrip(to_strip=to_strip), gi.str.lstrip(to_strip=to_strip) - ) - - -def test_string_strip_fail(): - gs = cudf.Series(["a", "aa", ""]) - with pytest.raises(TypeError): - gs.str.strip(["a"]) - with pytest.raises(TypeError): - gs.str.lstrip(["a"]) - with pytest.raises(TypeError): - gs.str.rstrip(["a"]) - - -@pytest.mark.parametrize( - "data", - [ - ["koala", "fox", "chameleon"], - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - [ - "this is a regular sentence", - "https://docs.python.org/3/tutorial/index.html", - None, - ], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -@pytest.mark.parametrize("width", [0, 1, 4, 9, 100]) -@pytest.mark.parametrize("fillchar", ["⅕", "1", ".", "t", " ", ","]) -def test_strings_filling_tests(data, width, fillchar): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - ps.str.center(width=width, fillchar=fillchar), - gs.str.center(width=width, fillchar=fillchar), - ) - assert_eq( - ps.str.ljust(width=width, fillchar=fillchar), - gs.str.ljust(width=width, fillchar=fillchar), - ) - assert_eq( - ps.str.rjust(width=width, fillchar=fillchar), - gs.str.rjust(width=width, fillchar=fillchar), - ) - - gi = cudf.Index(data) - pi = pd.Index(data) - - assert_eq( - pi.str.center(width=width, fillchar=fillchar), - gi.str.center(width=width, fillchar=fillchar), - ) - assert_eq( - pi.str.ljust(width=width, fillchar=fillchar), - gi.str.ljust(width=width, fillchar=fillchar), - ) - assert_eq( - pi.str.rjust(width=width, fillchar=fillchar), - gi.str.rjust(width=width, fillchar=fillchar), - ) - - -@pytest.mark.parametrize( - "data", - [ - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["³", "⅕", ""], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - [" ", "\t\r\n ", ""], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -@pytest.mark.parametrize("width", [0, 1, 4, 6, 9, 100]) -def test_strings_zfill_tests(data, width): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq(ps.str.zfill(width=width), gs.str.zfill(width=width)) - - gi = cudf.Index(data) - pi = pd.Index(data) - - assert_eq(pi.str.zfill(width=width), gi.str.zfill(width=width)) - - -@pytest.mark.parametrize( - "data", - [ - ["A,,B", "1,,5", "3,00,0"], - ["Linda van der Berg", "George Pitt-Rivers"], - ["+23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ["1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", None], - ], -) -@pytest.mark.parametrize("width", [0, 1, 4, 9, 100]) -@pytest.mark.parametrize( - "side", - ["left", "right", "both"], -) -@pytest.mark.parametrize("fillchar", [" ", ".", "\n", "+", "\t"]) -def test_strings_pad_tests(data, width, side, fillchar): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - ps.str.pad(width=width, side=side, fillchar=fillchar), - gs.str.pad(width=width, side=side, fillchar=fillchar), - ) - - gi = cudf.Index(data) - pi = pd.Index(data) - - assert_eq( - pi.str.pad(width=width, side=side, fillchar=fillchar), - gi.str.pad(width=width, side=side, fillchar=fillchar), - ) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - # [" ", "\t\r\n ", ""], - ["leopard", "Golden Eagle", "SNAKE", ""], - ["line to be wrapped", "another line to be wrapped"], - ], -) -@pytest.mark.parametrize("width", [1, 4, 8, 12, 100]) -def test_string_wrap(data, width): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - gs.str.wrap( - width=width, - break_long_words=False, - expand_tabs=False, - replace_whitespace=True, - drop_whitespace=True, - break_on_hyphens=False, - ), - ps.str.wrap( - width=width, - break_long_words=False, - expand_tabs=False, - replace_whitespace=True, - drop_whitespace=True, - break_on_hyphens=False, - ), - ) - - gi = cudf.Index(data) - pi = pd.Index(data) - - assert_eq( - gi.str.wrap( - width=width, - break_long_words=False, - expand_tabs=False, - replace_whitespace=True, - drop_whitespace=True, - break_on_hyphens=False, - ), - pi.str.wrap( - width=width, - break_long_words=False, - expand_tabs=False, - replace_whitespace=True, - drop_whitespace=True, - break_on_hyphens=False, - ), - ) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat", "cat\ndog"], - ["line\nto be wrapped", "another\nline\nto be wrapped"], - ], -) -@pytest.mark.parametrize( - "pat", - ["a", " ", "\t", "another", "0", r"\$", "^line$", "line.*be", "cat$"], -) -@pytest.mark.parametrize("flags", [0, re.MULTILINE, re.DOTALL]) -def test_string_count(data, pat, flags): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - gs.str.count(pat=pat, flags=flags), - ps.str.count(pat=pat, flags=flags), - check_dtype=False, - ) - assert_eq( - cudf.Index(gs).str.count(pat=pat), - pd.Index(ps).str.count(pat=pat), - exact=False, - ) - - -@pytest.mark.parametrize( - "pat, flags", - [ - ("Monkey", 0), - ("on", 0), - ("b", 0), - ("on$", 0), - ("on$", re.MULTILINE), - ("o.*k", re.DOTALL), - ], -) -def test_string_findall(pat, flags): - test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"] - ps = pd.Series(test_data) - gs = cudf.Series(test_data) - - expected = ps.str.findall(pat, flags) - actual = gs.str.findall(pat, flags) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "pat, flags, pos", - [ - ("Monkey", 0, [-1, 0, -1, -1]), - ("on", 0, [2, 1, -1, 1]), - ("bit", 0, [-1, -1, 3, -1]), - ("on$", 0, [2, -1, -1, -1]), - ("on$", re.MULTILINE, [2, -1, -1, 1]), - ("o.*k", re.DOTALL, [-1, 1, -1, 1]), - ], -) -def test_string_find_re(pat, flags, pos): - test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"] - gs = cudf.Series(test_data) - - expected = pd.Series(pos, dtype=np.int32) - actual = gs.str.find_re(pat, flags) - assert_eq(expected, actual) - - -def test_string_replace_multi(): - ps = pd.Series(["hello", "goodbye"]) - 
gs = cudf.Series(["hello", "goodbye"]) - expect = ps.str.replace("e", "E").str.replace("o", "O") - got = gs.str.replace(["e", "o"], ["E", "O"]) - - assert_eq(expect, got) - - ps = pd.Series(["foo", "fuz", np.nan]) - gs = cudf.Series.from_pandas(ps) - - expect = ps.str.replace("f.", "ba", regex=True) - got = gs.str.replace(["f."], ["ba"], regex=True) - assert_eq(expect, got) - - ps = pd.Series(["f.o", "fuz", np.nan]) - gs = cudf.Series.from_pandas(ps) - - expect = ps.str.replace("f.", "ba", regex=False) - got = gs.str.replace(["f."], ["ba"], regex=False) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "find", - [ - "(\\d)(\\d)", - "(\\d)(\\d)", - "(\\d)(\\d)", - "(\\d)(\\d)", - "([a-z])-([a-z])", - "([a-z])-([a-zé])", - "([a-z])-([a-z])", - "([a-z])-([a-zé])", - re.compile("([A-Z])(\\d)"), - ], -) -@pytest.mark.parametrize( - "replace", - ["\\1-\\2", "V\\2-\\1", "\\1 \\2", "\\2 \\1", "X\\1+\\2Z", "X\\1+\\2Z"], -) -def test_string_replace_with_backrefs(find, replace): - s = [ - "A543", - "Z756", - "", - None, - "tést-string", - "two-thréé four-fivé", - "abcd-éfgh", - "tést-string-again", - ] - ps = pd.Series(s) - gs = cudf.Series(s) - got = gs.str.replace_with_backrefs(find, replace) - expected = ps.str.replace(find, replace, regex=True) - assert_eq(got, expected) - - got = cudf.Index(gs).str.replace_with_backrefs(find, replace) - expected = pd.Index(ps).str.replace(find, replace, regex=True) - assert_eq(got, expected) - - -def test_string_table_view_creation(): - data = ["hi"] * 25 + [None] * 2027 - psr = pd.Series(data) - gsr = cudf.Series.from_pandas(psr) - - expect = psr[:1] - got = gsr[:1] - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ["line to be wrapped", "another line to be wrapped"], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -@pytest.mark.parametrize( - "pat", - ["", None, " ", "a", "abc", "cat", "$", "\n"], -) -def test_string_starts_ends(data, pat): - ps = pd.Series(data) - gs = cudf.Series(data) - - if pat is None: - assert_exceptions_equal( - lfunc=ps.str.startswith, - rfunc=gs.str.startswith, - lfunc_args_and_kwargs=([pat],), - rfunc_args_and_kwargs=([pat],), - ) - assert_exceptions_equal( - lfunc=ps.str.endswith, - rfunc=gs.str.endswith, - lfunc_args_and_kwargs=([pat],), - rfunc_args_and_kwargs=([pat],), - ) - else: - assert_eq( - ps.str.startswith(pat), gs.str.startswith(pat), check_dtype=False - ) - assert_eq( - ps.str.endswith(pat), gs.str.endswith(pat), check_dtype=False - ) - - -@pytest.mark.parametrize( - "data,pat", - [ - ( - ["abc", "xyz", "a", "ab", "123", "097"], - ("abc", "x", "a", "b", "3", "7"), - ), - (["A B", "1.5", "3,000"], ("A ", ".", ",")), - (["23", "³", "⅕", ""], ("23", "³", "⅕", "")), - ([" ", "\t\r\n ", ""], ("d", "\n ", "")), - ( - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ("$", "$", "a", "<", "(", "#"), - ), - ( - ["line to be wrapped", "another line to be wrapped"], - ("another", "wrapped"), - ), - ( - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ("hsdjfk", "", "ll", "+", "-", "w", "-", "én"), - ), - ( - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ("1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", ""), - ), - ], -) -def test_string_starts_ends_list_like_pat(data, pat): - gs = cudf.Series(data) - - starts_expected = [] - ends_expected = [] - for i in range(len(pat)): - if data[i] is None: - starts_expected.append(None) - ends_expected.append(None) - else: - if pat[i] is None: - starts_expected.append(False) - ends_expected.append(False) - else: - starts_expected.append(data[i].startswith(pat[i])) - ends_expected.append(data[i].endswith(pat[i])) - starts_expected = pd.Series(starts_expected) - ends_expected = pd.Series(ends_expected) - assert_eq(starts_expected, gs.str.startswith(pat), check_dtype=False) - assert_eq(ends_expected, gs.str.endswith(pat), check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - ["str_foo", "str_bar", "no_prefix", "", None], - ["foo_str", "bar_str", "no_suffix", "", None], - ], -) -def test_string_remove_suffix_prefix(data): - ps = pd.Series(data) - gs = cudf.Series(data) - - got = gs.str.removeprefix("str_") - expect = ps.str.removeprefix("str_") - assert_eq( - expect, - got, - check_dtype=False, - ) - got = gs.str.removesuffix("_str") - expect = ps.str.removesuffix("_str") - assert_eq( - expect, - got, - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ["line to be wrapped", "another line to be wrapped"], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -@pytest.mark.parametrize( - "sub", - ["", " ", "a", "abc", "cat", "$", "\n"], -) -def test_string_find(data, sub): - ps = pd.Series(data) - gs = cudf.Series(data) - - got = gs.str.find(sub) - expect = ps.str.find(sub) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.find(sub, start=1) - expect = ps.str.find(sub, start=1) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.find(sub, end=10) - expect = ps.str.find(sub, end=10) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.find(sub, start=2, end=10) - expect = ps.str.find(sub, start=2, end=10) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.rfind(sub) - expect = ps.str.rfind(sub) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.rfind(sub, start=1) - expect = ps.str.rfind(sub, start=1) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.rfind(sub, end=10) - expect = ps.str.rfind(sub, end=10) - assert_eq( - expect, - got, - check_dtype=False, - ) - - got = gs.str.rfind(sub, start=2, end=10) - expect = ps.str.rfind(sub, start=2, end=10) - assert_eq( - expect, - got, - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "data,sub,er", - [ - (["abc", "xyz", "a", "ab", "123", "097"], "a", ValueError), - (["A B", "1.5", "3,000"], "abc", ValueError), - (["23", "³", "⅕", ""], "⅕", ValueError), - ([" ", "\t\r\n ", ""], "\n", ValueError), - (["$", "B", "Aab$", "$$ca", "C$B$", "cat"], "$", ValueError), - (["line to be wrapped", "another line to be wrapped"], " ", None), - ( - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - "+", - ValueError, - ), - (["line to be wrapped", "another line to be wrapped"], "", None), - ], -) -def test_string_str_index(data, sub, er): - ps = pd.Series(data) - gs = cudf.Series(data) - - if er is None: - assert_eq(ps.str.index(sub), gs.str.index(sub), check_dtype=False) - - try: - ps.str.index(sub) - except er: 
- pass - else: - assert not er - - try: - gs.str.index(sub) - except er: - pass - else: - assert not er - - -@pytest.mark.parametrize( - "data,sub,er", - [ - (["abc", "xyz", "a", "ab", "123", "097"], "a", ValueError), - (["A B", "1.5", "3,000"], "abc", ValueError), - (["23", "³", "⅕", ""], "⅕", ValueError), - ([" ", "\t\r\n ", ""], "\n", ValueError), - (["$", "B", "Aab$", "$$ca", "C$B$", "cat"], "$", ValueError), - (["line to be wrapped", "another line to be wrapped"], " ", None), - ( - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - "+", - ValueError, - ), - (["line to be wrapped", "another line to be wrapped"], "", None), - ], -) -def test_string_str_rindex(data, sub, er): - ps = pd.Series(data) - gs = cudf.Series(data) - - if er is None: - assert_eq(ps.str.rindex(sub), gs.str.rindex(sub), check_dtype=False) - assert_eq( - pd.Index(ps).str.rindex(sub), - cudf.Index(gs).str.rindex(sub), - exact=False, - ) - - try: - ps.str.rindex(sub) - except er: - pass - else: - assert not er - - try: - gs.str.rindex(sub) - except er: - pass - else: - assert not er - - -@pytest.mark.parametrize( - "data,sub,expect", - [ - ( - ["abc", "xyz", "a", "ab", "123", "097"], - ["b", "y", "a", "c", "4", "8"], - [True, True, True, False, False, False], - ), - ( - ["A B", "1.5", "3,000", "23", "³", "⅕"], - ["A B", ".", ",", "1", " ", " "], - [True, True, True, False, False, False], - ), - ( - [" ", "\t", "\r", "\f ", "\n", ""], - ["", "\t", "\r", "xx", "yy", "zz"], - [True, True, True, False, False, False], - ), - ( - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ["$", "B", "ab", "*", "@", "dog"], - [True, True, True, False, False, False], - ), - ( - ["hello", "there", "world", "-1234", None, "accént"], - ["lo", "e", "o", "+1234", " ", "e"], - [True, True, True, False, None, False], - ), - ( - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", "", "x", None], - ["A", "B", "C", " ", "y", "e"], - [True, True, True, False, False, None], - ), - ], -) -def test_string_contains_multi(data, sub, expect): - gs = cudf.Series(data) - sub = cudf.Series(sub) - got = gs.str.contains(sub) - expect = cudf.Series(expect) - assert_eq(expect, got, check_dtype=False) - - -# Pandas does not allow 'case' or 'flags' if 'pat' is re.Pattern -# This covers contains, match, count, and replace -@pytest.mark.parametrize( - "pat", - [re.compile("[n-z]"), re.compile("[A-Z]"), re.compile("de"), "A"], -) -@pytest.mark.parametrize("repl", ["xyz", "", " "]) -def test_string_compiled_re(ps_gs, pat, repl): - ps, gs = ps_gs - - expect = ps.str.contains(pat, regex=True) - got = gs.str.contains(pat, regex=True) - assert_eq(expect, got) - - expect = ps.str.match(pat) - got = gs.str.match(pat) - assert_eq(expect, got) - - expect = ps.str.count(pat) - got = gs.str.count(pat) - assert_eq(expect, got, check_dtype=False) - - expect = ps.str.replace(pat, repl, regex=True) - got = gs.str.replace(pat, repl, regex=True) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ["line to be wrapped", "another line to be wrapped"], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ["1. Ant. ", "2. Bee!\n", "3. 
Cat?\t", None], - ], -) -@pytest.mark.parametrize("pat", ["", " ", "a", "abc", "cat", "$", "\n"]) -def test_string_str_match(data, pat): - ps = pd.Series(data) - gs = cudf.Series(data) - - assert_eq(ps.str.match(pat), gs.str.match(pat)) - assert_eq( - pd.Index(pd.Index(ps).str.match(pat)), cudf.Index(gs).str.match(pat) - ) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "a", "ab", "123", "097"], - ["A B", "1.5", "3,000"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ["line to be wrapped", "another line to be wrapped"], - ["hello", "there", "world", "+1234", "-1234", None, "accént", ""], - ["1. Ant. ", "2. Bee!\n", "3. Cat?\t", None], - ], -) -def test_string_str_translate(data): - ps = pd.Series(data) - gs = cudf.Series(data) - - assert_eq( - ps.str.translate(str.maketrans({"a": "z"})), - gs.str.translate(str.maketrans({"a": "z"})), - ) - assert_eq( - pd.Index(ps).str.translate(str.maketrans({"a": "z"})), - cudf.Index(gs).str.translate(str.maketrans({"a": "z"})), - ) - assert_eq( - ps.str.translate(str.maketrans({"a": "z", "i": "$", "z": "1"})), - gs.str.translate(str.maketrans({"a": "z", "i": "$", "z": "1"})), - ) - assert_eq( - pd.Index(ps).str.translate( - str.maketrans({"a": "z", "i": "$", "z": "1"}) - ), - cudf.Index(gs).str.translate( - str.maketrans({"a": "z", "i": "$", "z": "1"}) - ), - ) - assert_eq( - ps.str.translate( - str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) - ), - gs.str.translate( - str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) - ), - ) - assert_eq( - pd.Index(ps).str.translate( - str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) - ), - cudf.Index(gs).str.translate( - str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) - ), - ) - assert_eq( - ps.str.translate(str.maketrans({"é": "É"})), - gs.str.translate(str.maketrans({"é": "É"})), - ) - - -def test_string_str_filter_characters(): - data = [ - "hello world", - "A+B+C+D", - "?!@#$%^&*()", - "accént", - None, - "$1.50", - "", - ] - gs = cudf.Series(data) - expected = cudf.Series( - ["helloworld", "ABCD", "", "accnt", None, "150", ""] - ) - filter = {"a": "z", "A": "Z", "0": "9"} - assert_eq(expected, gs.str.filter_characters(filter)) - - expected = cudf.Series([" ", "+++", "?!@#$%^&*()", "é", None, "$.", ""]) - assert_eq(expected, gs.str.filter_characters(filter, False)) - - expected = cudf.Series( - ["hello world", "A B C D", " ", "acc nt", None, " 1 50", ""] - ) - assert_eq(expected, gs.str.filter_characters(filter, True, " ")) - - with pytest.raises(TypeError): - gs.str.filter_characters(filter, True, ["a"]) - - -def test_string_str_code_points(): - data = [ - "abc", - "Def", - None, - "jLl", - "dog and cat", - "accénted", - "", - " 1234 ", - "XYZ", - ] - gs = cudf.Series(data) - expected = [ - 97, - 98, - 99, - 68, - 101, - 102, - 106, - 76, - 108, - 100, - 111, - 103, - 32, - 97, - 110, - 100, - 32, - 99, - 97, - 116, - 97, - 99, - 99, - 50089, - 110, - 116, - 101, - 100, - 32, - 49, - 50, - 51, - 52, - 32, - 88, - 89, - 90, - ] - expected = cudf.Series(expected) - - assert_eq(expected, gs.str.code_points(), check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - ["http://www.hellow.com", "/home/nvidia/nfs", "123.45 ~ABCDEF"], - ["23", "³", "⅕", ""], - [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat"], - ], -) -def test_string_str_url_encode(data): - gs = cudf.Series(data) - - got = gs.str.url_encode() - expected = pd.Series([urllib.parse.quote(url, safe="~") for url in data]) - assert_eq(expected, 
got) - - -@pytest.mark.parametrize( - "data", - [ - [ - "http://www.hellow.com?k1=acc%C3%A9nted&k2=a%2F/b.c", - "%2Fhome%2fnfs", - "987%20ZYX", - ] - ], -) -def test_string_str_decode_url(data): - gs = cudf.Series(data) - - got = gs.str.url_decode() - expected = pd.Series([urllib.parse.unquote(url) for url in data]) - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data,dtype", - [ - (["0.1", "10.2", "10.876"], "float"), - (["-0.1", "10.2", "+10.876"], "float"), - (["1", "10.2", "10.876"], "float32"), - (["+123", "6344556789", "0"], "int"), - (["+123", "6344556789", "0"], "uint64"), - (["+123", "6344556789", "0"], "float"), - (["0.1", "-10.2", "10.876", None], "float"), - ], -) -@pytest.mark.parametrize("obj_type", [None, "str", "category"]) -def test_string_typecast(data, obj_type, dtype): - psr = pd.Series(data, dtype=obj_type) - gsr = cudf.Series(data, dtype=obj_type) - - expect = psr.astype(dtype=dtype) - actual = gsr.astype(dtype=dtype) - assert_eq(expect, actual) - - -@pytest.mark.parametrize( - "data,dtype", + "data,dtype", [ (["0.1", "10.2", "10.876"], "int"), (["1", "10.2", "+10.876"], "int"), @@ -2636,93 +611,6 @@ def test_string_typecast_error(data, obj_type, dtype): ) -@pytest.mark.parametrize( - "data", - [ - ["f0:18:98:22:c2:e4", "00:00:00:00:00:00", "ff:ff:ff:ff:ff:ff"], - ["f0189822c2e4", "000000000000", "ffffffffffff"], - ["0xf0189822c2e4", "0x000000000000", "0xffffffffffff"], - ["0Xf0189822c2e4", "0X000000000000", "0Xffffffffffff"], - ], -) -def test_string_hex_to_int(data): - gsr = cudf.Series(data) - - expected = cudf.Series([263988422296292, 0, 281474976710655]) - - got = gsr.str.htoi() - assert_eq(expected, got) - - got = gsr.str.hex_to_int() # alias - assert_eq(expected, got) - - -def test_string_ishex(): - gsr = cudf.Series(["", None, "0x01a2b3c4d5e6f", "0789", "ABCDEF0"]) - got = gsr.str.ishex() - expected = cudf.Series([False, None, True, True, True]) - assert_eq(expected, got) - - -def test_string_istimestamp(): - gsr = cudf.Series( - [ - "", - None, - "20201009 123456.987654AM+0100", - "1920111 012345.000001", - "18201235 012345.1", - "20201009 250001.2", - "20201009 129901.3", - "20201009 123499.4", - "20201009 000000.500000PM-0130", - "20201009:000000.600000", - "20201009 010203.700000PM-2500", - "20201009 010203.800000AM+0590", - "20201009 010203.900000AP-0000", - ] - ) - got = gsr.str.istimestamp(r"%Y%m%d %H%M%S.%f%p%z") - expected = cudf.Series( - [ - False, - None, - True, - False, - False, - False, - False, - False, - True, - False, - False, - False, - False, - ] - ) - assert_eq(expected, got) - - -def test_istimestamp_empty(): - gsr = cudf.Series([], dtype="object") - result = gsr.str.istimestamp("%Y%m%d") - expected = cudf.Series([], dtype="bool") - assert_eq(result, expected) - - -def test_string_ip4_to_int(): - gsr = cudf.Series( - ["", None, "hello", "41.168.0.1", "127.0.0.1", "41.197.0.1"] - ) - expected = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]) - - got = gsr.str.ip2int() - assert_eq(expected, got) - - got = gsr.str.ip_to_int() # alias - assert_eq(expected, got) - - def test_string_int_to_ipv4(): gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]).astype( "uint32" @@ -2736,41 +624,6 @@ def test_string_int_to_ipv4(): assert_eq(expected, got) -def test_string_isipv4(): - gsr = cudf.Series( - [ - "", - None, - "1...1", - "141.168.0.1", - "127.0.0.1", - "1.255.0.1", - "256.27.28.26", - "25.257.28.26", - "25.27.258.26", - "25.27.28.256", - "-1.0.0.0", - ] - ) - got = gsr.str.isipv4() - expected = cudf.Series( 
- [ - False, - None, - False, - True, - True, - True, - False, - False, - False, - False, - False, - ] - ) - assert_eq(expected, got) - - @pytest.mark.parametrize( "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"uint32"})) ) @@ -2780,170 +633,6 @@ def test_string_int_to_ipv4_dtype_fail(dtype): gsr._column.int2ip() -@pytest.mark.parametrize( - "data", - [ - ["abc", "xyz", "pqr", "tuv"], - ["aaaaaaaaaaaa"], - ["aaaaaaaaaaaa", "bdfeqwert", "poiuytre"], - ], -) -@pytest.mark.parametrize( - "index", - [ - 0, - 1, - 2, - slice(0, 1, 2), - slice(0, 5, 2), - slice(-1, -2, 1), - slice(-1, -2, -1), - slice(-2, -1, -1), - slice(-2, -1, 1), - slice(0), - slice(None), - ], -) -def test_string_str_subscriptable(data, index): - psr = pd.Series(data) - gsr = cudf.Series(data) - - assert_eq(psr.str[index], gsr.str[index]) - - psi = pd.Index(data) - gsi = cudf.Index(data) - - assert_eq(psi.str[index], gsi.str[index]) - - -@pytest.mark.parametrize( - "data,expected", - [ - (["abc", "xyz", "pqr", "tuv"], [3, 3, 3, 3]), - (["aaaaaaaaaaaa"], [12]), - (["aaaaaaaaaaaa", "bdfeqwert", "poiuytre"], [12, 9, 8]), - (["abc", "d", "ef"], [3, 1, 2]), - (["Hello", "Bye", "Thanks 😊"], [5, 3, 11]), - (["\n\t", "Bye", "Thanks 😊"], [2, 3, 11]), - ], -) -def test_string_str_byte_count(data, expected): - sr = cudf.Series(data) - expected = cudf.Series(expected, dtype="int32") - actual = sr.str.byte_count() - assert_eq(expected, actual) - - si = cudf.Index(data) - expected = cudf.Index(expected, dtype="int32") - actual = si.str.byte_count() - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,expected", - [ - (["1", "2", "3", "4", "5"], [True, True, True, True, True]), - ( - ["1.1", "2.0", "3.2", "4.3", "5."], - [False, False, False, False, False], - ), - ( - [".12312", "213123.", ".3223.", "323423.."], - [False, False, False, False], - ), - ([""], [False]), - ( - ["1..1", "+2", "++3", "4++", "-5"], - [False, True, False, False, True], - ), - ( - [ - "24313345435345 ", - "+2632726478", - "++367293674326", - "4382493264392746.237649274692++", - "-578239479238469264", - ], - [False, True, False, False, True], - ), - ( - ["2a2b", "a+b", "++a", "a.b++", "-b"], - [False, False, False, False, False], - ), - ( - ["2a2b", "1+3", "9.0++a", "+", "-"], - [False, False, False, False, False], - ), - ], -) -def test_str_isinteger(data, expected): - sr = cudf.Series(data, dtype="str") - expected = cudf.Series(expected) - actual = sr.str.isinteger() - assert_eq(expected, actual) - - sr = cudf.Index(data) - expected = cudf.Index(expected) - actual = sr.str.isinteger() - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,expected", - [ - (["1", "2", "3", "4", "5"], [True, True, True, True, True]), - (["1.1", "2.0", "3.2", "4.3", "5."], [True, True, True, True, True]), - ([""], [False]), - ( - [".12312", "213123.", ".3223.", "323423.."], - [True, True, False, False], - ), - ( - ["1.00.323.1", "+2.1", "++3.30", "4.9991++", "-5.3"], - [False, True, False, False, True], - ), - ( - [ - "24313345435345 ", - "+2632726478", - "++367293674326", - "4382493264392746.237649274692++", - "-578239479238469264", - ], - [False, True, False, False, True], - ), - ( - [ - "24313345435345.32732 ", - "+2632726478.3627638276", - "++0.326294632367293674326", - "4382493264392746.237649274692++", - "-57823947923.8469264", - ], - [False, True, False, False, True], - ), - ( - ["2a2b", "a+b", "++a", "a.b++", "-b"], - [False, False, False, False, False], - ), - ( - ["2a2b", "1+3", "9.0++a", "+", "-"], - [False, False, False, False, False], - 
), - ], -) -def test_str_isfloat(data, expected): - sr = cudf.Series(data, dtype="str") - expected = cudf.Series(expected) - actual = sr.str.isfloat() - assert_eq(expected, actual) - - sr = cudf.Index(data) - expected = cudf.Index(expected) - actual = sr.str.isfloat() - assert_eq(expected, actual) - - @pytest.mark.parametrize( "data", [ @@ -3061,473 +750,6 @@ def test_string_slice_with_mask(): assert_eq(actual, expected) -@pytest.mark.parametrize( - "data", - [ - [ - """ - { - "store":{ - "book":[ - { - "category":"reference", - "author":"Nigel Rees", - "title":"Sayings of the Century", - "price":8.95 - }, - { - "category":"fiction", - "author":"Evelyn Waugh", - "title":"Sword of Honour", - "price":12.99 - } - ] - } - } - """ - ], - [ - """ - { - "store":{ - "book":[ - { - "category":"reference", - "author":"Nigel Rees", - "title":"Sayings of the Century", - "price":8.95 - } - ] - } - } - """, - """ - { - "store":{ - "book":[ - { - "category":"fiction", - "author":"Evelyn Waugh", - "title":"Sword of Honour", - "price":12.99 - } - ] - } - } - """, - ], - ], -) -def test_string_get_json_object_n(data): - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq( - json.loads(gs.str.get_json_object("$.store")[0]), - ps.apply(lambda x: json.loads(x)["store"])[0], - ) - assert_eq( - json.loads(gs.str.get_json_object("$.store.book")[0]), - ps.apply(lambda x: json.loads(x)["store"]["book"])[0], - ) - assert_eq( - gs.str.get_json_object("$.store.book[0].category"), - ps.apply(lambda x: json.loads(x)["store"]["book"][0]["category"]), - ) - - -@pytest.mark.parametrize( - "json_path", ["$.store", "$.store.book", "$.store.book[*].category", " "] -) -def test_string_get_json_object_empty_json_strings(json_path): - gs = cudf.Series( - [ - """ - { - "":{ - "":[ - { - "":"", - "":"", - "":"" - }, - { - "":"fiction", - "":"", - "title":"" - } - ] - } - } - """ - ] - ) - - got = gs.str.get_json_object(json_path) - expect = cudf.Series([None], dtype="object") - - assert_eq(got, expect) - - -@pytest.mark.parametrize("json_path", ["a", ".", "/.store"]) -def test_string_get_json_object_invalid_JSONPath(json_path): - gs = cudf.Series( - [ - """ - { - "store":{ - "book":[ - { - "category":"reference", - "author":"Nigel Rees", - "title":"Sayings of the Century", - "price":8.95 - }, - { - "category":"fiction", - "author":"Evelyn Waugh", - "title":"Sword of Honour", - "price":12.99 - } - ] - } - } - """ - ] - ) - - with pytest.raises(ValueError): - gs.str.get_json_object(json_path) - - -def test_string_get_json_object_allow_single_quotes(): - gs = cudf.Series( - [ - """ - { - "store":{ - "book":[ - { - 'author':"Nigel Rees", - "title":'Sayings of the Century', - "price":8.95 - }, - { - "category":"fiction", - "author":"Evelyn Waugh", - 'title':"Sword of Honour", - "price":12.99 - } - ] - } - } - """ - ] - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[0].author", allow_single_quotes=True - ), - cudf.Series(["Nigel Rees"]), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].title", allow_single_quotes=True - ), - cudf.Series(["['Sayings of the Century',\"Sword of Honour\"]"]), - ) - - assert_eq( - gs.str.get_json_object( - "$.store.book[0].author", allow_single_quotes=False - ), - cudf.Series([None]), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].title", allow_single_quotes=False - ), - cudf.Series([None]), - ) - - -def test_string_get_json_object_strip_quotes_from_single_strings(): - gs = cudf.Series( - [ - """ - { - "store":{ - "book":[ - { - "author":"Nigel Rees", - 
"title":"Sayings of the Century", - "price":8.95 - }, - { - "category":"fiction", - "author":"Evelyn Waugh", - "title":"Sword of Honour", - "price":12.99 - } - ] - } - } - """ - ] - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[0].author", strip_quotes_from_single_strings=True - ), - cudf.Series(["Nigel Rees"]), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].title", strip_quotes_from_single_strings=True - ), - cudf.Series(['["Sayings of the Century","Sword of Honour"]']), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[0].author", strip_quotes_from_single_strings=False - ), - cudf.Series(['"Nigel Rees"']), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].title", strip_quotes_from_single_strings=False - ), - cudf.Series(['["Sayings of the Century","Sword of Honour"]']), - ) - - -def test_string_get_json_object_missing_fields_as_nulls(): - gs = cudf.Series( - [ - """ - { - "store":{ - "book":[ - { - "author":"Nigel Rees", - "title":"Sayings of the Century", - "price":8.95 - }, - { - "category":"fiction", - "author":"Evelyn Waugh", - "title":"Sword of Honour", - "price":12.99 - } - ] - } - } - """ - ] - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[0].category", missing_fields_as_nulls=True - ), - cudf.Series(["null"]), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].category", missing_fields_as_nulls=True - ), - cudf.Series(['[null,"fiction"]']), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[0].category", missing_fields_as_nulls=False - ), - cudf.Series([None]), - ) - assert_eq( - gs.str.get_json_object( - "$.store.book[*].category", missing_fields_as_nulls=False - ), - cudf.Series(['["fiction"]']), - ) - - -def test_str_join_lists_error(): - sr = cudf.Series([["a", "a"], ["b"], ["c"]]) - - with pytest.raises( - ValueError, match="sep_na_rep cannot be defined when `sep` is scalar." 
-    ):
-        sr.str.join(sep="-", sep_na_rep="-")
-
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            "string_na_rep should be a string scalar, got [10, 20] of type "
-            ": <class 'list'>"
-        ),
-    ):
-        sr.str.join(string_na_rep=[10, 20])
-
-    with pytest.raises(
-        ValueError,
-        match=re.escape(
-            "sep should be of similar size to the series, got: 2, expected: 3"
-        ),
-    ):
-        sr.str.join(sep=["=", "-"])
-
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            "sep_na_rep should be a string scalar, got "
-            "['na'] of type: <class 'list'>"
-        ),
-    ):
-        sr.str.join(sep=["-", "+", "."], sep_na_rep=["na"])
-
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            "sep should be an str, array-like or Series object, "
-            "found <class 'cudf.core.dataframe.DataFrame'>"
-        ),
-    ):
-        sr.str.join(sep=cudf.DataFrame())
-
-
-@pytest.mark.parametrize(
-    "sr,sep,string_na_rep,sep_na_rep,expected",
-    [
-        (
-            [["a", "a"], ["b"], ["c"]],
-            "-",
-            None,
-            None,
-            ["a-a", "b", "c"],
-        ),
-        (
-            [["a", "b"], [None], [None, "hello", None, "world"]],
-            "__",
-            "=",
-            None,
-            ["a__b", None, "=__hello__=__world"],
-        ),
-        (
-            [
-                ["a", None, "b"],
-                [None],
-                [None, "hello", None, "world"],
-                None,
-            ],
-            ["-", "_", "**", "!"],
-            None,
-            None,
-            ["a--b", None, "**hello****world", None],
-        ),
-        (
-            [
-                ["a", None, "b"],
-                [None],
-                [None, "hello", None, "world"],
-                None,
-            ],
-            ["-", "_", "**", None],
-            "rep_str",
-            "sep_str",
-            ["a-rep_str-b", None, "rep_str**hello**rep_str**world", None],
-        ),
-        (
-            [[None, "a"], [None], None],
-            ["-", "_", None],
-            "rep_str",
-            None,
-            ["rep_str-a", None, None],
-        ),
-        (
-            [[None, "a"], [None], None],
-            ["-", "_", None],
-            None,
-            "sep_str",
-            ["-a", None, None],
-        ),
-    ],
-)
-def test_str_join_lists(sr, sep, string_na_rep, sep_na_rep, expected):
-    sr = cudf.Series(sr)
-    actual = sr.str.join(
-        sep=sep, string_na_rep=string_na_rep, sep_na_rep=sep_na_rep
-    )
-    expected = cudf.Series(expected)
-    assert_eq(actual, expected)
-
-
-@pytest.mark.parametrize(
-    "patterns, expected",
-    [
-        (
-            lambda: ["a", "s", "g", "i", "o", "r"],
-            [
-                [-1, 0, 5, 3, -1, 2],
-                [-1, -1, -1, -1, 1, -1],
-                [2, 0, -1, -1, -1, 3],
-                [-1, -1, -1, 0, -1, -1],
-            ],
-        ),
-        (
-            lambda: cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"]),
-            [
-                [-1, 0, 5, -1, -1, 2, -1],
-                [-1, -1, -1, -1, 1, -1, -1],
-                [2, -1, -1, -1, -1, 3, 0],
-                [-1, -1, -1, -1, -1, -1, -1],
-            ],
-        ),
-    ],
-)
-def test_str_find_multiple(patterns, expected):
-    s = cudf.Series(["strings", "to", "search", "in"])
-    t = patterns()
-
-    expected = cudf.Series(expected)
-
-    # We convert to pandas because find_multiple returns ListDtype(int32)
-    # and expected is ListDtype(int64).
-    # Currently there is no easy way to type-cast these to match.
-    assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas())
-
-    s = cudf.Index(s)
-    t = cudf.Index(t)
-
-    expected.index = s
-
-    assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas())
-
-
-def test_str_find_multiple_error():
-    s = cudf.Series(["strings", "to", "search", "in"])
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            "patterns should be an array-like or a Series object, found "
-            "<class 'str'>"
-        ),
-    ):
-        s.str.find_multiple("a")
-
-    t = cudf.Series([1, 2, 3])
-    with pytest.raises(
-        TypeError,
-        match=re.escape("patterns can only be of 'string' dtype, got: int64"),
-    ):
-        s.str.find_multiple(t)
-
-
-def test_str_iterate_error():
-    s = cudf.Series(["abc", "xyz"])
-    with pytest.raises(TypeError):
-        iter(s.str)
-
-
 def test_string_reduction_error():
     s = cudf.Series([None, None], dtype="str")
     ps = s.to_pandas(nullable=True)
@@ -3544,51 +766,3 @@ def test_string_reduction_error():
         lfunc_args_and_kwargs=([], {"skipna": False}),
         rfunc_args_and_kwargs=([], {"skipna": False}),
     )
-
-
-def test_getitem_out_of_bounds():
-    data = ["123", "12", "1"]
-    pd_ser = pd.Series(data)
-    cudf_ser = cudf.Series(data)
-    expected = pd_ser.str[2]
-    result = cudf_ser.str[2]
-    assert_eq(result, expected)
-
-    expected = pd_ser.str[-2]
-    result = cudf_ser.str[-2]
-    assert_eq(result, expected)
-
-
-@pytest.mark.parametrize("method", ["startswith", "endswith"])
-@pytest.mark.parametrize("pat", [None, (1, 2), pd.Series([1])])
-def test_startsendwith_invalid_pat(method, pat):
-    ser = cudf.Series(["1"])
-    with pytest.raises(TypeError):
-        getattr(ser.str, method)(pat)
-
-
-@pytest.mark.parametrize("method", ["rindex", "index"])
-def test_index_int64_pandas_compat(method):
-    data = ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"]
-    with cudf.option_context("mode.pandas_compatible", True):
-        result = getattr(cudf.Series(data).str, method)("E", 4, 8)
-    expected = getattr(pd.Series(data).str, method)("E", 4, 8)
-    assert_eq(result, expected)
-
-
-def test_replace_invalid_scalar_repl():
-    ser = cudf.Series(["1"])
-    with pytest.raises(TypeError):
-        ser.str.replace("1", 2)
-
-
-def test_string_methods_setattr():
-    ser = cudf.Series(["ab", "cd", "ef"])
-    pser = ser.to_pandas()
-
-    assert_exceptions_equal(
-        lfunc=ser.str.__setattr__,
-        rfunc=pser.str.__setattr__,
-        lfunc_args_and_kwargs=(("a", "b"),),
-        rfunc_args_and_kwargs=(("a", "b"),),
-    )

From af5fb305cb06d6e2218f1142165856b325c2cbbe Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 1 Aug 2025 09:54:12 -0700
Subject: [PATCH 042/366] Move (most of) test_timedelta.py and test_struct.py
 to new cudf classic test directory structure (#19551)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

The rest of the `test_timedelta.py` tests are waiting for reduction and
binops tests to be cleaned up, as in e.g.
https://github.com/rapidsai/cudf/pull/19473

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/19551
---
 python/cudf/cudf/tests/conftest.py | 12 +-
 .../indexing}/__init__.py | 0
 .../tests/dataframe/indexing/test_getitem.py | 10 +
 .../tests/dataframe/methods/test_to_struct.py | 58 ++
 .../tests/{structs => dtypes}/__init__.py | 0
 .../cudf/tests/dtypes/test_structdtype.py | 27 +
 .../cudf/cudf/tests/indexes/index/__init__.py | 0
 .../tests/indexes/index/test_constructor.py | 35 ++
 .../indexes/timedelta/test_components.py | 1 -
.../indexes/timedelta/test_constructing.py | 1 - .../indexes/timedelta/test_conversion.py | 1 - .../tests/indexes/timedeltaindex/__init__.py | 0 .../timedeltaindex/methods/__init__.py | 0 .../methods/test_total_seconds.py | 37 ++ .../indexes/timedeltaindex/test_attributes.py | 100 ++++ .../timedeltaindex/test_constructor.py | 32 + .../cudf/tests/series/accessors/test_dt.py | 95 +++ .../tests/series/accessors/test_struct.py | 68 +++ .../tests/series/indexing/test_getitem.py | 150 +++++ .../cudf/tests/series/indexing/test_iloc.py | 23 + .../tests/series/indexing/test_setitem.py | 45 ++ .../cudf/tests/series/methods/test_astype.py | 213 +++++++ .../cudf/tests/series/methods/test_copy.py | 9 + .../cudf/tests/series/methods/test_fillna.py | 41 ++ .../tests/series/methods/test_memory_usage.py | 23 +- .../tests/series/methods/test_to_numpy.py | 34 ++ .../tests/series/methods/test_to_pandas.py | 43 ++ .../cudf/cudf/tests/series/test_attributes.py | 30 +- .../cudf/tests/series/test_constructors.py | 117 ++++ .../cudf/tests/structs/test_struct_methods.py | 1 - python/cudf/cudf/tests/test_struct.py | 403 ------------- python/cudf/cudf/tests/test_timedelta.py | 561 ------------------ 32 files changed, 1189 insertions(+), 981 deletions(-) rename python/cudf/cudf/tests/{indexes/timedelta => dataframe/indexing}/__init__.py (100%) create mode 100644 python/cudf/cudf/tests/dataframe/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_struct.py rename python/cudf/cudf/tests/{structs => dtypes}/__init__.py (100%) create mode 100644 python/cudf/cudf/tests/dtypes/test_structdtype.py create mode 100644 python/cudf/cudf/tests/indexes/index/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/index/test_constructor.py delete mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_components.py delete mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_constructing.py delete mode 100644 python/cudf/cudf/tests/indexes/timedelta/test_conversion.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/methods/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_total_seconds.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/test_attributes.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/test_constructor.py create mode 100644 python/cudf/cudf/tests/series/accessors/test_dt.py create mode 100644 python/cudf/cudf/tests/series/accessors/test_struct.py create mode 100644 python/cudf/cudf/tests/series/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/series/indexing/test_iloc.py create mode 100644 python/cudf/cudf/tests/series/methods/test_copy.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_numpy.py delete mode 100644 python/cudf/cudf/tests/structs/test_struct_methods.py delete mode 100644 python/cudf/cudf/tests/test_struct.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 9d43aeff1fd..008867211f4 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -146,18 +146,14 @@ def pytest_sessionfinish(session, exitstatus): @pytest.fixture(params=[32, 64]) def default_integer_bitwidth(request): - old_default = cudf.get_option("default_integer_bitwidth") - cudf.set_option("default_integer_bitwidth", request.param) - yield request.param - cudf.set_option("default_integer_bitwidth", old_default) + 
with cudf.option_context("default_integer_bitwidth", request.param): + yield request.param @pytest.fixture(params=[32, 64]) def default_float_bitwidth(request): - old_default = cudf.get_option("default_float_bitwidth") - cudf.set_option("default_float_bitwidth", request.param) - yield request.param - cudf.set_option("default_float_bitwidth", old_default) + with cudf.option_context("default_float_bitwidth", request.param): + yield request.param @pytest.hookimpl(tryfirst=True, hookwrapper=True) diff --git a/python/cudf/cudf/tests/indexes/timedelta/__init__.py b/python/cudf/cudf/tests/dataframe/indexing/__init__.py similarity index 100% rename from python/cudf/cudf/tests/indexes/timedelta/__init__.py rename to python/cudf/cudf/tests/dataframe/indexing/__init__.py diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py new file mode 100644 index 00000000000..6453a4abca1 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cudf +from cudf.testing import assert_eq + + +def test_struct_of_struct_loc(): + df = cudf.DataFrame({"col": [{"a": {"b": 1}}]}) + expect = cudf.Series([{"a": {"b": 1}}], name="col") + assert_eq(expect, df["col"]) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_struct.py b/python/cudf/cudf/tests/dataframe/methods/test_to_struct.py new file mode 100644 index 00000000000..e6a65ad5823 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_struct.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_struct_with_datetime_and_timedelta(temporal_types_as_str): + df = cudf.DataFrame( + { + "a": [12, 232, 2334], + "datetime": cudf.Series( + [23432, 3432423, 324324], dtype=temporal_types_as_str + ), + } + ) + series = df.to_struct() + a_array = np.array([12, 232, 2334]) + datetime_array = np.array([23432, 3432423, 324324]).astype( + temporal_types_as_str + ) + + actual = series.to_pandas() + values_list = [] + for i, val in enumerate(a_array): + values_list.append({"a": val, "datetime": datetime_array[i]}) + + expected = pd.Series(values_list) + assert_eq(expected, actual) + + +def test_dataframe_to_struct(): + df = cudf.DataFrame() + expect = cudf.Series(dtype=cudf.StructDtype({})) + got = df.to_struct() + assert_eq(expect, got) + + df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) + expect = cudf.Series( + [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}, {"a": 3, "b": "z"}] + ) + got = df.to_struct() + assert_eq(expect, got) + + # check that a copy was made: + df["a"][0] = 5 + assert_eq(got, expect) + + # check that a non-string (but convertible to string) named column can be + # converted to struct + df = cudf.DataFrame([[1, 2], [3, 4]], columns=[(1, "b"), 0]) + expect = cudf.Series([{"(1, 'b')": 1, "0": 2}, {"(1, 'b')": 3, "0": 4}]) + with pytest.warns(UserWarning, match="will be casted"): + got = df.to_struct() + assert_eq(got, expect) diff --git a/python/cudf/cudf/tests/structs/__init__.py b/python/cudf/cudf/tests/dtypes/__init__.py similarity index 100% rename from python/cudf/cudf/tests/structs/__init__.py rename to python/cudf/cudf/tests/dtypes/__init__.py diff --git a/python/cudf/cudf/tests/dtypes/test_structdtype.py b/python/cudf/cudf/tests/dtypes/test_structdtype.py new file mode 100644 index 00000000000..5ccb4640b76 --- /dev/null +++ 
b/python/cudf/cudf/tests/dtypes/test_structdtype.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf + + +@pytest.mark.parametrize( + "fields", + [ + {"a": np.dtype(np.int64)}, + {"a": np.dtype(np.int64), "b": None}, + { + "a": cudf.ListDtype(np.dtype(np.int64)), + "b": cudf.Decimal64Dtype(1, 0), + }, + { + "a": cudf.ListDtype(cudf.StructDtype({"b": np.dtype(np.int64)})), + "b": cudf.ListDtype(cudf.ListDtype(np.dtype(np.int64))), + }, + ], +) +def test_serialize_struct_dtype(fields): + dtype = cudf.StructDtype(fields) + recreated = dtype.__class__.device_deserialize(*dtype.device_serialize()) + assert recreated == dtype diff --git a/python/cudf/cudf/tests/indexes/index/__init__.py b/python/cudf/cudf/tests/indexes/index/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/index/test_constructor.py b/python/cudf/cudf/tests/indexes/index/test_constructor.py new file mode 100644 index 00000000000..7e9a577b28f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/test_constructor.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +def test_infer_timedelta_index(data, timedelta_types_as_str): + gdi = cudf.Index(data, dtype=timedelta_types_as_str) + pdi = gdi.to_pandas() + + assert_eq(pdi, gdi) diff --git a/python/cudf/cudf/tests/indexes/timedelta/test_components.py b/python/cudf/cudf/tests/indexes/timedelta/test_components.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/timedelta/test_components.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/timedelta/test_constructing.py b/python/cudf/cudf/tests/indexes/timedelta/test_constructing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/timedelta/test_constructing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/timedelta/test_conversion.py b/python/cudf/cudf/tests/indexes/timedelta/test_conversion.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/timedelta/test_conversion.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/__init__.py b/python/cudf/cudf/tests/indexes/timedeltaindex/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/methods/__init__.py b/python/cudf/cudf/tests/indexes/timedeltaindex/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_total_seconds.py b/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_total_seconds.py new file mode 100644 index 00000000000..14d8f75d1a3 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_total_seconds.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +def test_timedelta_index_total_seconds(data, timedelta_types_as_str): + gi = cudf.TimedeltaIndex(data, dtype=timedelta_types_as_str) + pi = gi.to_pandas() + + expected = pi.total_seconds() + actual = gi.total_seconds() + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/test_attributes.py b/python/cudf/cudf/tests/indexes/timedeltaindex/test_attributes.py new file mode 100644 index 00000000000..b74e173bd4e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/timedeltaindex/test_attributes.py @@ -0,0 +1,100 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +@pytest.mark.parametrize("name", ["abcd", None]) +def test_timedelta_index_properties(data, timedelta_types_as_str, name): + gdi = cudf.TimedeltaIndex(data, dtype=timedelta_types_as_str, name=name) + pdi = gdi.to_pandas() + + def local_assert(expected, actual): + if actual._column.null_count: + assert_eq(expected, actual.astype("float64")) + else: + assert_eq(expected, actual) + + expected_days = pdi.days + actual_days = gdi.days + + local_assert(expected_days, actual_days) + + expected_seconds = pdi.seconds + actual_seconds = gdi.seconds + + local_assert(expected_seconds, actual_seconds) + + expected_microseconds = pdi.microseconds + actual_microseconds = gdi.microseconds + + local_assert(expected_microseconds, actual_microseconds) + + expected_nanoseconds = pdi.nanoseconds + actual_nanoseconds = gdi.nanoseconds + + local_assert(expected_nanoseconds, actual_nanoseconds) + + expected_components = pdi.components + actual_components = gdi.components + + if actual_components.isnull().any().any(): + assert_eq(expected_components, actual_components.astype("float")) + else: + assert_eq( + expected_components, + actual_components, + check_index_type=not actual_components.empty, + ) + + +def test_tdi_unit(): + pd_tdi = pd.TimedeltaIndex( + ["1 day", "2 days", "3 days"], dtype="timedelta64[ns]" + ) + cudf_tdi = cudf.from_pandas(pd_tdi) + + result = pd_tdi.unit + expected = cudf_tdi.unit + assert result == expected + + +def test_tdi_asi8(): + pd_tdi = pd.TimedeltaIndex(["1 day", "2 days", "3 days"]) + cudf_tdi = cudf.from_pandas(pd_tdi) + + result = pd_tdi.asi8 + expected = cudf_tdi.asi8 + assert_eq(result, expected) + + +def test_error_values(): + s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") + with pytest.raises(NotImplementedError, match="cupy does not support"): + s.values diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/test_constructor.py b/python/cudf/cudf/tests/indexes/timedeltaindex/test_constructor.py new file mode 100644 index 00000000000..568a9a4edb5 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/timedeltaindex/test_constructor.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_timedelta_constructor(): + data = [43534, 43543, 37897, 2000] + dtype = "timedelta64[ns]" + expected = pd.TimedeltaIndex(data=data, dtype=dtype) + actual = cudf.TimedeltaIndex(data=data, dtype=dtype) + + assert_eq(expected, actual) + + expected = pd.TimedeltaIndex(data=pd.Series(data), dtype=dtype) + actual = cudf.TimedeltaIndex(data=cudf.Series(data), dtype=dtype) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("name", [None, "delta-index"]) +def test_create_TimedeltaIndex(timedelta_types_as_str, name): + gdi = cudf.TimedeltaIndex( + [1132223, 2023232, 342234324, 4234324], + dtype=timedelta_types_as_str, + name=name, + ) + pdi = gdi.to_pandas() + assert_eq(pdi, gdi) diff --git a/python/cudf/cudf/tests/series/accessors/test_dt.py b/python/cudf/cudf/tests/series/accessors/test_dt.py new file mode 100644 index 00000000000..40b604bc043 --- /dev/null +++ b/python/cudf/cudf/tests/series/accessors/test_dt.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture( + params=[ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [ + 136457654736252, + 134736784364431, + 245345345545332, + 223432411, + 2343241, + 3634548734, + 23234, + ], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ] +) +def timedelta_data(request): + return request.param + + +def test_timedelta_dt_components(timedelta_data, timedelta_types_as_str): + gsr = cudf.Series(timedelta_data, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + expected = psr.dt.components + actual = gsr.dt.components + + if gsr.isnull().any(): + assert_eq(expected, actual.astype("float")) + else: + assert_eq(expected, actual) + + +def test_timedelta_dt_properties(timedelta_data, timedelta_types_as_str): + gsr = cudf.Series(timedelta_data, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + def local_assert(expected, actual, **kwargs): + if gsr.isnull().any(): + assert_eq(expected, actual.astype("float"), **kwargs) + else: + assert_eq(expected, actual, **kwargs) + + expected_days = psr.dt.days + actual_days = gsr.dt.days + + local_assert(expected_days, actual_days, check_dtype=False) + + expected_seconds = psr.dt.seconds + actual_seconds = gsr.dt.seconds + + local_assert(expected_seconds, actual_seconds, check_dtype=False) + + expected_microseconds = psr.dt.microseconds + actual_microseconds = gsr.dt.microseconds + + local_assert(expected_microseconds, actual_microseconds, check_dtype=False) + + expected_nanoseconds = psr.dt.nanoseconds + actual_nanoseconds = gsr.dt.nanoseconds + + local_assert(expected_nanoseconds, actual_nanoseconds, check_dtype=False) + + +def test_timedelta_series_total_seconds( + timedelta_data, timedelta_types_as_str +): + gsr = cudf.Series(timedelta_data, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + expected = psr.dt.total_seconds() + actual = gsr.dt.total_seconds() + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/accessors/test_struct.py 
b/python/cudf/cudf/tests/series/accessors/test_struct.py new file mode 100644 index 00000000000..ce68e097302 --- /dev/null +++ b/python/cudf/cudf/tests/series/accessors/test_struct.py @@ -0,0 +1,68 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_struct_iterate_error(): + s = cudf.Series( + [{"f2": {"a": "sf21"}, "f1": "a"}, {"f1": "sf12", "f2": None}] + ) + with pytest.raises(TypeError): + iter(s.struct) + + +@pytest.mark.parametrize( + "data", + [ + [{}], + [{"a": None}], + [{"a": 1}], + [{"a": "one"}], + [{"a": 1}, {"a": 2}], + [{"a": 1, "b": "one"}, {"a": 2, "b": "two"}], + [{"b": "two", "a": None}, None, {"a": "one", "b": "two"}], + ], +) +def test_struct_field_errors(data): + got = cudf.Series(data) + + with pytest.raises(KeyError): + got.struct.field("notWithinFields") + + with pytest.raises(IndexError): + got.struct.field(100) + + +def test_struct_explode(): + s = cudf.Series([], dtype=cudf.StructDtype({})) + expect = cudf.DataFrame({}) + assert_eq(expect, s.struct.explode()) + + s = cudf.Series( + [ + {"a": 1, "b": "x"}, + {"a": 2, "b": "y"}, + {"a": 3, "b": "z"}, + {"a": 4, "b": "a"}, + ] + ) + expect = cudf.DataFrame({"a": [1, 2, 3, 4], "b": ["x", "y", "z", "a"]}) + got = s.struct.explode() + assert_eq(expect, got) + + # check that a copy was made: + got["a"][0] = 5 + assert_eq(s.struct.explode(), expect) + + +@pytest.mark.parametrize( + "key, expect", [(0, [1, 3]), (1, [2, 4]), ("a", [1, 3]), ("b", [2, 4])] +) +def test_struct_for_field(key, expect): + sr = cudf.Series([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) + expect = cudf.Series(expect) + got = sr.struct.field(key) + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/indexing/test_getitem.py b/python/cudf/cudf/tests/series/indexing/test_getitem.py new file mode 100644 index 00000000000..568d6761cdd --- /dev/null +++ b/python/cudf/cudf/tests/series/indexing/test_getitem.py @@ -0,0 +1,150 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data, idx, expected", + [ + ( + [{"f2": {"a": "sf21"}, "f1": "a"}, {"f1": "sf12", "f2": None}], + 0, + {"f1": "a", "f2": {"a": "sf21"}}, + ), + ( + [ + {"f2": {"a": "sf21"}}, + {"f1": "sf12", "f2": None}, + ], + 0, + {"f1": cudf.NA, "f2": {"a": "sf21"}}, + ), + ( + [{"a": "123"}, {"a": "sf12", "b": {"a": {"b": "c"}}}], + 1, + {"a": "sf12", "b": {"a": {"b": "c"}}}, + ), + ], +) +def test_nested_struct_extract_host_scalars(data, idx, expected): + series = cudf.Series(data) + + def _nested_na_replace(struct_scalar): + """ + Replace `cudf.NA` with `None` in the dict + """ + for key, value in struct_scalar.items(): + if value is cudf.NA: + struct_scalar[key] = None + return struct_scalar + + assert _nested_na_replace(series[idx]) == _nested_na_replace(expected) + + +def test_nested_struct_from_pandas_empty(): + # tests constructing nested structs columns that would result in + # libcudf EMPTY type child columns inheriting their parent's null + # mask. 
See GH PR: #10761 + pdf = pd.Series([[{"c": {"x": None}}], [{"c": None}]]) + gdf = cudf.from_pandas(pdf) + + assert_eq(pdf, gdf) + + +def test_struct_int_values(): + series = cudf.Series( + [{"a": 1, "b": 2}, {"a": 10, "b": None}, {"a": 5, "b": 6}] + ) + actual_series = series.to_pandas() + + assert isinstance(actual_series[0]["b"], int) + assert isinstance(actual_series[1]["b"], type(None)) + assert isinstance(actual_series[2]["b"], int) + + +def test_struct_slice_nested_struct(): + data = [ + {"a": {"b": 42, "c": "abc"}}, + {"a": {"b": 42, "c": "hello world"}}, + ] + + got = cudf.Series(data)[0:1] + expect = cudf.Series(data[0:1]) + assert got.to_arrow() == expect.to_arrow() + + +@pytest.mark.parametrize( + "series, slce", + [ + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": 1}, + {}, + None, + ], + slice(1, None), + ), + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": 1}, + {}, + None, + {"d": ["Hello", "rapids"]}, + None, + cudf.NA, + ], + slice(1, 5), + ), + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": 1}, + {}, + None, + {"c": 5}, + None, + cudf.NA, + ], + slice(None, 4), + ), + ([{"a": {"b": 42, "c": -1}}, {"a": {"b": 0, "c": None}}], slice(0, 1)), + ], +) +def test_struct_slice(series, slce): + got = cudf.Series(series)[slce] + expected = cudf.Series(series[slce]) + assert got.to_arrow() == expected.to_arrow() + + +@pytest.mark.parametrize( + "series, expected", + [ + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": 1}, + {}, + ], + {"a": "Hello world", "b": [], "c": cudf.NA}, + ), + ([{}], {}), + ( + [{"b": True}, {"a": 1, "c": [1, 2, 3], "d": "1", "b": False}], + {"a": cudf.NA, "c": cudf.NA, "d": cudf.NA, "b": True}, + ), + ], +) +def test_struct_getitem(series, expected): + sr = cudf.Series(series) + assert sr[0] == expected + + +def test_timedelta_getitem_na(): + s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]") + assert s[2] is cudf.NaT diff --git a/python/cudf/cudf/tests/series/indexing/test_iloc.py b/python/cudf/cudf/tests/series/indexing/test_iloc.py new file mode 100644 index 00000000000..0d460c11dc8 --- /dev/null +++ b/python/cudf/cudf/tests/series/indexing/test_iloc.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "indices", + [slice(0, 3), slice(1, 4), slice(None, None, 2), slice(1, None, 2)], + ids=[":3", "1:4", "0::2", "1::2"], +) +@pytest.mark.parametrize( + "values", + [[None, {}, {}, None], [{}, {}, {}, {}]], + ids=["nulls", "no_nulls"], +) +def test_struct_empty_children_slice(indices, values): + s = cudf.Series(values) + actual = s.iloc[indices] + expect = cudf.Series(values[indices], index=range(len(values))[indices]) + assert_eq(actual, expect) diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py index 88a014191bc..f6ef3d4ddb6 100644 --- a/python/cudf/cudf/tests/series/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py @@ -71,3 +71,48 @@ def test_series_setitem_mixed_bool_dtype(): s = cudf.Series([True, False, True]) with pytest.raises(TypeError): s[0] = 10 + + +@pytest.mark.parametrize( + "data, item", + [ + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, + {"a": "abcde", "b": [4, 5, 6], "c": 9}, + ], + {"a": "Hello world", "b": [], "c": cudf.NA}, + ), + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, + {"a": "abcde", "b": [4, 5, 6], "c": 9}, + ], + {}, + ), + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, + {"a": "abcde", "b": [4, 5, 6], "c": 9}, + ], + cudf.NA, + ), + ( + [ + {"a": "Hello world", "b": []}, + {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, + {"a": "abcde", "b": [4, 5, 6], "c": 9}, + ], + {"a": "Second element", "b": [1, 2], "c": 1000}, + ), + ], +) +def test_struct_setitem(data, item): + sr = cudf.Series(data) + sr[1] = item + data[1] = item + expected = cudf.Series(data) + assert sr.to_arrow() == expected.to_arrow() diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 6825af8442f..8373b173815 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -1,5 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+import cupy as cp import numpy as np import pandas as pd import pyarrow as pa @@ -7,6 +8,7 @@ import cudf from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def test_series_typecast_to_object_error(): @@ -101,3 +103,214 @@ def test_empty_astype_always_castable(type1, type2, as_dtype, copy): assert ser._column is result._column else: assert ser._column is not result._column + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [12, 12, 22, 343, 4353534, 435342], + [0.3534, 12, 22, 343, 43.53534, 4353.42], + cp.asarray([10, 20, 30, 100]), + ], +) +@pytest.mark.parametrize("cast_dtype", ["int64", "category"]) +def test_timedelta_from_typecast(data, timedelta_types_as_str, cast_dtype): + if timedelta_types_as_str != "timedelta64[ns]": + pytest.skip( + "Bug in pandas : https://github.com/pandas-dev/pandas/issues/35465" + ) + psr = pd.Series( + cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, + dtype=timedelta_types_as_str, + ) + gsr = cudf.Series(data, dtype=timedelta_types_as_str) + + if cast_dtype == "int64": + assert_eq(psr.values.view(cast_dtype), gsr.astype(cast_dtype).values) + else: + assert_eq(psr.astype(cast_dtype), gsr.astype(cast_dtype)) + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [12, 12, 22, 343, 4353534, 435342], + [0.3534, 12, 22, 343, 43.53534, 4353.42], + cp.asarray([10, 20, 30, 100]), + ], +) +def test_timedelta_to_typecast(data, timedelta_types_as_str): + psr = pd.Series(cp.asnumpy(data) if isinstance(data, cp.ndarray) else data) + gsr = cudf.Series(data) + + assert_eq( + psr.astype(timedelta_types_as_str), gsr.astype(timedelta_types_as_str) + ) + + +@pytest.mark.parametrize("data", [[], [1, 2, 3, 4, 5]]) +def test_numeric_to_timedelta( + data, numeric_types_as_str, timedelta_types_as_str +): + sr = cudf.Series(data, dtype=numeric_types_as_str) + psr = sr.to_pandas() + + actual = sr.astype(timedelta_types_as_str) + expected = psr.astype(timedelta_types_as_str) + + assert_eq(expected, actual) + + +def test_timedelta_datetime_cast_invalid(): + sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") + psr = sr.to_pandas() + + assert_exceptions_equal( + psr.astype, + sr.astype, + (["datetime64[ns]"],), + (["datetime64[ns]"],), + ) + + sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + psr = sr.to_pandas() + + assert_exceptions_equal( + psr.astype, + sr.astype, + (["timedelta64[ns]"],), + (["timedelta64[ns]"],), + ) + + +@pytest.mark.parametrize( + "sr_data, sr_dtype, exp_data, exp_dtype", + [ + [ + [1, 2, 3], + "timedelta64[ns]", + [ + "0 days 00:00:00.000000001", + "0 days 00:00:00.000000002", + "0 days 00:00:00.000000003", + ], + None, + ], + [ + [1000000, 200000, 3000000], + "timedelta64[ms]", + ["0 days 00:16:40", "0 days 00:03:20", "0 days 00:50:00"], + None, + ], + [ + [1000000, 200000, 3000000], + "timedelta64[s]", + ["11 days 13:46:40", "2 days 07:33:20", "34 days 17:20:00"], + None, + ], + [ + [None, None, None, None, None], + "timedelta64[us]", + [None, None, None, None, None], + "str", + ], + [ + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + "timedelta64[us]", + [ + "0 days 00:02:16.457654", + None, + "0 days 00:04:05.345345", + "0 days 00:03:43.432411", + None, + "0 days 01:00:34.548734", + "0 days 00:00:00.023234", + ], + None, + ], + [ + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + "timedelta64[ms]", + [ + "1 days 13:54:17.654", + None, + "2 days 20:09:05.345", + "2 days 
14:03:52.411", + None, + "42 days 01:35:48.734", + "0 days 00:00:23.234", + ], + None, + ], + [ + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + "timedelta64[s]", + [ + "1579 days 08:54:14", + None, + "2839 days 15:29:05", + "2586 days 00:33:31", + None, + "42066 days 12:52:14", + "0 days 06:27:14", + ], + None, + ], + [ + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + "timedelta64[ns]", + [ + "0 days 00:00:00.136457654", + None, + "0 days 00:00:00.245345345", + "0 days 00:00:00.223432411", + None, + "0 days 00:00:03.634548734", + "0 days 00:00:00.000023234", + ], + None, + ], + ], +) +def test_timedelta_str_roundtrip(sr_data, sr_dtype, exp_data, exp_dtype): + gsr = cudf.Series(sr_data, dtype=sr_dtype) + actual_series = gsr.astype("str") + + expected_series = cudf.Series(exp_data, dtype=exp_dtype) + assert_eq(expected_series, actual_series) + + assert_eq(gsr, actual_series.astype(gsr.dtype)) diff --git a/python/cudf/cudf/tests/series/methods/test_copy.py b/python/cudf/cudf/tests/series/methods/test_copy.py new file mode 100644 index 00000000000..4637e821642 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_copy.py @@ -0,0 +1,9 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cudf +from cudf.testing import assert_eq + + +def test_struct_of_struct_copy(): + sr = cudf.Series([{"a": {"b": 1}}]) + assert_eq(sr, sr.copy()) diff --git a/python/cudf/cudf/tests/series/methods/test_fillna.py b/python/cudf/cudf/tests/series/methods/test_fillna.py index d317ff85596..e64fb209519 100644 --- a/python/cudf/cudf/tests/series/methods/test_fillna.py +++ b/python/cudf/cudf/tests/series/methods/test_fillna.py @@ -38,3 +38,44 @@ def test_fillna_categorical_with_different_categories_raises(): ser = cudf.Series([1, None], dtype="category") with pytest.raises(TypeError): ser.fillna(cudf.Series([1, 2]), dtype="category") + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [12, 12, 22, 343, 4353534, 435342], + [0.3534, 12, 22, 343, 43.53534, 4353.42], + np.array([10, 20, 30, 100]), + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + np.timedelta64(4, "s"), + np.timedelta64(456, "D"), + np.timedelta64("nat"), + np.timedelta64(1, "s"), + np.timedelta64(1, "ms"), + np.timedelta64(1, "us"), + np.timedelta64(1, "ns"), + "NaT", + ], +) +def test_timedelta_fillna(data, timedelta_types_as_str, fill_value): + sr = cudf.Series(data, dtype=timedelta_types_as_str) + psr = sr.to_pandas() + + expected = psr.dropna() + actual = sr.dropna() + + assert_eq(expected, actual) + + expected = psr.fillna(fill_value) + actual = sr.fillna(fill_value) + assert_eq(expected, actual) + + expected = expected.dropna() + actual = actual.dropna() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_memory_usage.py b/python/cudf/cudf/tests/series/methods/test_memory_usage.py index f9c81aaaef9..003e2f61960 100644 --- a/python/cudf/cudf/tests/series/methods/test_memory_usage.py +++ b/python/cudf/cudf/tests/series/methods/test_memory_usage.py @@ -1,7 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
- import cudf +from cudf.testing import assert_eq def test_series_memory_usage(): @@ -19,3 +19,24 @@ def test_series_memory_usage(): assert sr[3:].memory_usage() == 9 # z assert sr[:1].memory_usage() == 19 # hello world + + +def test_struct_with_null_memory_usage(): + df = cudf.DataFrame( + { + "a": cudf.Series([1, 2, -1, -1, 3], dtype="int64"), + "b": cudf.Series([10, 20, -1, -1, 30], dtype="int64"), + } + ) + s = df.to_struct() + assert s.memory_usage() == 80 + + s[2:4] = None + assert s.memory_usage() == 272 + + +def test_struct_memory_usage(): + s = cudf.Series([{"a": 1, "b": 10}, {"a": 2, "b": 20}, {"a": 3, "b": 30}]) + df = s.struct.explode() + + assert_eq(s.memory_usage(), df.memory_usage().sum()) diff --git a/python/cudf/cudf/tests/series/methods/test_to_numpy.py b/python/cudf/cudf/tests/series/methods/test_to_numpy.py new file mode 100644 index 00000000000..6e94e355d61 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_numpy.py @@ -0,0 +1,34 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pytest + +import cudf + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + ], +) +def test_timedelta_series_to_numpy(data, timedelta_types_as_str): + gsr = cudf.Series(data, dtype=timedelta_types_as_str) + + expected = np.array( + cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, + dtype=timedelta_types_as_str, + ) + expected = expected[~np.isnan(expected)] + + actual = gsr.dropna().to_numpy() + + np.testing.assert_array_equal(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py index 8e7ced2ea30..bc78a8c7871 100644 --- a/python/cudf/cudf/tests/series/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -3,6 +3,8 @@ import datetime import decimal +import cupy as cp +import numpy as np import pandas as pd import pyarrow as pa import pytest @@ -152,3 +154,44 @@ def test_series_to_pandas_arrow_type(scalar): result = ser.to_pandas(arrow_type=True) expected = pd.Series(pd.arrays.ArrowExtensionArray(pa_array)) pd.testing.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + ], +) +def test_timedelta_series_to_pandas(data, timedelta_types_as_str): + gsr = cudf.Series(data, dtype=timedelta_types_as_str) + + expected = np.array( + cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, + dtype=timedelta_types_as_str, + ) + + expected = pd.Series(expected) + actual = gsr.to_pandas() + + assert_eq(expected, actual) + + +def test_writable_numpy_array_timedelta(): + gi = cudf.Index([1, 2, 3], dtype="timedelta64[ns]") + expected_flags = pd.Index( + [1, 2, 3], dtype="timedelta64[ns]" + )._data._ndarray.flags + + actual_flags = gi.to_pandas()._data._ndarray.flags + assert expected_flags.c_contiguous == actual_flags.c_contiguous + assert expected_flags.f_contiguous == actual_flags.f_contiguous + assert expected_flags.writeable == actual_flags.writeable + assert expected_flags.aligned == actual_flags.aligned + assert expected_flags.writebackifcopy == 
actual_flags.writebackifcopy diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index 7ae81a6c257..6cc5999ab4f 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -137,7 +137,29 @@ def test_dtype_dtypes_equal(): assert ser.dtypes is ser.to_pandas().dtypes -def test_roundtrip_series_plc_column(ps): - expect = cudf.Series(ps) - actual = cudf.Series.from_pylibcudf(*expect.to_pylibcudf()) - assert_eq(expect, actual) +@pytest.mark.parametrize("data", [[], [1, 2, 3, 4, 5]]) +@pytest.mark.parametrize( + "scalar", + [ + 1, + 2, + 3, + "a", + np.timedelta64(1, "s"), + np.timedelta64(2, "s"), + np.timedelta64(2, "D"), + np.timedelta64(3, "ms"), + np.timedelta64(4, "us"), + np.timedelta64(5, "ns"), + np.timedelta64(6, "ns"), + np.datetime64(6, "s"), + ], +) +def test_timedelta_contains(data, timedelta_types_as_str, scalar): + sr = cudf.Series(data, dtype=timedelta_types_as_str) + psr = sr.to_pandas() + + expected = scalar in sr + actual = scalar in psr + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index b8bca586361..6357680b368 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -14,6 +14,66 @@ from cudf.testing._utils import assert_exceptions_equal +@pytest.fixture( + params=[ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [ + 136457654736252, + 134736784364431, + 245345345545332, + 223432411, + 2343241, + 3634548734, + 23234, + ], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ] +) +def timedelta_data(request): + return request.param + + +def test_timedelta_series_create(timedelta_data, timedelta_types_as_str): + if timedelta_types_as_str != "timedelta64[ns]": + pytest.skip( + "Bug in pandas : https://github.com/pandas-dev/pandas/issues/35465" + ) + psr = pd.Series( + cp.asnumpy(timedelta_data) + if isinstance(timedelta_data, cp.ndarray) + else timedelta_data, + dtype=timedelta_types_as_str, + ) + gsr = cudf.Series(timedelta_data, dtype=timedelta_types_as_str) + + assert_eq(psr, gsr) + + +def test_timedelta_from_pandas(timedelta_data, timedelta_types_as_str): + psr = pd.Series( + cp.asnumpy(timedelta_data) + if isinstance(timedelta_data, cp.ndarray) + else timedelta_data, + dtype=timedelta_types_as_str, + ) + gsr = cudf.from_pandas(psr) + + assert_eq(psr, gsr) + + def test_construct_int_series_with_nulls_compat_mode(): # in compatibility mode, constructing a Series # with nulls should result in a floating Series: @@ -635,3 +695,60 @@ def test_to_dense_array(): dense = sr.dropna().to_numpy() assert dense.size < filled.size assert filled.size == len(sr) + + +@pytest.mark.parametrize( + "ps", + [ + pd.Series([0, 1, 2, np.nan, 4, None, 6]), + pd.Series( + [0, 1, 2, np.nan, 4, None, 6], + index=["q", "w", "e", "r", "t", "y", "u"], + name="a", + ), + pd.Series([0, 1, 2, 3, 4]), + pd.Series(["a", "b", "u", "h", "d"]), + pd.Series([None, None, np.nan, None, np.inf, -np.inf]), + pd.Series([], 
dtype="float64"), + pd.Series( + [pd.NaT, pd.Timestamp("1939-05-27"), pd.Timestamp("1940-04-25")] + ), + pd.Series([np.nan]), + pd.Series([None]), + pd.Series(["a", "b", "", "c", None, "e"]), + ], +) +def test_roundtrip_series_plc_column(ps): + expect = cudf.Series(ps) + actual = cudf.Series.from_pylibcudf(*expect.to_pylibcudf()) + assert_eq(expect, actual) + + +def test_series_construction_with_nulls(): + fields = [ + pa.array([1], type=pa.int64()), + pa.array([None], type=pa.int64()), + pa.array([3], type=pa.int64()), + ] + expect = pa.StructArray.from_arrays(fields, ["a", "b", "c"]) + got = cudf.Series(expect).to_arrow() + + assert expect == got + + +@pytest.mark.parametrize( + "data", + [ + [{}], + [{"a": None}], + [{"a": 1}], + [{"a": "one"}], + [{"a": 1}, {"a": 2}], + [{"a": 1, "b": "one"}, {"a": 2, "b": "two"}], + [{"b": "two", "a": None}, None, {"a": "one", "b": "two"}], + ], +) +def test_create_struct_series(data): + expect = pd.Series(data) + got = cudf.Series(data) + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/structs/test_struct_methods.py b/python/cudf/cudf/tests/structs/test_struct_methods.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/structs/test_struct_methods.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py deleted file mode 100644 index 13eed9d0d77..00000000000 --- a/python/cudf/cudf/tests/test_struct.py +++ /dev/null @@ -1,403 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.testing import assert_eq -from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES - - -@pytest.mark.parametrize( - "data", - [ - [{}], - [{"a": None}], - [{"a": 1}], - [{"a": "one"}], - [{"a": 1}, {"a": 2}], - [{"a": 1, "b": "one"}, {"a": 2, "b": "two"}], - [{"b": "two", "a": None}, None, {"a": "one", "b": "two"}], - ], -) -def test_create_struct_series(data): - expect = pd.Series(data) - got = cudf.Series(data) - assert_eq(expect, got, check_dtype=False) - - -def test_struct_of_struct_copy(): - sr = cudf.Series([{"a": {"b": 1}}]) - assert_eq(sr, sr.copy()) - - -def test_struct_of_struct_loc(): - df = cudf.DataFrame({"col": [{"a": {"b": 1}}]}) - expect = cudf.Series([{"a": {"b": 1}}], name="col") - assert_eq(expect, df["col"]) - - -@pytest.mark.parametrize( - "key, expect", [(0, [1, 3]), (1, [2, 4]), ("a", [1, 3]), ("b", [2, 4])] -) -def test_struct_for_field(key, expect): - sr = cudf.Series([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) - expect = cudf.Series(expect) - got = sr.struct.field(key) - assert_eq(expect, got) - - -def test_series_construction_with_nulls(): - fields = [ - pa.array([1], type=pa.int64()), - pa.array([None], type=pa.int64()), - pa.array([3], type=pa.int64()), - ] - expect = pa.StructArray.from_arrays(fields, ["a", "b", "c"]) - got = cudf.Series(expect).to_arrow() - - assert expect == got - - -@pytest.mark.parametrize( - "fields", - [ - {"a": np.dtype(np.int64)}, - {"a": np.dtype(np.int64), "b": None}, - { - "a": cudf.ListDtype(np.dtype(np.int64)), - "b": cudf.Decimal64Dtype(1, 0), - }, - { - "a": cudf.ListDtype(cudf.StructDtype({"b": np.dtype(np.int64)})), - "b": cudf.ListDtype(cudf.ListDtype(np.dtype(np.int64))), - }, - ], -) -def test_serialize_struct_dtype(fields): - dtype = cudf.StructDtype(fields) - recreated = dtype.__class__.device_deserialize(*dtype.device_serialize()) 
- assert recreated == dtype - - -@pytest.mark.parametrize( - "series, expected", - [ - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": 1}, - {}, - ], - {"a": "Hello world", "b": [], "c": cudf.NA}, - ), - ([{}], {}), - ( - [{"b": True}, {"a": 1, "c": [1, 2, 3], "d": "1", "b": False}], - {"a": cudf.NA, "c": cudf.NA, "d": cudf.NA, "b": True}, - ), - ], -) -def test_struct_getitem(series, expected): - sr = cudf.Series(series) - assert sr[0] == expected - - -@pytest.mark.parametrize( - "data, item", - [ - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, - {"a": "abcde", "b": [4, 5, 6], "c": 9}, - ], - {"a": "Hello world", "b": [], "c": cudf.NA}, - ), - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, - {"a": "abcde", "b": [4, 5, 6], "c": 9}, - ], - {}, - ), - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, - {"a": "abcde", "b": [4, 5, 6], "c": 9}, - ], - cudf.NA, - ), - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": cudf.NA}, - {"a": "abcde", "b": [4, 5, 6], "c": 9}, - ], - {"a": "Second element", "b": [1, 2], "c": 1000}, - ), - ], -) -def test_struct_setitem(data, item): - sr = cudf.Series(data) - sr[1] = item - data[1] = item - expected = cudf.Series(data) - assert sr.to_arrow() == expected.to_arrow() - - -def test_struct_explode(): - s = cudf.Series([], dtype=cudf.StructDtype({})) - expect = cudf.DataFrame({}) - assert_eq(expect, s.struct.explode()) - - s = cudf.Series( - [ - {"a": 1, "b": "x"}, - {"a": 2, "b": "y"}, - {"a": 3, "b": "z"}, - {"a": 4, "b": "a"}, - ] - ) - expect = cudf.DataFrame({"a": [1, 2, 3, 4], "b": ["x", "y", "z", "a"]}) - got = s.struct.explode() - assert_eq(expect, got) - - # check that a copy was made: - got["a"][0] = 5 - assert_eq(s.struct.explode(), expect) - - -def test_dataframe_to_struct(): - df = cudf.DataFrame() - expect = cudf.Series(dtype=cudf.StructDtype({})) - got = df.to_struct() - assert_eq(expect, got) - - df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) - expect = cudf.Series( - [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}, {"a": 3, "b": "z"}] - ) - got = df.to_struct() - assert_eq(expect, got) - - # check that a copy was made: - df["a"][0] = 5 - assert_eq(got, expect) - - # check that a non-string (but convertible to string) named column can be - # converted to struct - df = cudf.DataFrame([[1, 2], [3, 4]], columns=[(1, "b"), 0]) - expect = cudf.Series([{"(1, 'b')": 1, "0": 2}, {"(1, 'b')": 3, "0": 4}]) - with pytest.warns(UserWarning, match="will be casted"): - got = df.to_struct() - assert_eq(got, expect) - - -@pytest.mark.parametrize( - "series, slce", - [ - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": 1}, - {}, - None, - ], - slice(1, None), - ), - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": 1}, - {}, - None, - {"d": ["Hello", "rapids"]}, - None, - cudf.NA, - ], - slice(1, 5), - ), - ( - [ - {"a": "Hello world", "b": []}, - {"a": "CUDF", "b": [1, 2, 3], "c": 1}, - {}, - None, - {"c": 5}, - None, - cudf.NA, - ], - slice(None, 4), - ), - ([{"a": {"b": 42, "c": -1}}, {"a": {"b": 0, "c": None}}], slice(0, 1)), - ], -) -def test_struct_slice(series, slce): - got = cudf.Series(series)[slce] - expected = cudf.Series(series[slce]) - assert got.to_arrow() == expected.to_arrow() - - -def test_struct_slice_nested_struct(): - data = [ - {"a": {"b": 42, "c": "abc"}}, - {"a": {"b": 42, "c": "hello world"}}, - ] - - got = 
cudf.Series(data)[0:1] - expect = cudf.Series(data[0:1]) - assert got.to_arrow() == expect.to_arrow() - - -@pytest.mark.parametrize( - "data", - [ - [{}], - [{"a": None}], - [{"a": 1}], - [{"a": "one"}], - [{"a": 1}, {"a": 2}], - [{"a": 1, "b": "one"}, {"a": 2, "b": "two"}], - [{"b": "two", "a": None}, None, {"a": "one", "b": "two"}], - ], -) -def test_struct_field_errors(data): - got = cudf.Series(data) - - with pytest.raises(KeyError): - got.struct.field("notWithinFields") - - with pytest.raises(IndexError): - got.struct.field(100) - - -@pytest.mark.parametrize("dtype", DATETIME_TYPES + TIMEDELTA_TYPES) -def test_struct_with_datetime_and_timedelta(dtype): - df = cudf.DataFrame( - { - "a": [12, 232, 2334], - "datetime": cudf.Series([23432, 3432423, 324324], dtype=dtype), - } - ) - series = df.to_struct() - a_array = np.array([12, 232, 2334]) - datetime_array = np.array([23432, 3432423, 324324]).astype(dtype) - - actual = series.to_pandas() - values_list = [] - for i, val in enumerate(a_array): - values_list.append({"a": val, "datetime": datetime_array[i]}) - - expected = pd.Series(values_list) - assert_eq(expected, actual) - - -def test_struct_int_values(): - series = cudf.Series( - [{"a": 1, "b": 2}, {"a": 10, "b": None}, {"a": 5, "b": 6}] - ) - actual_series = series.to_pandas() - - assert isinstance(actual_series[0]["b"], int) - assert isinstance(actual_series[1]["b"], type(None)) - assert isinstance(actual_series[2]["b"], int) - - -def test_nested_struct_from_pandas_empty(): - # tests constructing nested structs columns that would result in - # libcudf EMPTY type child columns inheriting their parent's null - # mask. See GH PR: #10761 - pdf = pd.Series([[{"c": {"x": None}}], [{"c": None}]]) - gdf = cudf.from_pandas(pdf) - - assert_eq(pdf, gdf) - - -def _nested_na_replace(struct_scalar): - """ - Replace `cudf.NA` with `None` in the dict - """ - for key, value in struct_scalar.items(): - if value is cudf.NA: - struct_scalar[key] = None - return struct_scalar - - -@pytest.mark.parametrize( - "data, idx, expected", - [ - ( - [{"f2": {"a": "sf21"}, "f1": "a"}, {"f1": "sf12", "f2": None}], - 0, - {"f1": "a", "f2": {"a": "sf21"}}, - ), - ( - [ - {"f2": {"a": "sf21"}}, - {"f1": "sf12", "f2": None}, - ], - 0, - {"f1": cudf.NA, "f2": {"a": "sf21"}}, - ), - ( - [{"a": "123"}, {"a": "sf12", "b": {"a": {"b": "c"}}}], - 1, - {"a": "sf12", "b": {"a": {"b": "c"}}}, - ), - ], -) -def test_nested_struct_extract_host_scalars(data, idx, expected): - series = cudf.Series(data) - - assert _nested_na_replace(series[idx]) == _nested_na_replace(expected) - - -def test_struct_memory_usage(): - s = cudf.Series([{"a": 1, "b": 10}, {"a": 2, "b": 20}, {"a": 3, "b": 30}]) - df = s.struct.explode() - - assert_eq(s.memory_usage(), df.memory_usage().sum()) - - -def test_struct_with_null_memory_usage(): - df = cudf.DataFrame( - { - "a": cudf.Series([1, 2, -1, -1, 3], dtype="int64"), - "b": cudf.Series([10, 20, -1, -1, 30], dtype="int64"), - } - ) - s = df.to_struct() - assert s.memory_usage() == 80 - - s[2:4] = None - assert s.memory_usage() == 272 - - -@pytest.mark.parametrize( - "indices", - [slice(0, 3), slice(1, 4), slice(None, None, 2), slice(1, None, 2)], - ids=[":3", "1:4", "0::2", "1::2"], -) -@pytest.mark.parametrize( - "values", - [[None, {}, {}, None], [{}, {}, {}, {}]], - ids=["nulls", "no_nulls"], -) -def test_struct_empty_children_slice(indices, values): - s = cudf.Series(values) - actual = s.iloc[indices] - expect = cudf.Series(values[indices], index=range(len(values))[indices]) - assert_eq(actual, 
expect) - - -def test_struct_iterate_error(): - s = cudf.Series( - [{"f2": {"a": "sf21"}, "f1": "a"}, {"f1": "sf12", "f2": None}] - ) - with pytest.raises(TypeError): - iter(s.struct) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index f0e6503c55a..28741b9f592 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -14,38 +14,6 @@ from cudf.testing._utils import assert_exceptions_equal, expect_warning_if -@pytest.fixture( - params=[ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [ - 136457654736252, - 134736784364431, - 245345345545332, - 223432411, - 2343241, - 3634548734, - 23234, - ], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ] -) -def data(request): - return request.param - - @pytest.fixture( params=[ [1000000, 200000, 3000000], @@ -74,127 +42,6 @@ def timedelta_dtype(request): return request.param -def test_timedelta_series_create(data, timedelta_dtype): - if timedelta_dtype != "timedelta64[ns]": - pytest.skip( - "Bug in pandas : https://github.com/pandas-dev/pandas/issues/35465" - ) - psr = pd.Series( - cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, - dtype=timedelta_dtype, - ) - gsr = cudf.Series(data, dtype=timedelta_dtype) - - assert_eq(psr, gsr) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [12, 12, 22, 343, 4353534, 435342], - [0.3534, 12, 22, 343, 43.53534, 4353.42], - cp.asarray([10, 20, 30, 100]), - ], -) -@pytest.mark.parametrize("cast_dtype", ["int64", "category"]) -def test_timedelta_from_typecast(data, timedelta_dtype, cast_dtype): - if timedelta_dtype != "timedelta64[ns]": - pytest.skip( - "Bug in pandas : https://github.com/pandas-dev/pandas/issues/35465" - ) - psr = pd.Series( - cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, - dtype=timedelta_dtype, - ) - gsr = cudf.Series(data, dtype=timedelta_dtype) - - if cast_dtype == "int64": - assert_eq(psr.values.view(cast_dtype), gsr.astype(cast_dtype).values) - else: - assert_eq(psr.astype(cast_dtype), gsr.astype(cast_dtype)) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [12, 12, 22, 343, 4353534, 435342], - [0.3534, 12, 22, 343, 43.53534, 4353.42], - cp.asarray([10, 20, 30, 100]), - ], -) -def test_timedelta_to_typecast(data, timedelta_dtype): - psr = pd.Series(cp.asnumpy(data) if isinstance(data, cp.ndarray) else data) - gsr = cudf.Series(data) - - assert_eq(psr.astype(timedelta_dtype), gsr.astype(timedelta_dtype)) - - -def test_timedelta_from_pandas(data, timedelta_dtype): - psr = pd.Series( - cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, - dtype=timedelta_dtype, - ) - gsr = cudf.from_pandas(psr) - - assert_eq(psr, gsr) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - ], -) -def test_timedelta_series_to_numpy(data, timedelta_dtype): - gsr = cudf.Series(data, dtype=timedelta_dtype) - - expected = np.array( - 
cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, - dtype=timedelta_dtype, - ) - expected = expected[~np.isnan(expected)] - - actual = gsr.dropna().to_numpy() - - np.testing.assert_array_equal(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - ], -) -def test_timedelta_series_to_pandas(data, timedelta_dtype): - gsr = cudf.Series(data, dtype=timedelta_dtype) - - expected = np.array( - cp.asnumpy(data) if isinstance(data, cp.ndarray) else data, - dtype=timedelta_dtype, - ) - - expected = pd.Series(expected) - actual = gsr.to_pandas() - - assert_eq(expected, actual) - - @pytest.mark.parametrize( "data,other", [ @@ -520,57 +367,6 @@ def test_timedelta_reduction_ops( assert_eq(expected, actual) -def test_timedelta_dt_components(data, timedelta_dtype): - gsr = cudf.Series(data, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - expected = psr.dt.components - actual = gsr.dt.components - - if gsr.isnull().any(): - assert_eq(expected, actual.astype("float")) - else: - assert_eq(expected, actual) - - -def test_timedelta_dt_properties(data, timedelta_dtype): - gsr = cudf.Series(data, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - def local_assert(expected, actual, **kwargs): - if gsr.isnull().any(): - assert_eq(expected, actual.astype("float"), **kwargs) - else: - assert_eq(expected, actual, **kwargs) - - expected_days = psr.dt.days - actual_days = gsr.dt.days - - local_assert(expected_days, actual_days, check_dtype=False) - - expected_seconds = psr.dt.seconds - actual_seconds = gsr.dt.seconds - - local_assert(expected_seconds, actual_seconds, check_dtype=False) - - expected_microseconds = psr.dt.microseconds - actual_microseconds = gsr.dt.microseconds - - local_assert(expected_microseconds, actual_microseconds, check_dtype=False) - - expected_nanoseconds = psr.dt.nanoseconds - actual_nanoseconds = gsr.dt.nanoseconds - - local_assert(expected_nanoseconds, actual_nanoseconds, check_dtype=False) - - -def test_timedelta_index(data, timedelta_dtype): - gdi = cudf.Index(data, dtype=timedelta_dtype) - pdi = gdi.to_pandas() - - assert_eq(pdi, gdi) - - @pytest.mark.parametrize("datetime_dtype", utils.DATETIME_TYPES) def test_timedelta_index_datetime_index_ops( data_non_overflow, datetime_dtype, timedelta_dtype @@ -721,213 +517,6 @@ def test_timedelta_index_ops_with_scalars( assert_eq(expected, actual) -@pytest.mark.parametrize("name", ["abcd", None]) -def test_timedelta_index_properties(data, timedelta_dtype, name): - gdi = cudf.Index(data, dtype=timedelta_dtype, name=name) - pdi = gdi.to_pandas() - - def local_assert(expected, actual): - if actual._column.null_count: - assert_eq(expected, actual.astype("float64")) - else: - assert_eq(expected, actual) - - expected_days = pdi.days - actual_days = gdi.days - - local_assert(expected_days, actual_days) - - expected_seconds = pdi.seconds - actual_seconds = gdi.seconds - - local_assert(expected_seconds, actual_seconds) - - expected_microseconds = pdi.microseconds - actual_microseconds = gdi.microseconds - - local_assert(expected_microseconds, actual_microseconds) - - expected_nanoseconds = pdi.nanoseconds - actual_nanoseconds = gdi.nanoseconds - - local_assert(expected_nanoseconds, actual_nanoseconds) - - expected_components = pdi.components - actual_components = gdi.components - - if actual_components.isnull().any().any(): 
- assert_eq(expected_components, actual_components.astype("float")) - else: - assert_eq( - expected_components, - actual_components, - check_index_type=not actual_components.empty, - ) - - -@pytest.mark.parametrize( - "fill_value", - [ - np.timedelta64(4, "s"), - np.timedelta64(456, "D"), - np.timedelta64("nat"), - np.timedelta64(1, "s"), - np.timedelta64(1, "ms"), - np.timedelta64(1, "us"), - np.timedelta64(1, "ns"), - "NaT", - ], -) -def test_timedelta_fillna(data, timedelta_dtype, fill_value): - sr = cudf.Series(data, dtype=timedelta_dtype) - psr = sr.to_pandas() - - expected = psr.dropna() - actual = sr.dropna() - - assert_eq(expected, actual) - - expected = psr.fillna(fill_value) - actual = sr.fillna(fill_value) - assert_eq(expected, actual) - - expected = expected.dropna() - actual = actual.dropna() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "sr_data, sr_dtype, exp_data, exp_dtype", - [ - [ - [1, 2, 3], - "timedelta64[ns]", - [ - "0 days 00:00:00.000000001", - "0 days 00:00:00.000000002", - "0 days 00:00:00.000000003", - ], - None, - ], - [ - [1000000, 200000, 3000000], - "timedelta64[ms]", - ["0 days 00:16:40", "0 days 00:03:20", "0 days 00:50:00"], - None, - ], - [ - [1000000, 200000, 3000000], - "timedelta64[s]", - ["11 days 13:46:40", "2 days 07:33:20", "34 days 17:20:00"], - None, - ], - [ - [None, None, None, None, None], - "timedelta64[us]", - [None, None, None, None, None], - "str", - ], - [ - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - "timedelta64[us]", - [ - "0 days 00:02:16.457654", - None, - "0 days 00:04:05.345345", - "0 days 00:03:43.432411", - None, - "0 days 01:00:34.548734", - "0 days 00:00:00.023234", - ], - None, - ], - [ - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - "timedelta64[ms]", - [ - "1 days 13:54:17.654", - None, - "2 days 20:09:05.345", - "2 days 14:03:52.411", - None, - "42 days 01:35:48.734", - "0 days 00:00:23.234", - ], - None, - ], - [ - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - "timedelta64[s]", - [ - "1579 days 08:54:14", - None, - "2839 days 15:29:05", - "2586 days 00:33:31", - None, - "42066 days 12:52:14", - "0 days 06:27:14", - ], - None, - ], - [ - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - "timedelta64[ns]", - [ - "0 days 00:00:00.136457654", - None, - "0 days 00:00:00.245345345", - "0 days 00:00:00.223432411", - None, - "0 days 00:00:03.634548734", - "0 days 00:00:00.000023234", - ], - None, - ], - ], -) -def test_timedelta_str_roundtrip(sr_data, sr_dtype, exp_data, exp_dtype): - gsr = cudf.Series(sr_data, dtype=sr_dtype) - actual_series = gsr.astype("str") - - expected_series = cudf.Series(exp_data, dtype=exp_dtype) - assert_eq(expected_series, actual_series) - - assert_eq(gsr, actual_series.astype(gsr.dtype)) - - def test_timedelta_invalid_ops(): sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") psr = sr.to_pandas() @@ -1029,68 +618,6 @@ def test_timedelta_invalid_ops(): ) -def test_timedelta_datetime_cast_invalid(): - sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") - psr = sr.to_pandas() - - assert_exceptions_equal( - psr.astype, - sr.astype, - (["datetime64[ns]"],), - (["datetime64[ns]"],), - ) - - sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - psr = sr.to_pandas() - - assert_exceptions_equal( - psr.astype, - sr.astype, - (["timedelta64[ns]"],), - (["timedelta64[ns]"],), - ) - - -@pytest.mark.parametrize("data", [[], [1, 2, 3, 
4, 5]]) -@pytest.mark.parametrize("dtype", utils.NUMERIC_TYPES) -def test_numeric_to_timedelta(data, dtype, timedelta_dtype): - sr = cudf.Series(data, dtype=dtype) - psr = sr.to_pandas() - - actual = sr.astype(timedelta_dtype) - expected = psr.astype(timedelta_dtype) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("data", [[], [1, 2, 3, 4, 5]]) -@pytest.mark.parametrize( - "scalar", - [ - 1, - 2, - 3, - "a", - np.timedelta64(1, "s"), - np.timedelta64(2, "s"), - np.timedelta64(2, "D"), - np.timedelta64(3, "ms"), - np.timedelta64(4, "us"), - np.timedelta64(5, "ns"), - np.timedelta64(6, "ns"), - np.datetime64(6, "s"), - ], -) -def test_timedelta_contains(data, timedelta_dtype, scalar): - sr = cudf.Series(data, dtype=timedelta_dtype) - psr = sr.to_pandas() - - expected = scalar in sr - actual = scalar in psr - - assert_eq(expected, actual) - - @pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) @pytest.mark.parametrize("ddof", [1, 2, 3]) def test_timedelta_std(data, timedelta_dtype, ddof): @@ -1135,37 +662,6 @@ def test_timedelta_reductions(data, op, timedelta_dtype): assert_eq(expected.to_numpy(), actual) -def test_error_values(): - s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") - with pytest.raises(NotImplementedError, match="cupy does not support"): - s.values - - -@pytest.mark.parametrize("name", [None, "delta-index"]) -def test_create_TimedeltaIndex(timedelta_dtype, name): - gdi = cudf.TimedeltaIndex( - [1132223, 2023232, 342234324, 4234324], - dtype=timedelta_dtype, - name=name, - ) - pdi = gdi.to_pandas() - assert_eq(pdi, gdi) - - -def test_timedelta_constructor(): - data = [43534, 43543, 37897, 2000] - dtype = "timedelta64[ns]" - expected = pd.TimedeltaIndex(data=data, dtype=dtype) - actual = cudf.TimedeltaIndex(data=data, dtype=dtype) - - assert_eq(expected, actual) - - expected = pd.TimedeltaIndex(data=pd.Series(data), dtype=dtype) - actual = cudf.TimedeltaIndex(data=cudf.Series(data), dtype=dtype) - - assert_eq(expected, actual) - - @pytest.mark.parametrize("op", [operator.add, operator.sub]) def test_timdelta_binop_tz_timestamp(op): s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") @@ -1177,11 +673,6 @@ def test_timdelta_binop_tz_timestamp(op): op(s, date_tz_scalar) -def test_timedelta_getitem_na(): - s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]") - assert s[2] is cudf.NaT - - @pytest.mark.parametrize( "op", [ @@ -1228,55 +719,3 @@ def test_tdi_reductions(method, kwargs): result = getattr(pd_tdi, method)(**kwargs) expected = getattr(cudf_tdi, method)(**kwargs) assert result == expected - - -def test_tdi_asi8(): - pd_tdi = pd.TimedeltaIndex(["1 day", "2 days", "3 days"]) - cudf_tdi = cudf.from_pandas(pd_tdi) - - result = pd_tdi.asi8 - expected = cudf_tdi.asi8 - assert_eq(result, expected) - - -def test_tdi_unit(): - pd_tdi = pd.TimedeltaIndex( - ["1 day", "2 days", "3 days"], dtype="timedelta64[ns]" - ) - cudf_tdi = cudf.from_pandas(pd_tdi) - - result = pd_tdi.unit - expected = cudf_tdi.unit - assert result == expected - - -def test_timedelta_series_total_seconds(data, timedelta_dtype): - gsr = cudf.Series(data, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - expected = psr.dt.total_seconds() - actual = gsr.dt.total_seconds() - assert_eq(expected, actual) - - -def test_timedelta_index_total_seconds(data, timedelta_dtype): - gi = cudf.Index(data, dtype=timedelta_dtype) - pi = gi.to_pandas() - - expected = pi.total_seconds() - actual = gi.total_seconds() - assert_eq(expected, actual) - - -def test_writable_numpy_array(): - gi = 
cudf.Index([1, 2, 3], dtype="timedelta64[ns]")
-    expected_flags = pd.Index(
-        [1, 2, 3], dtype="timedelta64[ns]"
-    )._data._ndarray.flags
-
-    actual_flags = gi.to_pandas()._data._ndarray.flags
-    assert expected_flags.c_contiguous == actual_flags.c_contiguous
-    assert expected_flags.f_contiguous == actual_flags.f_contiguous
-    assert expected_flags.writeable == actual_flags.writeable
-    assert expected_flags.aligned == actual_flags.aligned
-    assert expected_flags.writebackifcopy == actual_flags.writebackifcopy

From 2553df0a3c63fd49991db0f37ac8a968ee358e98 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 1 Aug 2025 14:34:33 -0400
Subject: [PATCH 043/366] Use no_validity() instead of null_probability(0) in
 benchmarks profile (#19554)

Using `no_validity()` on a `data_profile` for generating benchmark data is more reliable than `null_probability(0.0)` since no mask is created.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Shruti Shivakumar (https://github.com/shrshi)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/19554
---
 cpp/benchmarks/common/generate_input.cu  | 6 ++----
 cpp/benchmarks/common/generate_input.hpp | 6 +++---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 0488a59099d..d8e868a4ae8 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -1063,12 +1063,10 @@ std::unique_ptr create_string_column(cudf::size_type num_rows,
   auto const num_matches = (static_cast(num_rows) * hit_rate) / 100;
 
   // Create a randomized gather-map to build a column out of the strings in data.
-  data_profile gather_profile =
-    data_profile_builder().cardinality(0).null_probability(0.0).distribution(
-      cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1);
+  data_profile gather_profile = data_profile_builder().cardinality(0).no_validity().distribution(
+    cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1);
   auto gather_table =
     create_random_table({cudf::type_id::INT32}, row_count{num_rows}, gather_profile);
-  gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0);
 
   // Create scatter map by placing 0-index values throughout the gather-map
   auto scatter_data = cudf::sequence(num_matches,
diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp
index b900acc2f20..a89fb2429bf 100644
--- a/cpp/benchmarks/common/generate_input.hpp
+++ b/cpp/benchmarks/common/generate_input.hpp
@@ -449,14 +449,14 @@ class data_profile {
  * For example, `data_profile` initialization
  * @code{.pseudo}
  * data_profile profile;
- * profile.set_null_probability(0.0);
+ * profile.set_null_probability(0.01);
  * profile.set_cardinality(0);
  * profile.set_distribution_params(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 100);
  * @endcode
  * becomes
  * @code{.pseudo}
  * data_profile const profile =
- *   data_profile_builder().cardinality(0).null_probability(0.0).distribution(
+ *   data_profile_builder().cardinality(0).null_probability(0.01).distribution(
  *     cudf::type_id::INT32, distribution_id::UNIFORM, 0, 100);
  * @endcode
 * The builder makes it easier to have immutable `data_profile` objects even with the complex
 *
 * The builder API also includes a few additional convenience setters:
 * Overload of `distribution` that only takes the distribution type (not the
range). - * `no_validity`, which is a simpler equivalent of `null_probability(std::nullopr)`. + * `no_validity`, which is a simpler equivalent of `null_probability(std::nullopt)`. */ class data_profile_builder { data_profile profile; From 79f24164cbd7b433cb0e0ee17d65ec3aa8d25278 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 1 Aug 2025 12:14:45 -0700 Subject: [PATCH 044/366] Add streams to all single-function modules (#19559) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19559 --- python/pylibcudf/pylibcudf/concatenate.pxd | 6 +++-- python/pylibcudf/pylibcudf/concatenate.pyi | 3 +++ python/pylibcudf/pylibcudf/concatenate.pyx | 18 +++++++++----- python/pylibcudf/pylibcudf/interop.pxd | 5 ++-- python/pylibcudf/pylibcudf/interop.pyi | 8 +++++-- python/pylibcudf/pylibcudf/interop.pyx | 19 +++++++++++---- python/pylibcudf/pylibcudf/json.pxd | 7 ++++-- python/pylibcudf/pylibcudf/json.pyi | 7 +++++- python/pylibcudf/pylibcudf/json.pyx | 18 ++++++++++---- python/pylibcudf/pylibcudf/labeling.pxd | 7 ++++-- python/pylibcudf/pylibcudf/labeling.pyi | 3 +++ python/pylibcudf/pylibcudf/labeling.pyx | 13 ++++++++-- .../pylibcudf/libcudf/concatenate.pxd | 9 ++++--- .../pylibcudf/pylibcudf/libcudf/interop.pxd | 8 +++++-- python/pylibcudf/pylibcudf/libcudf/json.pxd | 5 +++- .../pylibcudf/pylibcudf/libcudf/labeling.pxd | 7 ++++-- python/pylibcudf/pylibcudf/libcudf/merge.pxd | 5 +++- .../libcudf/nvtext/edit_distance.pxd | 10 +++++--- python/pylibcudf/pylibcudf/libcudf/round.pxd | 5 +++- .../pylibcudf/libcudf/strings/repeat.pxd | 12 +++++++--- .../pylibcudf/libcudf/strings/substring.pxd | 12 +++++++--- .../pylibcudf/pylibcudf/libcudf/transpose.pxd | 7 ++++-- python/pylibcudf/pylibcudf/merge.pxd | 5 +++- python/pylibcudf/pylibcudf/merge.pyi | 3 +++ python/pylibcudf/pylibcudf/merge.pyx | 13 ++++++++-- .../pylibcudf/nvtext/edit_distance.pxd | 7 +++--- .../pylibcudf/nvtext/edit_distance.pyi | 10 ++++++-- .../pylibcudf/nvtext/edit_distance.pyx | 24 +++++++++++++------ python/pylibcudf/pylibcudf/round.pxd | 7 ++++-- python/pylibcudf/pylibcudf/round.pyi | 3 +++ python/pylibcudf/pylibcudf/round.pyx | 15 +++++++++--- python/pylibcudf/pylibcudf/strings/repeat.pxd | 7 ++++-- python/pylibcudf/pylibcudf/strings/repeat.pyi | 8 ++++++- python/pylibcudf/pylibcudf/strings/repeat.pyx | 21 ++++++++++++---- python/pylibcudf/pylibcudf/strings/slice.pxd | 6 +++-- python/pylibcudf/pylibcudf/strings/slice.pyi | 3 +++ python/pylibcudf/pylibcudf/strings/slice.pyx | 19 +++++++++++---- python/pylibcudf/pylibcudf/transpose.pxd | 6 +++-- python/pylibcudf/pylibcudf/transpose.pyi | 5 +++- python/pylibcudf/pylibcudf/transpose.pyx | 15 ++++++++---- 40 files changed, 280 insertions(+), 91 deletions(-) diff --git a/python/pylibcudf/pylibcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/concatenate.pxd index c506ffb93c9..629c88161ae 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/concatenate.pxd @@ -1,10 +1,12 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from .table cimport Table +from rmm.pylibrmm.stream cimport Stream + # There is no way to define a fused type that is a list of other objects, so we cannot # unify the column and table paths without using runtime dispatch instead. 
In this case # we choose to prioritize API consistency over performance, so we use the same function # with a bit of runtime dispatch overhead. -cpdef concatenate(list objects) +cpdef concatenate(list objects, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi index 79076f509e0..020ba1eb997 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyi +++ b/python/pylibcudf/pylibcudf/concatenate.pyi @@ -1,8 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table def concatenate[ColumnOrTable: (Column, Table)]( objects: list[ColumnOrTable], + stream: Stream | None = None, ) -> ColumnOrTable: ... diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx index 42c5f34cf3e..2937732e2cd 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyx +++ b/python/pylibcudf/pylibcudf/concatenate.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -9,18 +9,23 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = ["concatenate"] -cpdef concatenate(list objects): +cpdef concatenate(list objects, Stream stream=None): """Concatenate columns or tables. Parameters ---------- objects : Union[List[Column], List[Table]] The list of Columns or Tables to concatenate. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -32,6 +37,7 @@ cpdef concatenate(list objects): cdef vector[column_view] c_columns cdef vector[table_view] c_tables + stream = _get_stream(stream) cdef unique_ptr[column] c_col_result cdef unique_ptr[table] c_tbl_result @@ -41,14 +47,14 @@ cpdef concatenate(list objects): c_tables.push_back((tbl).view()) with nogil: - c_tbl_result = cpp_concatenate.concatenate(c_tables) - return Table.from_libcudf(move(c_tbl_result)) + c_tbl_result = cpp_concatenate.concatenate(c_tables, stream.view()) + return Table.from_libcudf(move(c_tbl_result), stream) elif isinstance(objects[0], Column): for column in objects: c_columns.push_back((column).view()) with nogil: - c_col_result = cpp_concatenate.concatenate(c_columns) - return Column.from_libcudf(move(c_col_result)) + c_col_result = cpp_concatenate.concatenate(c_columns, stream.view()) + return Column.from_libcudf(move(c_col_result), stream) else: raise ValueError("input must be a list of Columns or Tables") diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd index 7cf3be08e09..a02261db74c 100644 --- a/python/pylibcudf/pylibcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/interop.pxd @@ -1,7 +1,8 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.table cimport Table +from rmm.pylibrmm.stream cimport Stream -cpdef Table from_dlpack(object managed_tensor) +cpdef Table from_dlpack(object managed_tensor, Stream stream=*) -cpdef object to_dlpack(Table input) +cpdef object to_dlpack(Table input, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi index 63de816010b..decd0e412c4 100644 --- a/python/pylibcudf/pylibcudf/interop.pyi +++ b/python/pylibcudf/pylibcudf/interop.pyi @@ -6,6 +6,8 @@ from typing import Any, overload import pyarrow as pa +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table @@ -48,5 +50,7 @@ def to_arrow( def to_arrow( obj: Scalar, metadata: ColumnMetadata | str | None = None ) -> pa.Scalar[Any]: ... -def from_dlpack(managed_tensor: Any) -> Table: ... -def to_dlpack(input: Table) -> Any: ... +def from_dlpack( + managed_tensor: Any, stream: Stream | None = None +) -> Table: ... +def to_dlpack(input: Table, stream: Stream | None = None) -> Any: ... diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index f31b82153c1..0595bb4a777 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -18,11 +18,14 @@ from pylibcudf.libcudf.interop cimport ( ) from pylibcudf.libcudf.table.table cimport table +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .scalar cimport Scalar from .table cimport Table from .types cimport DataType, type_id from .types import LIBCUDF_TO_ARROW_TYPES +from .utils cimport _get_stream from ._interop_helpers import ColumnMetadata try: @@ -177,7 +180,7 @@ if pa is not None: return to_arrow(Column.from_scalar(plc_object, 1), metadata=metadata)[0] -cpdef Table from_dlpack(object managed_tensor): +cpdef Table from_dlpack(object managed_tensor, Stream stream=None): """ Convert a DLPack DLTensor into a cudf table. @@ -187,6 +190,8 @@ cpdef Table from_dlpack(object managed_tensor): ---------- managed_tensor : PyCapsule A 1D or 2D column-major (Fortran order) tensor. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -202,6 +207,7 @@ cpdef Table from_dlpack(object managed_tensor): if dlpack_tensor is NULL: raise ValueError("PyCapsule object contained a NULL pointer") PyCapsule_SetName(managed_tensor, "used_dltensor") + stream = _get_stream(stream) # Note: A copy is always performed when converting the dlpack # data to a libcudf table. We also delete the dlpack_tensor pointer @@ -209,14 +215,14 @@ cpdef Table from_dlpack(object managed_tensor): # TODO: https://github.com/rapidsai/cudf/issues/10874 # TODO: https://github.com/rapidsai/cudf/issues/10849 with nogil: - c_result = cpp_from_dlpack(dlpack_tensor) + c_result = cpp_from_dlpack(dlpack_tensor, stream.view()) - cdef Table result = Table.from_libcudf(move(c_result)) + cdef Table result = Table.from_libcudf(move(c_result), stream) dlpack_tensor.deleter(dlpack_tensor) return result -cpdef object to_dlpack(Table input): +cpdef object to_dlpack(Table input, Stream stream=None): """ Convert a cudf table into a DLPack DLTensor. @@ -226,6 +232,8 @@ cpdef object to_dlpack(Table input): ---------- input : Table A 1D or 2D column-major (Fortran order) tensor. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -239,9 +247,10 @@ cpdef object to_dlpack(Table input): "Input is required to have null count as zero." 
) cdef DLManagedTensor *dlpack_tensor + stream = _get_stream(stream) with nogil: - dlpack_tensor = cpp_to_dlpack(input.view()) + dlpack_tensor = cpp_to_dlpack(input.view(), stream.view()) return PyCapsule_New( dlpack_tensor, diff --git a/python/pylibcudf/pylibcudf/json.pxd b/python/pylibcudf/pylibcudf/json.pxd index 87a87349b8a..81062980608 100644 --- a/python/pylibcudf/pylibcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/json.pxd @@ -1,9 +1,11 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.column cimport Column from pylibcudf.libcudf.json cimport get_json_object_options from pylibcudf.scalar cimport Scalar +from rmm.pylibrmm.stream cimport Stream + cdef class GetJsonObjectOptions: cdef get_json_object_options options @@ -12,5 +14,6 @@ cdef class GetJsonObjectOptions: cpdef Column get_json_object( Column col, Scalar json_path, - GetJsonObjectOptions options=* + GetJsonObjectOptions options=*, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi index b93d4876dab..e7d00013103 100644 --- a/python/pylibcudf/pylibcudf/json.pyi +++ b/python/pylibcudf/pylibcudf/json.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -19,5 +21,8 @@ class GetJsonObjectOptions: def set_missing_fields_as_nulls(self, val: bool) -> None: ... def get_json_object( - col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None + col: Column, + json_path: Scalar, + options: GetJsonObjectOptions | None = None, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx index 5ec1e1be971..2836eedc5ff 100644 --- a/python/pylibcudf/pylibcudf/json.pyx +++ b/python/pylibcudf/pylibcudf/json.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp cimport bool @@ -10,6 +10,10 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.scalar cimport Scalar +from rmm.pylibrmm.stream cimport Stream + +from .utils cimport _get_stream + __all__ = ["GetJsonObjectOptions", "get_json_object"] cdef class GetJsonObjectOptions: @@ -113,7 +117,8 @@ cdef class GetJsonObjectOptions: cpdef Column get_json_object( Column col, Scalar json_path, - GetJsonObjectOptions options=None + GetJsonObjectOptions options=None, + Stream stream=None ): """ Apply a JSONPath string to all rows in an input strings column. @@ -131,6 +136,9 @@ cpdef Column get_json_object( options : GetJsonObjectOptions Options for controlling the behavior of the function. + stream : Stream | None + CUDA stream on which to perform the operation. 
+ Returns ------- Column @@ -144,12 +152,14 @@ cpdef Column get_json_object( options = GetJsonObjectOptions() cdef cpp_json.get_json_object_options c_options = options.options + stream = _get_stream(stream) with nogil: c_result = cpp_json.get_json_object( col.view(), dereference(c_json_path), - c_options + c_options, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd index b1f9f2e806d..13205ee19ba 100644 --- a/python/pylibcudf/pylibcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/labeling.pxd @@ -1,14 +1,17 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from pylibcudf.libcudf.labeling cimport inclusive from .column cimport Column +from rmm.pylibrmm.stream cimport Stream + cpdef Column label_bins( Column input, Column left_edges, inclusive left_inclusive, Column right_edges, - inclusive right_inclusive + inclusive right_inclusive, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi index c3a75d10baf..7f0a42cad9c 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyi +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column class Inclusive(IntEnum): @@ -14,4 +16,5 @@ def label_bins( left_inclusive: Inclusive, right_edges: Column, right_inclusive: Inclusive, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index 0d93463cc7e..d5be40f2558 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -8,7 +8,10 @@ from pylibcudf.libcudf.labeling cimport inclusive from pylibcudf.libcudf.labeling import inclusive as Inclusive # no-cython-lint +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column +from .utils cimport _get_stream __all__ = ["Inclusive", "label_bins"] @@ -17,7 +20,8 @@ cpdef Column label_bins( Column left_edges, inclusive left_inclusive, Column right_edges, - inclusive right_inclusive + inclusive right_inclusive, + Stream stream=None ): """Labels elements based on membership in the specified bins. @@ -35,6 +39,8 @@ cpdef Column label_bins( Column of the right edge of each bin. right_inclusive : Inclusive Whether or not the right edge is inclusive. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -43,6 +49,8 @@ cpdef Column label_bins( according to the specified bins. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_labeling.label_bins( input.view(), @@ -50,8 +58,9 @@ cpdef Column label_bins( left_inclusive, right_edges.view(), right_inclusive, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) Inclusive.__str__ = Inclusive.__repr__ diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd index 0a827b21cda..3711105401d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -7,6 +7,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view from pylibcudf.libcudf.utilities.span cimport host_span from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: @@ -20,8 +21,10 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: # ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate( - const vector[column_view] columns + const vector[column_view] columns, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] concatenate( - const vector[table_view] tables + const vector[table_view] tables, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index e626c2380d1..257bdcea739 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -9,6 +9,8 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "dlpack/dlpack.h" nogil: ctypedef struct DLManagedTensor: @@ -33,11 +35,13 @@ cdef extern from "cudf/interop.hpp" nogil: cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: cdef unique_ptr[table] from_dlpack( - const DLManagedTensor* managed_tensor + const DLManagedTensor* managed_tensor, + cuda_stream_view stream ) except +libcudf_exception_handler DLManagedTensor* to_dlpack( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef cppclass column_metadata: diff --git a/python/pylibcudf/pylibcudf/libcudf/json.pxd b/python/pylibcudf/pylibcudf/libcudf/json.pxd index d5bdd6d299a..06fb0e6ef99 100644 --- a/python/pylibcudf/pylibcudf/libcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/json.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -7,6 +7,8 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: cdef cppclass get_json_object_options: @@ -26,4 +28,5 @@ cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: column_view col, string_scalar json_path, get_json_object_options options, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd index e5dbec879ce..d2681c46b15 100644 --- a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd @@ -1,10 +1,12 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. 
from libcpp cimport int from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: cpdef enum class inclusive(int): @@ -16,5 +18,6 @@ cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: const column_view &left_edges, inclusive left_inclusive, const column_view &right_edges, - inclusive right_inclusive + inclusive right_inclusive, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd index f546ae3bbdd..fa17e1000f9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -6,6 +6,8 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/merge.hpp" namespace "cudf" nogil: cdef unique_ptr[table] merge ( @@ -13,4 +15,5 @@ cdef extern from "cudf/merge.hpp" namespace "cudf" nogil: vector[libcudf_types.size_type] key_cols, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd index fbb1c0b2f4c..0d54a3cdc11 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd @@ -1,18 +1,22 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] edit_distance( const column_view & strings, - const column_view & targets + const column_view & targets, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] edit_distance_matrix( - const column_view & strings + const column_view & strings, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd index efd9e3de25d..58bc9b5a4dc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd @@ -1,10 +1,12 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/round.hpp" namespace "cudf" nogil: @@ -16,4 +18,5 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd index de65b554eba..91ec6a53d50 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd @@ -1,18 +1,24 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \ nogil: cdef unique_ptr[column] repeat_strings( column_view input, - size_type repeat_times) except +libcudf_exception_handler + size_type repeat_times, + cuda_stream_view stream + ) except +libcudf_exception_handler cdef unique_ptr[column] repeat_strings( column_view input, - column_view repeat_times) except +libcudf_exception_handler + column_view repeat_times, + cuda_stream_view stream + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd index f573870583d..e1857fde33d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column @@ -6,15 +6,21 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from pylibcudf.libcudf.types cimport size_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] slice_strings( column_view source_strings, numeric_scalar[size_type] start, numeric_scalar[size_type] end, - numeric_scalar[size_type] step) except +libcudf_exception_handler + numeric_scalar[size_type] step, + cuda_stream_view stream + ) except +libcudf_exception_handler cdef unique_ptr[column] slice_strings( column_view source_strings, column_view starts, - column_view stops) except +libcudf_exception_handler + column_view stops, + cuda_stream_view stream + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd index fde49afd99c..ab0944914f2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd @@ -1,15 +1,18 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil: cdef pair[ unique_ptr[column], table_view ] transpose( - table_view input_table + table_view input_table, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/merge.pxd b/python/pylibcudf/pylibcudf/merge.pxd index 4b598aa8f4f..0cfe40dbe2c 100644 --- a/python/pylibcudf/pylibcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/merge.pxd @@ -1,11 +1,14 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from .table cimport Table +from rmm.pylibrmm.stream cimport Stream + cpdef Table merge ( list tables_to_merge, list key_cols, list column_order, list null_precedence, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi index b18eb01f8a2..26cfdc6ea0c 100644 --- a/python/pylibcudf/pylibcudf/merge.pyi +++ b/python/pylibcudf/pylibcudf/merge.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order @@ -8,4 +10,5 @@ def merge( key_cols: list[int], column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx index c051cdc0c66..cc585b11cc5 100644 --- a/python/pylibcudf/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -8,7 +8,10 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_order, order, size_type +from rmm.pylibrmm.stream cimport Stream + from .table cimport Table +from .utils cimport _get_stream __all__ = ["merge"] @@ -17,6 +20,7 @@ cpdef Table merge ( list key_cols, list column_order, list null_precedence, + Stream stream=None ): """Merge a set of sorted tables. @@ -32,6 +36,8 @@ cpdef Table merge ( Whether each column should be sorted in ascending or descending order. null_precedence : List[NullOrder] Whether nulls should come before or after non-nulls. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -47,11 +53,14 @@ cpdef Table merge ( c_tables_to_merge.push_back(( tbl).view()) cdef unique_ptr[table] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_merge.merge( c_tables_to_merge, c_key_cols, c_column_order, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd index 446b95afabb..b915f8753dd 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd @@ -1,8 +1,9 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.column cimport Column +from rmm.pylibrmm.stream cimport Stream -cpdef Column edit_distance(Column input, Column targets) +cpdef Column edit_distance(Column input, Column targets, Stream stream=*) -cpdef Column edit_distance_matrix(Column input) +cpdef Column edit_distance_matrix(Column input, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi index 85bbbb880ee..50b3495f625 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi @@ -1,6 +1,12 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column -def edit_distance(input: Column, targets: Column) -> Column: ... -def edit_distance_matrix(input: Column) -> Column: ... +def edit_distance( + input: Column, targets: Column, stream: Stream | None = None +) -> Column: ... +def edit_distance_matrix( + input: Column, stream: Stream | None = None +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx index eceeaff24e3..00d0ea26c0d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -9,9 +9,13 @@ from pylibcudf.libcudf.nvtext.edit_distance cimport ( edit_distance_matrix as cpp_edit_distance_matrix, ) +from rmm.pylibrmm.stream cimport Stream + +from ..utils cimport _get_stream + __all__ = ["edit_distance", "edit_distance_matrix"] -cpdef Column edit_distance(Column input, Column targets): +cpdef Column edit_distance(Column input, Column targets, Stream stream=None): """ Returns the edit distance between individual strings in two strings columns @@ -23,6 +27,8 @@ cpdef Column edit_distance(Column input, Column targets): Input strings targets : Column Strings to compute edit distance against + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -32,14 +38,15 @@ cpdef Column edit_distance(Column input, Column targets): cdef column_view c_strings = input.view() cdef column_view c_targets = targets.view() cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_edit_distance(c_strings, c_targets) + c_result = cpp_edit_distance(c_strings, c_targets, stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column edit_distance_matrix(Column input): +cpdef Column edit_distance_matrix(Column input, Stream stream=None): """ Returns the edit distance between all strings in the input strings column @@ -49,6 +56,8 @@ cpdef Column edit_distance_matrix(Column input): ---------- input : Column Input strings + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -57,8 +66,9 @@ cpdef Column edit_distance_matrix(Column input): """ cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_edit_distance_matrix(c_strings) + c_result = cpp_edit_distance_matrix(c_strings, stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/round.pxd b/python/pylibcudf/pylibcudf/round.pxd index c8501b03fad..4ab17203d31 100644 --- a/python/pylibcudf/pylibcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/round.pxd @@ -1,12 +1,15 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libc.stdint cimport int32_t from pylibcudf.libcudf.round cimport rounding_method from .column cimport Column +from rmm.pylibrmm.stream cimport Stream + cpdef Column round( Column source, int32_t decimal_places = *, - rounding_method round_method = * + rounding_method round_method = *, + Stream stream = * ) diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi index 410cf5de586..676c6d609aa 100644 --- a/python/pylibcudf/pylibcudf/round.pyi +++ b/python/pylibcudf/pylibcudf/round.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column class RoundingMethod(IntEnum): @@ -12,4 +14,5 @@ def round( source: Column, decimal_places: int = 0, round_method: RoundingMethod = RoundingMethod.HALF_UP, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx index 024cf47a224..0bead59fc3b 100644 --- a/python/pylibcudf/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -9,14 +9,18 @@ from pylibcudf.libcudf.round import \ from pylibcudf.libcudf.column.column cimport column +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column +from .utils cimport _get_stream __all__ = ["RoundingMethod", "round"] cpdef Column round( Column source, int32_t decimal_places = 0, - rounding_method round_method = rounding_method.HALF_UP + rounding_method round_method = rounding_method.HALF_UP, + Stream stream=None ): """Rounds all the values in a column to the specified number of decimal places. @@ -32,6 +36,8 @@ cpdef Column round( The method by which to round each value. Can be one of { RoundingMethod.HALF_UP, RoundingMethod.HALF_EVEN } (default rounding_method.HALF_UP) + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -39,13 +45,16 @@ cpdef Column round( A Column with values rounded """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_round( source.view(), decimal_places, - round_method + round_method, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) RoundingMethod.__str__ = RoundingMethod.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/strings/repeat.pxd index bc70926b6fa..fd97a1671f8 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/strings/repeat.pxd @@ -1,10 +1,13 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnorSizeType: Column size_type -cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times) +cpdef Column repeat_strings( + Column input, ColumnorSizeType repeat_times, Stream stream=* +) diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi index 93a46b71caa..246ac9874c7 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyi +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi @@ -1,5 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column -def repeat_strings(input: Column, repeat_times: Column | int) -> Column: ... +def repeat_strings( + input: Column, + repeat_times: Column | int, + stream: Stream | None = None, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx index a497b1f438e..409a551c60d 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyx +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.column cimport Column @@ -6,9 +6,15 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport repeat as cpp_repeat from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream + +from ..utils cimport _get_stream + __all__ = ["repeat_strings"] -cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times): +cpdef Column repeat_strings( + Column input, ColumnorSizeType repeat_times, Stream stream=None +): """ Repeat each string in the given strings column by the numbers of times given in another numeric column. @@ -22,6 +28,8 @@ cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times): repeat_times : Column or int Number(s) of times that the corresponding input strings for each row are repeated. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -29,20 +37,23 @@ cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times): New column containing the repeated strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) if ColumnorSizeType is Column: with nogil: c_result = cpp_repeat.repeat_strings( input.view(), - repeat_times.view() + repeat_times.view(), + stream.view() ) elif ColumnorSizeType is size_type: with nogil: c_result = cpp_repeat.repeat_strings( input.view(), - repeat_times + repeat_times, + stream.view() ) else: raise ValueError("repeat_times must be size_type or integer") - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pxd b/python/pylibcudf/pylibcudf/strings/slice.pxd index 01e9f2b3c88..d33e9dd2828 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pxd +++ b/python/pylibcudf/pylibcudf/strings/slice.pxd @@ -1,7 +1,8 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar +from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -11,5 +12,6 @@ cpdef Column slice_strings( Column input, ColumnOrScalar start=*, ColumnOrScalar stop=*, - Scalar step=* + Scalar step=*, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi index 7bf9a7cb8c6..668524fc714 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyi +++ b/python/pylibcudf/pylibcudf/strings/slice.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -8,4 +10,5 @@ def slice_strings( start: Column | Scalar | None = None, stop: Column | Scalar | None = None, step: Scalar | None = None, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx index d32de7c50e0..bf09d3963ff 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -13,6 +13,9 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from cython.operator import dereference +from rmm.pylibrmm.stream cimport Stream + +from ..utils cimport _get_stream __all__ = ["slice_strings"] @@ -20,7 +23,8 @@ cpdef Column slice_strings( Column input, ColumnOrScalar start=None, ColumnOrScalar stop=None, - Scalar step=None + Scalar step=None, + Stream stream=None ): """Perform a slice operation on a strings column. @@ -41,6 +45,8 @@ cpdef Column slice_strings( The end character position or positions step : Scalar Distance between input characters retrieved + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -51,6 +57,7 @@ cpdef Column slice_strings( cdef numeric_scalar[size_type]* cpp_start cdef numeric_scalar[size_type]* cpp_stop cdef numeric_scalar[size_type]* cpp_step + stream = _get_stream(stream) if input is None: raise ValueError("input cannot be None") @@ -68,7 +75,8 @@ cpdef Column slice_strings( c_result = cpp_slice.slice_strings( input.view(), start.view(), - stop.view() + stop.view(), + stream.view() ) elif ColumnOrScalar is Scalar: @@ -94,9 +102,10 @@ cpdef Column slice_strings( input.view(), dereference(cpp_start), dereference(cpp_stop), - dereference(cpp_step) + dereference(cpp_step), + stream.view() ) else: raise ValueError("start, stop, and step must be either Column or Scalar") - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/transpose.pxd b/python/pylibcudf/pylibcudf/transpose.pxd index 7b5a7676b49..28db765e48f 100644 --- a/python/pylibcudf/pylibcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/transpose.pxd @@ -1,5 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from .table cimport Table +from rmm.pylibrmm.stream cimport Stream -cpdef Table transpose(Table input_table) + +cpdef Table transpose(Table input_table, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi index a84ab8a60ea..4acbac7ea52 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyi +++ b/python/pylibcudf/pylibcudf/transpose.pyi @@ -1,4 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream + from pylibcudf.table import Table -def transpose(input_table: Table) -> Table: ... +def transpose(input_table: Table, stream: Stream | None = None) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx index 5eb3e58cebc..cbb23e2358e 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyx +++ b/python/pylibcudf/pylibcudf/transpose.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.utility cimport move @@ -6,12 +6,15 @@ from pylibcudf.libcudf cimport transpose as cpp_transpose from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = ["transpose"] -cpdef Table transpose(Table input_table): +cpdef Table transpose(Table input_table, Stream stream=None): """Transpose a Table. For details, see :cpp:func:`transpose`. @@ -20,6 +23,8 @@ cpdef Table transpose(Table input_table): ---------- input_table : Table Table to transpose + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -28,12 +33,14 @@ cpdef Table transpose(Table input_table): """ cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table + stream = _get_stream(stream) with nogil: - c_result = cpp_transpose.transpose(input_table.view()) + c_result = cpp_transpose.transpose(input_table.view(), stream.view()) owner_table = Table( - [Column.from_libcudf(move(c_result.first))] * c_result.second.num_columns() + [Column.from_libcudf(move(c_result.first), stream)] * + c_result.second.num_columns() ) return Table.from_table_view(c_result.second, owner_table) From bf14b22a3ed88776fba15c604029829e21ec861e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 1 Aug 2025 12:35:06 -0700 Subject: [PATCH 045/366] Add hash-based SUM_WITH_OVERFLOW aggregation for INT64 values (#19403) Contributes to #19243 This PR introduces a new aggregation kind, `SUM_WITH_OVERFLOW`, which returns a `STRUCT` containing the sum and a boolean indicating overflow. If an overflow occurs, the corresponding row in the overflow boolean column will be set to `true`. 
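A minimal usage sketch of the new aggregation, assuming hypothetical `keys` and `vals` columns (the `sum_with_overflow` wrapper below is illustrative, not part of this change). Per the tests in this PR, the value column must be INT64 and the aggregation is dispatched through the hash-based groupby only:

```cpp
#include <cudf/aggregation.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <utility>
#include <vector>

std::unique_ptr<cudf::column> sum_with_overflow(cudf::column_view keys,
                                                cudf::column_view vals)  // vals must be INT64
{
  // Group by a single hypothetical key column.
  cudf::groupby::groupby gb{cudf::table_view{{keys}}};

  std::vector<cudf::groupby::aggregation_request> requests(1);
  requests[0].values = vals;
  requests[0].aggregations.push_back(
    cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>());

  auto [group_keys, results] = gb.aggregate(requests);

  // Each result row is a STRUCT: child(0) holds the (wrapped) INT64 sum,
  // child(1) holds the BOOL8 overflow flag for that group.
  return std::move(results[0].results[0]);
}
```

Because the wrapped sum and the per-group overflow flag come back together in one struct column, callers can detect overflow without a separate validation pass over the input.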
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - David Wendt (https://github.com/davidwendt) - Matthew Roeschke (https://github.com/mroeschke) - Nghia Truong (https://github.com/ttnghia) - Lawrence Mitchell (https://github.com/wence-) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19403 --- cpp/include/cudf/aggregation.hpp | 84 +++---- .../cudf/detail/aggregation/aggregation.cuh | 4 + .../cudf/detail/aggregation/aggregation.hpp | 51 ++++- .../detail/aggregation/device_aggregators.cuh | 40 ++++ cpp/src/aggregation/aggregation.cpp | 23 ++ cpp/src/aggregation/aggregation.cu | 42 +++- cpp/src/groupby/groupby.cu | 8 + cpp/src/groupby/hash/compute_aggregations.cuh | 30 ++- .../hash/create_sparse_results_table.cu | 90 ++++++-- cpp/src/groupby/hash/groupby.cu | 5 +- .../sort/group_single_pass_reduction_util.cuh | 1 - cpp/tests/groupby/sum_tests.cpp | 206 +++++++++++++++++- python/pylibcudf/tests/test_aggregation.py | 2 +- 13 files changed, 497 insertions(+), 89 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index bb086b611c7..d379674fad6 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -93,45 +93,46 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< sum of squares of differences from the mean - VARIANCE, ///< variance - STD, ///< standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - EWMA, ///< get exponential weighted moving average at current index - RANK, ///< get rank of current index - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX based UDF aggregation - CUDA, ///< CUDA based UDF aggregation - HOST_UDF, ///< host based UDF aggregation - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_M2, ///< merge partial values of M2 aggregation, - COVARIANCE, ///< covariance between two sets of elements - CORRELATION, ///< correlation between two sets of elements - TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together - HISTOGRAM, ///< compute frequency of each element - MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation - BITWISE_AGG ///< bitwise aggregation on numeric columns + SUM, ///< sum reduction + SUM_WITH_OVERFLOW, ///< sum reduction with overflow detection + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid 
elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + EWMA, ///< get exponential weighted moving average at current index + RANK, ///< get rank of current index + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX based UDF aggregation + CUDA, ///< CUDA based UDF aggregation + HOST_UDF, ///< host based UDF aggregation + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2, ///< merge partial values of M2 aggregation, + COVARIANCE, ///< covariance between two sets of elements + CORRELATION, ///< correlation between two sets of elements + TDIGEST, ///< create a tdigest from a set of input values + MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together + HISTOGRAM, ///< compute frequency of each element + MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation + BITWISE_AGG ///< bitwise aggregation on numeric columns }; aggregation() = delete; @@ -271,6 +272,11 @@ enum class ewm_history : int32_t { INFINITE, FINITE }; template std::unique_ptr make_sum_aggregation(); +/// Factory to create a SUM_WITH_OVERFLOW aggregation +/// @return A SUM_WITH_OVERFLOW aggregation object +template +std::unique_ptr make_sum_with_overflow_aggregation(); + /// Factory to create a PRODUCT aggregation /// @return A PRODUCT aggregation object template diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 00bdb229391..2124a131c19 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -78,6 +78,10 @@ struct corresponding_operator { using type = DeviceSum; }; template <> +struct corresponding_operator { + using type = DeviceSum; +}; +template <> struct corresponding_operator { using type = DeviceProduct; }; diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 49ad841cc33..81084f8bdfb 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,8 @@ class simple_aggregations_collector { // Declares the interface for the simple aggregation const& agg); virtual std::vector> visit(data_type col_type, class sum_aggregation const& agg); + virtual std::vector> visit( + data_type col_type, class sum_with_overflow_aggregation const& agg); virtual std::vector> visit(data_type col_type, class product_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -116,6 +119,7 @@ class aggregation_finalizer { // 
Declares the interface for the finalizer // Declare overloads for each kind of a agg to dispatch virtual void visit(aggregation const& agg); virtual void visit(class sum_aggregation const& agg); + virtual void visit(class sum_with_overflow_aggregation const& agg); virtual void visit(class product_aggregation const& agg); virtual void visit(class min_aggregation const& agg); virtual void visit(class max_aggregation const& agg); @@ -177,6 +181,26 @@ class sum_aggregation final : public rolling_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived class for specifying a sum_with_overflow aggregation + */ +class sum_with_overflow_aggregation final : public groupby_aggregation, + public groupby_scan_aggregation { + public: + sum_with_overflow_aggregation() : aggregation(SUM_WITH_OVERFLOW) {} + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived class for specifying a product aggregation */ @@ -1352,11 +1376,12 @@ constexpr bool is_sum_product_agg(aggregation::Kind k) (k == aggregation::SUM_OF_SQUARES); } -// Summing/Multiplying integers of any type, always use int64_t accumulator +// Summing/Multiplying integers of any type, always use int64_t accumulator (except +// SUM_WITH_OVERFLOW which has its own template) template -struct target_type_impl && is_sum_product_agg(k)>> { + requires(std::is_integral_v && is_sum_product_agg(k) && + k != aggregation::SUM_WITH_OVERFLOW) +struct target_type_impl { using type = int64_t; }; @@ -1369,12 +1394,12 @@ struct target_type_impl< using type = Source; }; -// Summing/Multiplying float/doubles, use same type accumulator +// Summing/Multiplying float/doubles, use same type accumulator (except SUM_WITH_OVERFLOW which has +// its own template) template -struct target_type_impl< - Source, - k, - std::enable_if_t && is_sum_product_agg(k)>> { + requires(std::is_floating_point_v && is_sum_product_agg(k) && + k != aggregation::SUM_WITH_OVERFLOW) +struct target_type_impl { using type = Source; }; @@ -1386,6 +1411,12 @@ struct target_type_impl +struct target_type_impl { + using type = struct_view; // SUM_WITH_OVERFLOW outputs a struct with sum and overflow fields +}; + // Always use `double` for M2 template struct target_type_impl { @@ -1599,6 +1630,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind switch (k) { case aggregation::SUM: return f.template operator()(std::forward(args)...); + case aggregation::SUM_WITH_OVERFLOW: + return f.template operator()(std::forward(args)...); case aggregation::PRODUCT: return f.template operator()(std::forward(args)...); case aggregation::MIN: diff --git a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh index 3af5afd20cd..0c5b57c51a7 100644 --- a/cpp/include/cudf/detail/aggregation/device_aggregators.cuh +++ b/cpp/include/cudf/detail/aggregation/device_aggregators.cuh @@ -25,6 +25,7 @@ #include #include +#include #include namespace cudf::detail { @@ -154,6 +155,45 @@ struct update_target_element { } }; +template + requires(cuda::std::is_same_v) +struct update_target_element { + __device__ void 
operator()(mutable_column_device_view target, + size_type target_index, + column_device_view source, + size_type source_index) const noexcept + { + // For SUM_WITH_OVERFLOW, target is a struct with sum value at child(0) and overflow flag at + // child(1) + auto sum_column = target.child(0); + auto overflow_column = target.child(1); + + auto const source_value = source.element(source_index); + auto const old_sum = + cudf::detail::atomic_add(&sum_column.element(target_index), source_value); + + // Early exit if overflow is already set to avoid unnecessary overflow checking + auto bool_ref = cuda::atomic_ref{ + *(overflow_column.data() + target_index)}; + if (bool_ref.load(cuda::memory_order_relaxed)) { return; } + + // Check for overflow before performing the addition to avoid UB + // For positive overflow: old_sum > 0, source_value > 0, and old_sum > max - source_value + // For negative overflow: old_sum < 0, source_value < 0, and old_sum < min - source_value + // TODO: to be replaced by CCCL equivalents once https://github.com/NVIDIA/cccl/pull/3755 is + // ready + auto constexpr int64_max = cuda::std::numeric_limits::max(); + auto constexpr int64_min = cuda::std::numeric_limits::min(); + auto const overflow = + ((old_sum > 0 && source_value > 0 && old_sum > int64_max - source_value) || + (old_sum < 0 && source_value < 0 && old_sum < int64_min - source_value)); + if (overflow) { + // Atomically set overflow flag to true (use atomic_max since true > false) + cudf::detail::atomic_max(&overflow_column.element(target_index), true); + } + } +}; + /** * @brief Function object to update a single element in a target column using * the dictionary key addressed by the specific index. diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 7cf45e48cc3..cb3a2b80ae4 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -41,6 +41,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, sum_with_overflow_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, product_aggregation const& agg) { @@ -258,6 +264,11 @@ void aggregation_finalizer::visit(sum_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(sum_with_overflow_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(product_aggregation const& agg) { visit(static_cast(agg)); @@ -458,6 +469,18 @@ template CUDF_EXPORT std::unique_ptr make_sum_aggregation make_sum_aggregation(); +/// Factory to create a SUM_WITH_OVERFLOW aggregation +template +std::unique_ptr make_sum_with_overflow_aggregation() +{ + return std::make_unique(); +} +template CUDF_EXPORT std::unique_ptr make_sum_with_overflow_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_sum_with_overflow_aggregation(); +template CUDF_EXPORT std::unique_ptr +make_sum_with_overflow_aggregation(); + /// Factory to create a PRODUCT aggregation template std::unique_ptr make_product_aggregation() diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu index d4e112da777..c58d1f7af7c 100644 --- a/cpp/src/aggregation/aggregation.cu +++ b/cpp/src/aggregation/aggregation.cu @@ -49,13 +49,14 @@ struct identity_initializer { template static constexpr bool is_supported() { - return cudf::is_fixed_width() and - (k == 
aggregation::SUM or k == aggregation::MIN or k == aggregation::MAX or - k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or - k == aggregation::ARGMAX or k == aggregation::ARGMIN or - k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or - k == aggregation::VARIANCE or - (k == aggregation::PRODUCT and is_product_supported())); + return (cudf::is_fixed_width() and + (k == aggregation::SUM or k == aggregation::MIN or k == aggregation::MAX or + k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or + k == aggregation::ARGMAX or k == aggregation::ARGMIN or + k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or + k == aggregation::VARIANCE or + (k == aggregation::PRODUCT and is_product_supported()))) or + (k == aggregation::SUM_WITH_OVERFLOW and std::is_same_v); } template @@ -94,11 +95,28 @@ struct identity_initializer { void operator()(mutable_column_view const& col, rmm::cuda_stream_view stream) requires(is_supported()) { - using DeviceType = device_storage_type_t; - thrust::fill(rmm::exec_policy(stream), - col.begin(), - col.end(), - get_identity()); + if constexpr (k == aggregation::SUM_WITH_OVERFLOW) { + // SUM_WITH_OVERFLOW uses a struct with sum (int64_t) and overflow (bool) children + // Initialize sum child to 0 and overflow child to false + auto sum_col = col.child(0); + auto overflow_col = col.child(1); + + auto zip_begin = thrust::make_zip_iterator( + thrust::make_tuple(sum_col.begin(), overflow_col.begin())); + thrust::fill(rmm::exec_policy_nosync(stream), + zip_begin, + zip_begin + col.size(), + thrust::make_tuple(int64_t{0}, false)); + } else if constexpr (std::is_same_v) { + // This should only happen for SUM_WITH_OVERFLOW, but handle it just in case + CUDF_FAIL("Struct columns are only supported for SUM_WITH_OVERFLOW aggregation"); + } else { + using DeviceType = device_storage_type_t; + thrust::fill(rmm::exec_policy_nosync(stream), + col.begin(), + col.end(), + get_identity()); + } } template diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 6f7db4938bb..bf3ee2a49aa 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -124,6 +124,14 @@ struct empty_column_constructor { } if constexpr (k == aggregation::Kind::MERGE_HISTOGRAM) { return empty_like(values); } + if constexpr (k == aggregation::Kind::SUM_WITH_OVERFLOW) { + // SUM_WITH_OVERFLOW returns a struct with sum (int64_t) and overflow (bool) children + std::vector> children; + children.push_back(make_empty_column(cudf::data_type{cudf::type_id::INT64})); + children.push_back(make_empty_column(cudf::data_type{cudf::type_id::BOOL8})); + return make_structs_column(0, std::move(children), 0, {}, stream, mr); + } + if constexpr (k == aggregation::Kind::RANK) { auto const& rank_agg = dynamic_cast(agg); if (rank_agg._method == cudf::rank_method::AVERAGE or diff --git a/cpp/src/groupby/hash/compute_aggregations.cuh b/cpp/src/groupby/hash/compute_aggregations.cuh index b97c8ddf88d..60a8b3c2f38 100644 --- a/cpp/src/groupby/hash/compute_aggregations.cuh +++ b/cpp/src/groupby/hash/compute_aggregations.cuh @@ -69,19 +69,29 @@ rmm::device_uvector compute_aggregations( auto const available_shmem_size = get_available_shared_memory_size(grid_size); auto const offsets_buffer_size = compute_shmem_offsets_size(flattened_values.num_columns()) * 2; auto const data_buffer_size = available_shmem_size - offsets_buffer_size; - auto const is_shared_memory_compatible = std::all_of( - requests.begin(), requests.end(), [&](cudf::groupby::aggregation_request const& request) 
{ - if (cudf::is_dictionary(request.values.type())) { return false; } - // Ensure there is enough buffer space to store local aggregations up to the max cardinality - // for shared memory aggregations - auto const size = cudf::type_dispatcher(request.values.type(), - size_of_functor{}); - return data_buffer_size >= (size * GROUPBY_CARDINALITY_THRESHOLD); + + // Check if any aggregation is SUM_WITH_OVERFLOW, which should always use global memory + auto const has_sum_with_overflow = + std::any_of(agg_kinds.begin(), agg_kinds.end(), [](aggregation::Kind k) { + return k == aggregation::SUM_WITH_OVERFLOW; }); + auto const is_shared_memory_compatible = + !has_sum_with_overflow && + std::all_of( + requests.begin(), requests.end(), [&](cudf::groupby::aggregation_request const& request) { + if (cudf::is_dictionary(request.values.type())) { return false; } + // Ensure there is enough buffer space to store local aggregations up to the max cardinality + // for shared memory aggregations + auto const size = cudf::type_dispatcher(request.values.type(), + size_of_functor{}); + return data_buffer_size >= (size * GROUPBY_CARDINALITY_THRESHOLD); + }); + // Performs naive global memory aggregations when the workload is not compatible with shared - // memory, such as when aggregating dictionary columns or when there is insufficient dynamic - // shared memory for shared memory aggregations. + // memory, such as when aggregating dictionary columns, when there is insufficient dynamic + // shared memory for shared memory aggregations, or when SUM_WITH_OVERFLOW aggregations are + // present. if (!is_shared_memory_compatible) { return compute_global_memory_aggs(num_rows, skip_rows_with_nulls, diff --git a/cpp/src/groupby/hash/create_sparse_results_table.cu b/cpp/src/groupby/hash/create_sparse_results_table.cu index 562cd9142ae..dd622bbcc8f 100644 --- a/cpp/src/groupby/hash/create_sparse_results_table.cu +++ b/cpp/src/groupby/hash/create_sparse_results_table.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,80 @@ #include namespace cudf::groupby::detail::hash { +namespace { +/** + * @brief Functor to create sparse result columns for hash-based groupby aggregations + * + * This functor handles the creation of appropriately typed and sized columns for each + * aggregation, including special handling for SUM_WITH_OVERFLOW which requires a struct column. + */ +struct sparse_column_creator { + rmm::cuda_stream_view stream; + + explicit sparse_column_creator(rmm::cuda_stream_view stream) : stream(stream) {} + + std::unique_ptr operator()(cudf::column_view const& col, + cudf::aggregation::Kind const& agg) const + { + auto const nullable = + (agg == cudf::aggregation::COUNT_VALID or agg == cudf::aggregation::COUNT_ALL) + ? false + : (col.has_nulls() or agg == cudf::aggregation::VARIANCE or agg == cudf::aggregation::STD); + auto const mask_flag = (nullable) ? cudf::mask_state::ALL_NULL : cudf::mask_state::UNALLOCATED; + auto const col_type = cudf::is_dictionary(col.type()) + ? 
cudf::dictionary_column_view(col).keys().type() + : col.type(); + + // Special handling for SUM_WITH_OVERFLOW which needs a struct column + if (agg == cudf::aggregation::SUM_WITH_OVERFLOW) { + // Lambda to create empty columns for better readability + auto make_empty_column = [&stream = this->stream](cudf::type_id type_id, + cudf::size_type size, + cudf::mask_state mask_state) { + return make_fixed_width_column(cudf::data_type{type_id}, size, mask_state, stream); + }; + + // Lambda to create children for SUM_WITH_OVERFLOW struct column + auto make_children = [&make_empty_column](cudf::size_type size, cudf::mask_state mask_state) { + std::vector> children; + // Create sum child column (int64_t) - no null mask needed, struct-level mask handles + // nullability + children.push_back( + make_empty_column(cudf::type_id::INT64, size, cudf::mask_state::UNALLOCATED)); + // Create overflow child column (bool) - no null mask needed, only value matters + children.push_back( + make_empty_column(cudf::type_id::BOOL8, size, cudf::mask_state::UNALLOCATED)); + return children; + }; + + if (col.size() == 0) { + // For empty columns, create empty struct column manually + auto children = make_children(0, cudf::mask_state::UNALLOCATED); + return create_structs_hierarchy(0, std::move(children), 0, {}, stream); + } else { + auto children = make_children(col.size(), mask_flag); + + // Create struct column with the children + // For SUM_WITH_OVERFLOW, make struct nullable if input has nulls (same as other + // aggregations) + if (nullable) { + // Start with ALL_NULL, results will be marked valid during aggregation + auto null_mask = cudf::create_null_mask(col.size(), cudf::mask_state::ALL_NULL, stream); + auto null_count = col.size(); // All null initially + return create_structs_hierarchy( + col.size(), std::move(children), null_count, std::move(null_mask), stream); + } else { + return create_structs_hierarchy(col.size(), std::move(children), 0, {}, stream); + } + } + } else { + return make_fixed_width_column( + cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); + } + } +}; +} // anonymous namespace + template void extract_populated_keys(SetType const& key_set, rmm::device_uvector& populated_keys, @@ -61,20 +136,7 @@ cudf::table create_sparse_results_table(cudf::table_view const& flattened_values flattened_values.end(), agg_kinds.begin(), std::back_inserter(sparse_columns), - [stream](auto const& col, auto const& agg) { - auto const nullable = - (agg == cudf::aggregation::COUNT_VALID or agg == cudf::aggregation::COUNT_ALL) - ? false - : (col.has_nulls() or agg == cudf::aggregation::VARIANCE or - agg == cudf::aggregation::STD); - auto const mask_flag = - (nullable) ? cudf::mask_state::ALL_NULL : cudf::mask_state::UNALLOCATED; - auto const col_type = cudf::is_dictionary(col.type()) - ? cudf::dictionary_column_view(col).keys().type() - : col.type(); - return make_fixed_width_column( - cudf::detail::target_type(col_type, agg), col.size(), mask_flag, stream); - }); + sparse_column_creator{stream}); cudf::table sparse_table(std::move(sparse_columns)); // If no direct aggregations, initialize the sparse table // only for the keys inserted in global hash set diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 30e1d52fdbf..b9f08c2c505 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,8 @@ namespace { * @brief List of aggregation operations that can be computed with a hash-based * implementation. */ -constexpr std::array hash_aggregations{aggregation::SUM, +constexpr std::array hash_aggregations{aggregation::SUM, + aggregation::SUM_WITH_OVERFLOW, aggregation::PRODUCT, aggregation::MIN, aggregation::MAX, diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 9dba468bf14..5afdc82892e 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -152,7 +152,6 @@ struct group_reduction_functor< cudf::device_span group_labels, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) - { using SourceDType = device_storage_type_t; using ResultType = cudf::detail::target_type_t; diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index 5f5329e5d7a..f0ab60faab2 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -230,3 +230,207 @@ TYPED_TEST(GroupBySumFixedPointTest, GroupByHashSumDecimalAsValue) EXPECT_THROW(test_single_agg(keys, vals, expect_keys, {}, std::move(agg8)), cudf::logic_error); } } + +// SUM_WITH_OVERFLOW tests - only supports int64_t input values and outputs int64_t +template +struct groupby_sum_with_overflow_test : public cudf::test::BaseFixture {}; + +using sum_with_overflow_supported_types = cudf::test::Types; + +TYPED_TEST_SUITE(groupby_sum_with_overflow_test, sum_with_overflow_supported_types); + +TYPED_TEST(groupby_sum_with_overflow_test, basic) +{ + using V = TypeParam; + + cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + cudf::test::fixed_width_column_wrapper expect_keys{1, 2, 3}; + + // Create expected struct column with sum and overflow children + auto sum_col = cudf::test::fixed_width_column_wrapper{9, 19, 17}; + auto overflow_col = cudf::test::fixed_width_column_wrapper{false, false, false}; + std::vector> children; + children.push_back(sum_col.release()); + children.push_back(overflow_col.release()); + auto expect_vals = cudf::create_structs_hierarchy(3, std::move(children), 0, {}); + + auto agg = cudf::make_sum_with_overflow_aggregation(); + test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg)); + + // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby +} + +TYPED_TEST(groupby_sum_with_overflow_test, empty_cols) +{ + using V = TypeParam; + + cudf::test::fixed_width_column_wrapper keys{}; + cudf::test::fixed_width_column_wrapper vals{}; + + cudf::test::fixed_width_column_wrapper expect_keys{}; + + // Create expected empty struct column with sum and overflow children + auto sum_col = cudf::test::fixed_width_column_wrapper{}; + auto overflow_col = cudf::test::fixed_width_column_wrapper{}; + std::vector> children; + children.push_back(sum_col.release()); + children.push_back(overflow_col.release()); + auto expect_vals = cudf::create_structs_hierarchy(0, std::move(children), 0, {}); + + auto agg = 
+  test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg));
+
+  // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby
+}
+
+TYPED_TEST(groupby_sum_with_overflow_test, zero_valid_keys)
+{
+  using V = TypeParam;
+
+  cudf::test::fixed_width_column_wrapper<K> keys({1, 2, 3}, cudf::test::iterators::all_nulls());
+  cudf::test::fixed_width_column_wrapper<V> vals{3, 4, 5};
+
+  cudf::test::fixed_width_column_wrapper<K> expect_keys{};
+
+  // Create expected empty struct column with sum and overflow children
+  auto sum_col      = cudf::test::fixed_width_column_wrapper<int64_t>{};
+  auto overflow_col = cudf::test::fixed_width_column_wrapper<bool>{};
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(sum_col.release());
+  children.push_back(overflow_col.release());
+  auto expect_vals = cudf::create_structs_hierarchy(0, std::move(children), 0, {});
+
+  auto agg = cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg));
+
+  // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby
+}
+
+TYPED_TEST(groupby_sum_with_overflow_test, zero_valid_values)
+{
+  using V = TypeParam;
+
+  cudf::test::fixed_width_column_wrapper<K> keys{1, 1, 1};
+  cudf::test::fixed_width_column_wrapper<V> vals({3, 4, 5}, cudf::test::iterators::all_nulls());
+
+  cudf::test::fixed_width_column_wrapper<K> expect_keys{1};
+
+  // Create expected struct column with sum and overflow children (null result)
+  // Child columns have no null masks, only struct-level null mask matters
+  auto sum_col      = cudf::test::fixed_width_column_wrapper<int64_t>({0});
+  auto overflow_col = cudf::test::fixed_width_column_wrapper<bool>({false});
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(sum_col.release());
+  children.push_back(overflow_col.release());
+  std::vector<bool> validity{0};  // null struct
+  auto [validity_mask, null_count] =
+    cudf::test::detail::make_null_mask(validity.begin(), validity.end());
+  auto expect_vals =
+    cudf::create_structs_hierarchy(1, std::move(children), null_count, std::move(validity_mask));
+
+  auto agg = cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg));
+
+  // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby
+}
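The expected values above carry nullability only at the struct level. For reference, a minimal sketch (not from the patch) of building such a column with the public factories, using cudf::make_structs_column in place of the create_structs_hierarchy helper the tests use:

    // Illustrative only: build a one-row STRUCT<INT64, BOOL8> column whose single
    // row is null at the struct level while the children carry no null masks,
    // mirroring the expected result in zero_valid_values above.
    #include <cudf/column/column_factories.hpp>
    #include <cudf/null_mask.hpp>

    #include <memory>
    #include <vector>

    std::unique_ptr<cudf::column> make_null_struct_row()
    {
      std::vector<std::unique_ptr<cudf::column>> children;
      children.push_back(cudf::make_numeric_column(
        cudf::data_type{cudf::type_id::INT64}, 1, cudf::mask_state::UNALLOCATED));
      children.push_back(cudf::make_numeric_column(
        cudf::data_type{cudf::type_id::BOOL8}, 1, cudf::mask_state::UNALLOCATED));
      // ALL_NULL struct-level mask: the row is null even though the children are unmasked
      auto null_mask = cudf::create_null_mask(1, cudf::mask_state::ALL_NULL);
      return cudf::make_structs_column(1, std::move(children), 1, std::move(null_mask));
    }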
+
+TYPED_TEST(groupby_sum_with_overflow_test, null_keys_and_values)
+{
+  using V = TypeParam;
+
+  cudf::test::fixed_width_column_wrapper<K> keys(
+    {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+    {true, true, true, true, true, true, true, false, true, true, true});
+  cudf::test::fixed_width_column_wrapper<V> vals({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4},
+                                                 {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+
+  //  { 1, 1,  2, 2, 2,  3, 3,  4}
+  cudf::test::fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4},
+                                                        cudf::test::iterators::no_nulls());
+
+  // Create expected struct column with sum and overflow children
+  //  { 3, 6,  1, 4, 9,  2, 8,  -}
+  // Child columns have no null masks, only struct-level null mask matters
+  auto sum_col      = cudf::test::fixed_width_column_wrapper<int64_t>({9, 14, 10, 0});
+  auto overflow_col = cudf::test::fixed_width_column_wrapper<bool>({false, false, false, false});
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(sum_col.release());
+  children.push_back(overflow_col.release());
+  std::vector<bool> validity{1, 1, 1, 0};
+  auto [validity_mask, null_count] =
+    cudf::test::detail::make_null_mask(validity.begin(), validity.end());
+  auto expect_vals =
+    cudf::create_structs_hierarchy(4, std::move(children), null_count, std::move(validity_mask));
+
+  auto agg = cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg));
+
+  // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby
+}
+
+TYPED_TEST(groupby_sum_with_overflow_test, overflow_detection)
+{
+  using V = TypeParam;
+
+  cudf::test::fixed_width_column_wrapper<K> keys{1, 2, 3, 4, 1, 2, 2, 1, 3, 3, 2, 4, 4};
+  // Mix of values that will cause positive and negative overflow for some groups but not others
+  cudf::test::fixed_width_column_wrapper<V> vals{
+    9223372036854775800L,    // Close to INT64_MAX
+    100L,                    // Small value
+    200L,                    // Small value
+    -9223372036854775800L,   // Close to INT64_MIN
+    20L,                     // Small value that will cause positive overflow when added to first
+    200L,                    // Small value
+    300L,                    // Small value
+    9223372036854775800L,    // Close to INT64_MAX
+    9223372036854775800L,    // Close to INT64_MAX
+    1L,                      // Small value
+    400L,                    // Small value
+    -20L,                    // Small value that will cause negative overflow when added to fourth
+    -9223372036854775800L};  // Close to INT64_MIN
+
+  cudf::test::fixed_width_column_wrapper<K> expect_keys{1, 2, 3, 4};
+
+  // Create expected struct column with sum and overflow children
+  // Group 1: 9223372036854775800 + 20 + 9223372036854775800 = positive overflow
+  // Group 2: 100 + 200 + 300 + 400 = 1000 (no overflow)
+  // Group 3: 200 + 9223372036854775800 + 1 = positive overflow
+  // Group 4: -9223372036854775800 + (-20) + (-9223372036854775800) = negative overflow
+  auto sum_col = cudf::test::fixed_width_column_wrapper<int64_t>{
+    4L,                     // Positive overflow result for group 1
+    1000L,                  // Normal sum for group 2 (no overflow)
+    -9223372036854775615L,  // Positive overflow result for group 3
+    -4L                     // Negative overflow result for group 4
+  };
+  auto overflow_col = cudf::test::fixed_width_column_wrapper<bool>{true, false, true, true};
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(sum_col.release());
+  children.push_back(overflow_col.release());
+  auto expect_vals = cudf::create_structs_hierarchy(4, std::move(children), 0, {});
+
+  auto agg = cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, *expect_vals, std::move(agg));
+
+  // Note: SUM_WITH_OVERFLOW only works with hash groupby, not sort groupby
+}
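The wrapped sums above follow two's-complement arithmetic. A host-side sketch (not from the patch) that reproduces the group-1 expectation; __builtin_add_overflow is a GCC/Clang intrinsic used here only for illustration, not necessarily what the device kernel uses:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
      int64_t sum   = 0;
      bool overflow = false;
      // Group 1 values: two near-INT64_MAX inputs plus 20
      for (int64_t v : {INT64_C(9223372036854775800), INT64_C(20), INT64_C(9223372036854775800)}) {
        overflow |= __builtin_add_overflow(sum, v, &sum);
      }
      // Prints "sum=4 overflow=1": the wrapped sum and flag expected for group 1
      std::printf("sum=%lld overflow=%d\n", static_cast<long long>(sum), static_cast<int>(overflow));
      return 0;
    }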
+
+// Test that SUM_WITH_OVERFLOW throws an error for invalid value types
+TEST(groupby_sum_with_overflow_error_test, invalid_value_type)
+{
+  using K = int32_t;
+  using V = int32_t;  // Invalid type for SUM_WITH_OVERFLOW, should only support int64_t
+
+  cudf::test::fixed_width_column_wrapper<K> keys{1, 1, 1, 2, 2, 2, 3, 3, 3};
+  cudf::test::fixed_width_column_wrapper<V> vals{1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+  cudf::test::fixed_width_column_wrapper<K> expect_keys{1, 2, 3};
+
+  auto agg = cudf::make_sum_with_overflow_aggregation<cudf::groupby_aggregation>();
+
+  // SUM_WITH_OVERFLOW should throw a logic_error when used with non-int64_t value types
+  EXPECT_THROW(
+    test_single_agg(keys, vals, expect_keys, {}, std::move(agg), force_use_sort_impl::NO),
+    cudf::logic_error);
+}
diff --git a/python/pylibcudf/tests/test_aggregation.py b/python/pylibcudf/tests/test_aggregation.py
index f1a4a38a83f..8250ab16acf 100644
--- a/python/pylibcudf/tests/test_aggregation.py
+++ b/python/pylibcudf/tests/test_aggregation.py
@@ -4,4 +4,4 @@

 def test_repr_name():
-    assert repr(plc.aggregation.any()) == ")>"
+    assert repr(plc.aggregation.any()) == ")>"

From d33498b001c5f07148091b6458d73c22053d650a Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 4 Aug 2025 08:56:45 -0400
Subject: [PATCH 046/366] Rework fill/repeat benchmark to use nvbench (#19556)

Converts the googlebench FILL_BENCH to use nvbench. Currently this covers only
the `cudf::repeat` API. Also adds an axis to measure int32 as well as double.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Shruti Shivakumar (https://github.com/shrshi)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/19556
---
 cpp/benchmarks/CMakeLists.txt     |  2 +-
 cpp/benchmarks/filling/repeat.cpp | 68 ++++++++++++++-----------
 2 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index c8d2cae9fd2..21f56e1331c 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -250,7 +250,7 @@ ConfigureNVBench(REPLACE_NVBENCH replace/nulls.cpp)

 # ##################################################################################################
 # * filling benchmark -----------------------------------------------------------------------------
-ConfigureBench(FILL_BENCH filling/repeat.cpp)
+ConfigureNVBench(FILL_NVBENCH filling/repeat.cpp)
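The rewritten benchmark source follows. As a primer on the registration pattern it uses, a minimal self-contained nvbench skeleton (an illustrative sketch; names here are not from the patch):

    #include <nvbench/nvbench.cuh>

    // Typed benchmark: nvbench instantiates one variant per type in the type axis
    // and crosses it with every value of each int64 axis.
    template <typename T>
    void example_bench(nvbench::state& state, nvbench::type_list<T>)
    {
      auto const num_rows = state.get_int64("num_rows");
      state.exec([num_rows](nvbench::launch& launch) {
        // work under test goes here, e.g. a kernel over num_rows elements of T
      });
    }

    NVBENCH_BENCH_TYPES(example_bench, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, double>))
      .set_name("example")
      .set_type_axes_names({"DataType"})
      .add_int64_power_of_two_axis("num_rows", {10, 14, 18});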
diff --git a/cpp/benchmarks/filling/repeat.cpp b/cpp/benchmarks/filling/repeat.cpp
index 0abef46acac..92559b50bec 100644
--- a/cpp/benchmarks/filling/repeat.cpp
+++ b/cpp/benchmarks/filling/repeat.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -15,57 +15,49 @@
 */

 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
+#include <cudf/utilities/default_stream.hpp>

 #include <cudf/filling.hpp>

-class Repeat : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>

-template <class TypeParam, bool nulls>
-void BM_repeat(benchmark::State& state)
+namespace {
+template <typename DataType>
+void nvbench_repeat(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  auto const n_rows = static_cast<cudf::size_type>(state.range(0));
-  auto const n_cols = static_cast<cudf::size_type>(state.range(1));
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const num_cols = static_cast<cudf::size_type>(state.get_int64("num_cols"));
+  auto const nulls    = state.get_int64("nulls");

   auto const input_table =
-    create_sequence_table(cycle_dtypes({cudf::type_to_id<TypeParam>()}, n_cols),
-                          row_count{n_rows},
-                          nulls ? std::optional<double>{1.0} : std::nullopt);
+    create_sequence_table(cycle_dtypes({cudf::type_to_id<DataType>()}, num_cols),
+                          row_count{num_rows},
+                          nulls ? std::optional<double>{0.1} : std::nullopt);

   // Create table view
-  auto input = cudf::table_view(*input_table);
+  auto const input = input_table->view();

   // repeat counts
-  using sizeT = cudf::size_type;
   data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
-    cudf::type_to_id<sizeT>(), distribution_id::UNIFORM, 0, 3);
-  auto repeat_count = create_random_column(cudf::type_to_id<sizeT>(), row_count{n_rows}, profile);
+    cudf::type_to_id<cudf::size_type>(), distribution_id::UNIFORM, 0, 3);
+  auto counts =
+    create_random_column(cudf::type_to_id<cudf::size_type>(), row_count{num_rows}, profile);

-  // warm up
-  auto output = cudf::repeat(input, *repeat_count);
+  auto output = cudf::repeat(input, counts->view());

-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf::repeat(input, *repeat_count);
-  }
+  state.add_global_memory_reads<nvbench::int8_t>(input_table->alloc_size());
+  state.add_global_memory_writes<nvbench::int8_t>(output->alloc_size());
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

-  auto data_bytes =
-    (input.num_columns() * input.num_rows() + output->num_columns() * output->num_rows()) *
-    sizeof(TypeParam);
-  auto null_bytes =
-    nulls ? input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()) +
-              output->num_columns() * cudf::bitmask_allocation_size_bytes(output->num_rows())
-          : 0;
-  state.SetBytesProcessed(state.iterations() * (data_bytes + null_bytes));
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch) { auto result = cudf::repeat(input, counts->view()); });
 }
+}  // namespace

-#define REPEAT_BENCHMARK_DEFINE(name, type, nulls)                                                \
-  BENCHMARK_DEFINE_F(Repeat, name)(::benchmark::State & state) { BM_repeat<type, nulls>(state); } \
-  BENCHMARK_REGISTER_F(Repeat, name)                                                              \
-    ->RangeMultiplier(8)                                                                          \
-    ->Ranges({{1 << 10, 1 << 26}, {1, 8}})                                                        \
-    ->UseManualTime()                                                                             \
-    ->Unit(benchmark::kMillisecond);
+using Types = nvbench::type_list<int32_t, double>;

-REPEAT_BENCHMARK_DEFINE(double_nulls, double, true);
-REPEAT_BENCHMARK_DEFINE(double_no_nulls, double, false);
+NVBENCH_BENCH_TYPES(nvbench_repeat, NVBENCH_TYPE_AXES(Types))
+  .set_name("repeat")
+  .set_type_axes_names({"DataType"})
+  .add_int64_power_of_two_axis("num_rows", {10, 14, 18, 22, 26})
+  .add_int64_axis("num_cols", {1, 2, 4, 8})
+  .add_int64_axis("nulls", {0, 1});

From 64c016de4f737b2dca543feeca16ae1c36e33354 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 4 Aug 2025 08:57:39 -0400
Subject: [PATCH 047/366] Remove deprecated subword-tokenizer APIs (#19498)

Removes the subword-tokenizer APIs from cudf, which have been deprecated and
replaced with the wordpiece-tokenizer APIs.
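For callers migrating off the removed APIs, a hedged sketch (not part of the patch): the replacements live in nvtext/wordpiece_tokenize.hpp, and the signatures below reflect that header as of this release; vocab_strings is an assumed strings column holding one vocabulary entry per row:

    #include <cudf/column/column.hpp>
    #include <cudf/strings/strings_column_view.hpp>
    #include <nvtext/wordpiece_tokenize.hpp>

    #include <memory>

    // Replaces: nvtext::load_vocabulary_file(...) + nvtext::subword_tokenize(...)
    std::unique_ptr<cudf::column> tokenize_ids(cudf::strings_column_view const& input,
                                               cudf::strings_column_view const& vocab_strings)
    {
      // The vocabulary is now loaded from a strings column rather than a hashed vocab.txt file
      auto vocab = nvtext::load_wordpiece_vocabulary(vocab_strings);
      // Returns a LIST column of INT32 token ids per row; max_words_per_row = 0
      // applies no per-row word limit
      return nvtext::wordpiece_tokenize(input, *vocab, 0);
    }

Unlike subword_tokenize, the wordpiece tokenizer does not emit fixed-shape tensors or attention masks, so callers that need padded tensors build them from the returned list column.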
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19498 --- cpp/CMakeLists.txt | 4 - cpp/include/nvtext/detail/load_hash_file.hpp | 50 -- cpp/include/nvtext/subword_tokenize.hpp | 172 ------ .../detail/codepoint_metadata.ah | 0 cpp/src/text/{subword => }/detail/cp_data.h | 0 cpp/src/text/normalize.cu | 71 ++- cpp/src/text/normalize.cuh | 21 +- cpp/src/text/subword/data_normalizer.cu | 280 --------- .../text/subword/detail/data_normalizer.hpp | 97 --- cpp/src/text/subword/detail/hash_utils.cuh | 172 ------ .../text/subword/detail/tokenizer_utils.cuh | 76 --- .../subword/detail/wordpiece_tokenizer.hpp | 106 ---- cpp/src/text/subword/load_hash_file.cu | 301 ---------- cpp/src/text/subword/subword_tokenize.cu | 300 ---------- cpp/src/text/subword/wordpiece_tokenizer.cu | 562 ------------------ .../pylibcudf/api_docs/nvtext/index.rst | 1 - .../api_docs/nvtext/subword_tokenize.rst | 6 - .../api_docs/wordpiece_tokenizer.rst | 7 - python/cudf/cudf/core/column/string.py | 26 - python/cudf/cudf/core/subword_tokenizer.py | 299 ---------- .../libcudf/nvtext/subword_tokenize.pxd | 55 -- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 1 - .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 2 - python/pylibcudf/pylibcudf/nvtext/__init__.py | 2 - .../pylibcudf/nvtext/subword_tokenize.pxd | 20 - .../pylibcudf/nvtext/subword_tokenize.pyi | 15 - .../pylibcudf/nvtext/subword_tokenize.pyx | 87 --- 27 files changed, 89 insertions(+), 2644 deletions(-) delete mode 100644 cpp/include/nvtext/detail/load_hash_file.hpp delete mode 100644 cpp/include/nvtext/subword_tokenize.hpp rename cpp/src/text/{subword => }/detail/codepoint_metadata.ah (100%) rename cpp/src/text/{subword => }/detail/cp_data.h (100%) delete mode 100644 cpp/src/text/subword/data_normalizer.cu delete mode 100644 cpp/src/text/subword/detail/data_normalizer.hpp delete mode 100644 cpp/src/text/subword/detail/hash_utils.cuh delete mode 100644 cpp/src/text/subword/detail/tokenizer_utils.cuh delete mode 100644 cpp/src/text/subword/detail/wordpiece_tokenizer.hpp delete mode 100644 cpp/src/text/subword/load_hash_file.cu delete mode 100644 cpp/src/text/subword/subword_tokenize.cu delete mode 100644 cpp/src/text/subword/wordpiece_tokenizer.cu delete mode 100644 docs/cudf/source/pylibcudf/api_docs/nvtext/subword_tokenize.rst delete mode 100644 python/cudf/cudf/core/subword_tokenizer.py delete mode 100644 python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd delete mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd delete mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi delete mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 653c61fcb96..50b96a9baf4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -798,10 +798,6 @@ add_library( src/text/stemmer.cu src/text/bpe/byte_pair_encoding.cu src/text/bpe/load_merge_pairs.cu - src/text/subword/data_normalizer.cu - src/text/subword/load_hash_file.cu - src/text/subword/subword_tokenize.cu - src/text/subword/wordpiece_tokenizer.cu src/text/tokenize.cu src/text/vocabulary_tokenize.cu src/text/wordpiece_tokenize.cu diff --git a/cpp/include/nvtext/detail/load_hash_file.hpp b/cpp/include/nvtext/detail/load_hash_file.hpp deleted file mode 100644 index 1334cbf47ea..00000000000 --- 
a/cpp/include/nvtext/detail/load_hash_file.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include - -#include - -#include -#include - -namespace CUDF_EXPORT nvtext { -namespace detail { - -/** - * @brief Load the hashed vocabulary file into device memory. - * - * The object here can be used to call the subword_tokenize without - * incurring the cost of loading the same file each time. - * - * @param filename_hashed_vocabulary A path to the preprocessed vocab.txt file. - * Note that this is the file AFTER python/perfect_hash.py has been used - * for preprocessing. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Memory resource to allocate any returned objects. - * @return vocabulary hash-table elements - */ -std::unique_ptr load_vocabulary_file( - std::string const& filename_hashed_vocabulary, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -} // namespace detail -} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp deleted file mode 100644 index 6e04ec6a5a3..00000000000 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include - -namespace CUDF_EXPORT nvtext { - -/** - * @addtogroup nvtext_tokenize - * @{ - * @file - */ - -/** - * @brief The vocabulary data for use with the subword_tokenize function. 
- */ -struct hashed_vocabulary { - uint16_t first_token_id{}; ///< The first token id in the vocabulary - uint16_t separator_token_id{}; ///< The separator token id in the vocabulary - uint16_t unknown_token_id{}; ///< The unknown token id in the vocabulary - uint32_t outer_hash_a{}; ///< The a parameter for the outer hash - uint32_t outer_hash_b{}; ///< The b parameter for the outer hash - uint16_t num_bins{}; ///< Number of bins - std::unique_ptr table; ///< uint64 column, the flattened hash table with key, value - ///< pairs packed in 64-bits - std::unique_ptr bin_coefficients; ///< uint64 column, containing the hashing - ///< parameters for each hash bin on the GPU - std::unique_ptr bin_offsets; ///< uint16 column, containing the start index of each - ///< bin in the flattened hash table - std::unique_ptr - cp_metadata; ///< uint32 column, The code point metadata table to use for normalization - std::unique_ptr - aux_cp_table; ///< uint64 column, The auxiliary code point table to use for normalization -}; - -/** - * @brief Load the hashed vocabulary file into device memory. - * - * The object here can be used to call the subword_tokenize without - * incurring the cost of loading the same file each time. - * - * @deprecated in 25.06 and to be removed in a future release - * - * @throw cudf::logic_error if the `filename_hashed_vocabulary` could not be opened. - * - * @param filename_hashed_vocabulary A path to the preprocessed vocab.txt file. - * Note that this is the file AFTER python/perfect_hash.py has been used - * for preprocessing. - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Memory resource to allocate any returned objects. - * @return vocabulary hash-table elements - */ -[[deprecated]] std::unique_ptr load_vocabulary_file( - std::string const& filename_hashed_vocabulary, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Result object for the subword_tokenize functions. - */ -struct tokenizer_result { - /** - * @brief The number of rows for the output token-ids. - */ - uint32_t nrows_tensor{}; - /** - * @brief The number of token-ids in each row. - */ - uint32_t sequence_length{}; - /** - * @brief A vector of token-ids for each row. - * - * The data is a flat matrix (nrows_tensor x sequence_length) of token-ids. - * This column is of type UINT32 with no null entries. - */ - std::unique_ptr tensor_token_ids; - /** - * @brief This mask identifies which tensor-token-ids are valid. - * - * This column is of type UINT32 with no null entries. - */ - std::unique_ptr tensor_attention_mask; - /** - * @brief The metadata for each tensor row. - * - * There are three elements per tensor row [row-id, start_pos, stop_pos]) - * This column is of type UINT32 with no null entries. - */ - std::unique_ptr tensor_metadata; -}; - -/** - * @brief Creates a tokenizer that cleans the text, splits it into tokens and - * returns token-ids from an input vocabulary. - * - * @deprecated in 25.06 and to be removed in a future release - * Use nvtext::wordpiece_tokenize instead - * - * The strings are first normalized by converting to lower-case, removing - * punctuation, replacing a select set of multi-byte characters and - * whitespace characters. - * - * The strings are then tokenized by using whitespace as a delimiter. - * Consecutive delimiters are ignored. Each token is then assigned - * a 4-byte token-id mapped from the provided vocabulary table. 
- * - * Essentially each string is converted into one or more vectors of token-ids - * in the output column. The total number of these vectors times `max_sequence_length` - * is the size of the `tensor_token_ids` output column. For `do_truncate==true`: - * ``` - * size of tensor_token_ids = max_sequence_length * strings.size() - * size of tensor_attention_mask = max_sequence_length * strings.size() - * size of tensor_metadata = 3 * strings.size() - * ``` - * - * For `do_truncate==false` the number of rows per output string depends on the - * number of tokens resolved and the `stride` value which may repeat tokens - * in subsequent overflow rows. - * - * This function requires about 21x the number of character bytes in the input - * strings column as working memory. - * - * @throw cudf::logic_error if `stride > max_sequence_length` - * @throw std::overflow_error if `max_sequence_length * max_rows_tensor` - * exceeds the column size limit - * - * @param strings The input strings to tokenize. - * @param vocabulary_table The vocabulary table pre-loaded into this object. - * @param max_sequence_length Limit of the number of token-ids per row in final tensor - * for each string. - * @param stride Each row in the output token-ids will replicate `max_sequence_length - stride` - * the token-ids from the previous row, unless it is the first string. - * @param do_lower_case If true, the tokenizer will convert uppercase characters in the - * input stream to lower-case and strip accents from those characters. - * If false, accented and uppercase characters are not transformed. - * @param do_truncate If true, the tokenizer will discard all the token-ids after - * `max_sequence_length` for each input string. If false, it will use a new row - * in the output token-ids to continue generating the output. - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Memory resource to allocate any returned objects. - * @return token-ids, attention-mask, and metadata - */ -[[deprecated]] tokenizer_result subword_tokenize( - cudf::strings_column_view const& strings, - hashed_vocabulary const& vocabulary_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower_case, - bool do_truncate, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** @} */ // end of group -} // namespace CUDF_EXPORT nvtext diff --git a/cpp/src/text/subword/detail/codepoint_metadata.ah b/cpp/src/text/detail/codepoint_metadata.ah similarity index 100% rename from cpp/src/text/subword/detail/codepoint_metadata.ah rename to cpp/src/text/detail/codepoint_metadata.ah diff --git a/cpp/src/text/subword/detail/cp_data.h b/cpp/src/text/detail/cp_data.h similarity index 100% rename from cpp/src/text/subword/detail/cp_data.h rename to cpp/src/text/detail/cp_data.h diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 701056e44f2..a0ddf2c2b6e 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -14,9 +14,8 @@ * limitations under the License. 
*/ +#include "text/detail/codepoint_metadata.ah" #include "text/normalize.cuh" -#include "text/subword/detail/data_normalizer.hpp" -#include "text/subword/detail/tokenizer_utils.cuh" #include "text/utilities/tokenize_ops.cuh" #include @@ -136,6 +135,74 @@ std::unique_ptr normalize_spaces(cudf::strings_column_view const& cudf::detail::copy_bitmask(strings.parent(), stream, mr)); } +/** + * @brief Retrieve the code point metadata table. + * + * Build the code point metadata table in device memory + * using the vector pieces from codepoint_metadata.ah + */ +rmm::device_uvector get_codepoint_metadata(rmm::cuda_stream_view stream) +{ + auto table_vector = rmm::device_uvector(codepoint_metadata_size, stream); + auto table = table_vector.data(); + thrust::fill(rmm::exec_policy(stream), + table + cp_section1_end, + table + codepoint_metadata_size, + codepoint_metadata_default_value); + CUDF_CUDA_TRY(cudaMemcpyAsync(table, + codepoint_metadata, + cp_section1_end * sizeof(codepoint_metadata[0]), // 1st section + cudaMemcpyDefault, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( + table + cp_section2_begin, + cp_metadata_917505_917999, + (cp_section2_end - cp_section2_begin + 1) * sizeof(codepoint_metadata[0]), // 2nd section + cudaMemcpyDefault, + stream.value())); + return table_vector; +} + +/** + * @brief Retrieve the aux code point data table. + * + * Build the aux code point data table in device memory + * using the vector pieces from codepoint_metadata.ah + */ +rmm::device_uvector get_aux_codepoint_data(rmm::cuda_stream_view stream) +{ + auto table_vector = rmm::device_uvector(aux_codepoint_data_size, stream); + auto table = table_vector.data(); + thrust::fill(rmm::exec_policy(stream), + table + aux_section1_end, + table + aux_codepoint_data_size, + aux_codepoint_default_value); + CUDF_CUDA_TRY(cudaMemcpyAsync(table, + aux_codepoint_data, + aux_section1_end * sizeof(aux_codepoint_data[0]), // 1st section + cudaMemcpyDefault, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( + table + aux_section2_begin, + aux_cp_data_44032_55203, + (aux_section2_end - aux_section2_begin + 1) * sizeof(aux_codepoint_data[0]), // 2nd section + cudaMemcpyDefault, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( + table + aux_section3_begin, + aux_cp_data_70475_71099, + (aux_section3_end - aux_section3_begin + 1) * sizeof(aux_codepoint_data[0]), // 3rd section + cudaMemcpyDefault, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( + table + aux_section4_begin, + aux_cp_data_119134_119232, + (aux_section4_end - aux_section4_begin + 1) * sizeof(aux_codepoint_data[0]), // 4th section + cudaMemcpyDefault, + stream.value())); + return table_vector; +} + } // namespace detail // external APIs diff --git a/cpp/src/text/normalize.cuh b/cpp/src/text/normalize.cuh index 3972726d536..c2a18e8a137 100644 --- a/cpp/src/text/normalize.cuh +++ b/cpp/src/text/normalize.cuh @@ -16,7 +16,12 @@ #pragma once -#include "text/subword/detail/cp_data.h" +#include "text/detail/cp_data.h" + +#include +#include + +#include namespace nvtext { namespace detail { @@ -96,5 +101,19 @@ __device__ constexpr bool is_multi_char_transform(uint32_t metadata) */ __device__ constexpr bool is_head_byte(unsigned char utf8_byte) { return (utf8_byte >> 6) != 2; } +/** + * @brief Retrieve the code point metadata table. + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +rmm::device_uvector get_codepoint_metadata(rmm::cuda_stream_view stream); + +/** + * @brief Retrieve the auxiliary code point metadata table. + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +rmm::device_uvector get_aux_codepoint_data(rmm::cuda_stream_view stream); + } // namespace detail } // namespace nvtext diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu deleted file mode 100644 index d9943fb781b..00000000000 --- a/cpp/src/text/subword/data_normalizer.cu +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "text/normalize.cuh" -#include "text/subword/detail/data_normalizer.hpp" -#include "text/subword/detail/tokenizer_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace nvtext { -namespace detail { -namespace { - -/** - * @brief Converts a UTF-8 character into a unicode code point value. - * - * If the byte at start_byte_for_thread is the first byte of a UTF-8 character (head byte), - * the UTF-8 character is converted to a unicode code point and returned. - * - * If the byte at start_byte_for_thread is not a head byte, 0 is returned. - * - * All threads start reading bytes from the pointer denoted by strings. - * - * @param strings A pointer to the start of the sequence of characters to be analyzed. - * @param start_byte_for_thread Which byte to start analyzing - * @return New code point value for this byte. - */ -__device__ uint32_t -extract_code_points_from_utf8(unsigned char const* strings, - size_t const total_bytes, - cudf::thread_index_type const start_byte_for_thread) -{ - constexpr uint8_t max_utf8_blocks_for_char = 4; - uint8_t utf8_blocks[max_utf8_blocks_for_char] = {0}; - - for (int i = 0; i < cuda::std::min(static_cast(max_utf8_blocks_for_char), - total_bytes - start_byte_for_thread); - ++i) { - utf8_blocks[i] = strings[start_byte_for_thread + i]; - } - - uint8_t const length_encoding_bits = utf8_blocks[0] >> 3; - // UTF-8 format is variable-width character encoding using up to 4 bytes. - // If the first byte is: - // - [x00-x7F] -- beginning of a 1-byte character (ASCII) - // - [xC0-xDF] -- beginning of a 2-byte character - // - [xE0-xEF] -- beginning of a 3-byte character - // - [xF0-xF7] -- beginning of a 3-byte character - // Anything else is an intermediate byte [x80-xBF]. - // So shifted by 3 bits this becomes - // - [x00-x0F] or leb < 16 - // - [x18-x1B] or 24 <= leb <= 27 - // - [x1C-x1D] or 28 <= leb <= 29 - // - [x1E-x1F] or leb >= 30 - // The remaining bits are part of the value as specified by the mask - // specified by x's below. 
- // - b0xxxxxxx = x7F - // - b110xxxxx = x1F - // - b1110xxxx = x0F - // - b11110xxx = x07 - using encoding_length_pair = thrust::pair; - // Set the number of characters and the top masks based on the length encoding bits. - encoding_length_pair const char_encoding_length = [length_encoding_bits] { - if (length_encoding_bits < 16) return encoding_length_pair{1, 0x7F}; - if (length_encoding_bits >= 24 && length_encoding_bits <= 27) - return encoding_length_pair{2, 0x1F}; - if (length_encoding_bits == 28 || length_encoding_bits == 29) - return encoding_length_pair{3, 0x0F}; - if (length_encoding_bits == 30) return encoding_length_pair{4, 0x07}; - return encoding_length_pair{0, 0}; - }(); - - // Now pack up the bits into a uint32_t. - // Move the first set of values into bits 19-24 in the 32-bit value. - uint32_t code_point = (utf8_blocks[0] & char_encoding_length.second) << 18; - // Move the remaining values which are 6 bits (mask b10xxxxxx = x3F) - // from the remaining bytes into successive positions in the 32-bit result. - code_point |= ((utf8_blocks[1] & 0x3F) << 12); - code_point |= ((utf8_blocks[2] & 0x3F) << 6); - code_point |= utf8_blocks[3] & 0x3F; - - // Adjust the final result by shifting by the character length. - uint8_t const shift_amt = 24 - 6 * char_encoding_length.first; - code_point >>= shift_amt; - return code_point; -} - -/** - * @brief Normalize the characters for the strings input. - * - * Characters are replaced, padded, or removed depending on the `do_lower_case` input - * as well as the metadata values for each code point found in `cp_metadata`. - * - * First, each character is converted from UTF-8 to a unicode code point value. - * This value is then looked up in the `cp_metadata` table to determine its fate. - * The end result is a set of code point values for each character. - * The normalized set of characters make it easier for the tokenizer to identify - * tokens and match up token ids. - * - * @param[in] strings The input strings with characters to normalize to code point values. - * @param[in] total_bytes Total number of bytes in the input `strings` vector. - * @param[in] cp_metadata The metadata lookup table for every unicode code point value. - * @param[in] aux_table Aux table for mapping some multi-byte code point values. - * @param[in] do_lower_case True if normalization should include lower-casing. - * @param[out] code_points The resulting code point values from normalization. - * @param[out] chars_per_thread Output number of code point values per string. - */ -CUDF_KERNEL void kernel_data_normalizer(unsigned char const* strings, - size_t const total_bytes, - uint32_t const* cp_metadata, - uint64_t const* aux_table, - bool const do_lower_case, - uint32_t* code_points, - uint32_t* chars_per_thread) -{ - constexpr uint32_t init_val = (1 << FILTER_BIT); - uint32_t replacement_code_points[MAX_NEW_CHARS] = {init_val, init_val, init_val}; - - auto const char_for_thread = cudf::detail::grid_1d::global_thread_id(); - uint32_t num_new_chars = 0; - - if (char_for_thread < total_bytes) { - auto const code_point = extract_code_points_from_utf8(strings, total_bytes, char_for_thread); - auto const metadata = cp_metadata[code_point]; - - if (is_head_byte(strings[char_for_thread]) && !should_remove_cp(metadata, do_lower_case)) { - num_new_chars = 1; - // Apply lower cases and accent stripping if necessary - auto const new_cp = - do_lower_case || always_replace(metadata) ? get_first_cp(metadata) : code_point; - replacement_code_points[0] = new_cp == 0 ? 
code_point : new_cp; - - if (do_lower_case && is_multi_char_transform(metadata)) { - auto const next_cps = aux_table[code_point]; - replacement_code_points[1] = static_cast(next_cps >> 32); - auto const potential_next_cp = static_cast(next_cps); - replacement_code_points[2] = - potential_next_cp != 0 ? potential_next_cp : replacement_code_points[2]; - num_new_chars = 2 + (potential_next_cp != 0); - } - - if (should_add_spaces(metadata, do_lower_case)) { - // Need to shift all existing code-points up one - // This is a rotate right. There is no thrust equivalent at this time. - for (int loc = num_new_chars; loc > 0; --loc) { - replacement_code_points[loc] = replacement_code_points[loc - 1]; - } - - // Write the required spaces at the end - replacement_code_points[0] = SPACE_CODE_POINT; - replacement_code_points[num_new_chars + 1] = SPACE_CODE_POINT; - num_new_chars += 2; - } - } - } - - chars_per_thread[char_for_thread] = num_new_chars; - - using BlockStore = - cub::BlockStore; - __shared__ typename BlockStore::TempStorage temp_storage; - - // Now we perform coalesced writes back to global memory using cub. - uint32_t* block_base = code_points + blockIdx.x * blockDim.x * MAX_NEW_CHARS; - BlockStore(temp_storage).Store(block_base, replacement_code_points); -} - -} // namespace - -data_normalizer::data_normalizer(codepoint_metadata_type const* cp_metadata, - aux_codepoint_data_type const* aux_table, - bool do_lower_case) - : d_cp_metadata{cp_metadata}, d_aux_table{aux_table}, do_lower_case{do_lower_case} -{ -} - -uvector_pair data_normalizer::normalize(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream) const -{ - if (input.is_empty()) { - return uvector_pair{std::make_unique>(0, stream), - std::make_unique>(0, stream)}; - } - - // copy offsets to working memory - auto const num_offsets = input.size() + 1; - auto d_strings_offsets = std::make_unique>(num_offsets, stream); - auto const d_offsets = - cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); - thrust::transform(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_offsets), - d_strings_offsets->begin(), - [d_offsets] __device__(auto idx) { - auto const offset = d_offsets[0]; // adjust for any offset to the offsets - return d_offsets[idx] - offset; - }); - auto const bytes_count = d_strings_offsets->element(input.size(), stream); - if (bytes_count == 0) { // if no bytes, nothing to do - return uvector_pair{std::make_unique>(0, stream), - std::make_unique>(0, stream)}; - } - - int64_t const threads_per_block = THREADS_PER_BLOCK; - size_t const num_blocks = cudf::util::div_rounding_up_safe(bytes_count, threads_per_block); - size_t const threads_on_device = threads_per_block * num_blocks; - size_t const max_new_char_total = MAX_NEW_CHARS * threads_on_device; - - auto d_code_points = std::make_unique>(max_new_char_total, stream); - rmm::device_uvector d_chars_per_thread(threads_on_device, stream); - auto const d_strings = input.chars_begin(stream) + cudf::strings::detail::get_offset_value( - input.offsets(), input.offset(), stream); - kernel_data_normalizer<<>>( - reinterpret_cast(d_strings), - bytes_count, - d_cp_metadata, - d_aux_table, - do_lower_case, - d_code_points->data(), - d_chars_per_thread.data()); - - // Remove the 'empty' code points from the vector - thrust::remove(rmm::exec_policy(stream), - d_code_points->begin(), - d_code_points->end(), - uint32_t{1 << FILTER_BIT}); - - // We also need to prefix sum the number of characters up to 
an including - // the current character in order to get the new strings lengths. - thrust::inclusive_scan(rmm::exec_policy(stream), - d_chars_per_thread.begin(), - d_chars_per_thread.end(), - d_chars_per_thread.begin()); - - // This will reset the offsets to the new generated code point values - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(1), - input.size(), - update_strings_lengths_fn{d_chars_per_thread.data(), d_strings_offsets->data()}); - - auto const num_chars = d_strings_offsets->element(input.size(), stream); - d_code_points->resize(num_chars, stream); // should be smaller than original allocated size - - // return the normalized code points and the new offsets - return uvector_pair(std::move(d_code_points), std::move(d_strings_offsets)); -} - -} // namespace detail -} // namespace nvtext diff --git a/cpp/src/text/subword/detail/data_normalizer.hpp b/cpp/src/text/subword/detail/data_normalizer.hpp deleted file mode 100644 index c70e3734691..00000000000 --- a/cpp/src/text/subword/detail/data_normalizer.hpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "text/subword/detail/cp_data.h" - -#include -#include - -#include -#include - -using uvector_pair = std::pair>, - std::unique_ptr>>; - -namespace nvtext { -namespace detail { - -/** - * @brief Performs text cleaning for the tokenizers. - * - * Every instantiation of this class will transfer the meta data over to the GPU. - * It is advised to create one class and reuse that class as needed. - * - * Converts characters to lowercase, adds spaces around punctuation and multi-byte - * characters, strips accents from letters in the text and standardizes whitespace - * characters to all be the code point for the " " literal. - * - * The algorithm produces two vectors of integers `uvector_pair`. - * The first is the size of 3 uint32 values per input byte (of the strings buffer). - * The second is the same size as the input offsets vector -- number of strings + 1. - * - * A temporary buffer is created equal to 1 uint32 value per input byte. - * This means 16x the number bytes of the input strings buffer must be available - * to call the `normalize()` function in this class. - */ -class data_normalizer { - public: - /** - * @brief Create instance of the normalizer. - * - * @param cp_metadata The code point metadata table to use for normalization. - * @param aux_table The auxiliary code point table. - * @param do_lower_case If true, the normalizer will convert uppercase characters in the - * input stream to lower case and strip accents from those characters. - * If false, accented and uppercase characters are not transformed. - */ - data_normalizer(codepoint_metadata_type const* cp_metadata, - aux_codepoint_data_type const* aux_table, - bool do_lower_case = true); - - /** - * @brief Normalize a vector of strings. 
- * - * If `do_lower_case` is true, this function will convert each character to lowercase - * and strip accents from the characters. If false it will do all other conversions - * in the class description except lower-casing and punctuation stripping. - * - * The result of this function returns two pointers to GPU data. - * The first pointer is to a contiguous array of unicode code points corresponding to the - * characters in the text after running normalization. The second pointer is to the - * offsets of the strings in the code point array. That is, string `i` starts at - * `result.second->data()[i]`. - * This array will always be of length `input.size() + 1` since we need one entry - * for each input and a last entry which has the total number of bytes. - * - * @param input Strings to normalize - * @param stream CUDA stream used for device memory operations and kernel launches. - * @return Two pointers to GPU data buffers. The first is a pointer - * to the code points array and the second is a pointer to the offsets - * used to locate the code points for each string. - */ - uvector_pair normalize(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream) const; - - private: - bool const do_lower_case; - codepoint_metadata_type const* d_cp_metadata; - aux_codepoint_data_type const* d_aux_table; -}; - -} // namespace detail -} // namespace nvtext diff --git a/cpp/src/text/subword/detail/hash_utils.cuh b/cpp/src/text/subword/detail/hash_utils.cuh deleted file mode 100644 index dc0737118e8..00000000000 --- a/cpp/src/text/subword/detail/hash_utils.cuh +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace nvtext { -namespace detail { - -// Used for hashing functions in this file -constexpr uint64_t PRIME = 281474976710677; - -/** - * @brief This does a multiply mod 48 without overflow for the sdbm hash "pop" method. - * - * This method computes the bottom 48 bits of the result of multiplying two numbers - * respecting the restrictions specified by the parameters. - * - * It works by splitting `num` into 16 bit chunks and performing repeated multiplies. - * The result of all of those multiplies are added together. - * - * @param num_48bit A multiplicand that is at most 48 bits. 
- * @param num Any 64 bit number to multiply by num_48bit mod 2**48 - * @return (num_48bit * num) mod 2**48 - */ -__device__ uint64_t mul_mod_48(uint64_t num_48bit, uint64_t num) -{ - constexpr uint64_t mask = (1ULL << 48) - 1; - constexpr uint8_t bit_chunk_size = 16; - - uint64_t result = 0; -#pragma unroll - for (uint8_t i = 0; i < sizeof(num) / 2; ++i) { - auto const shift_amt = bit_chunk_size * i; - auto const bottom_16 = static_cast(num >> shift_amt); - // update result - result = result + ((num_48bit * bottom_16) << shift_amt); - result &= mask; - } - return result; -} - -/** - * @brief Computes the sdbm hash for the sequence starting at sequence_start up to length sequences. - * - * A start value for the sdbm hash can optionally be given. This is useful when checking if elements - * starting with "##" exist in the table since we can pass in the hash of "##" as the start value. - * - * @param sequence_start Code points to hash - * @param length Number of code points to hash - * @param start_value Initializes the hash computation. - * @return The sdbm hash of all elements in range `[sequence_start, sequence_start + length)` - */ -__device__ uint64_t sdbm_hash(uint32_t const* sequence_start, - uint32_t length, - uint64_t start_value = 0) -{ - // This expression computes h_{i} = (65599*h{i-1} + new_val) mod 2^48 and was obtained from here: - // http://www.cse.yorku.ca/~oz/hash.html - - constexpr uint64_t mask = (1ULL << 48) - 1; - uint64_t hash_value = start_value; - - for (int i = 0; i < length; ++i) { - hash_value = ((hash_value << 6) + (hash_value << 16) - hash_value) & mask; - hash_value = (hash_value + (sequence_start[i] & mask)) & mask; - } - - return hash_value; -} - -/** - * @brief Removes the last value added to the hash. - * - * If we have `current_hash = sdbm_hash("dog")` then, `prev_sdbm_hash(current_hash, cp(g))` - * returns the `sdbm_hash("do")` where it is assumed cp returns the unicode code point for a - * given letter. - * - * @param current_hash The current value used to compute the previous sdbm. - * @param last_val Last value used in the hash sequence. - * @return The hash value before that new value was added. - */ -__device__ uint64_t prev_sdbm_hash(uint64_t current_hash, uint32_t last_val) -{ - constexpr uint64_t mask = (1ULL << 48) - 1; - // Multiplicative inverse of 65599 under mod 2**48 - constexpr uint64_t mod_inverse = 24320495251391; - uint64_t const prev_hash = - mul_mod_48(mod_inverse, current_hash) - mul_mod_48(mod_inverse, last_val); - return prev_hash & mask; -} - -/** - * @brief The hash function used for accesses to the table. - * - * This is a universal hash function with parameters chosen to achieve perfect hashing. - * - * Algorithm is `((a*k + b) % PRIME) % table_size` where @ref PRIME is globally defined - * as 281474976710677 - * - * @param key Value to hash - * @param a Outer table first constant - * @param b Outer table second constant - * @param table_size Number of bins in the hash table. - * @return The computed hash value. - */ -__device__ uint32_t hash(uint64_t key, uint64_t a, uint64_t b, uint32_t table_size) -{ - return ((a * key + b) % PRIME) % table_size; -} - -/** - * @brief Retrieves the value associated with key in the hash table. - * - * If there is no value in the table with the input key, -1 is returned. - * - * This method will ALWAYS return the correct value if a key is in the table. However, some - * code point sequences may hash to the same key in which case an incorrect value is returned. 
- * This collision is rare and will not likely affect the model's performance. - * - * @param key The key to search for in the hash table - * @param hash_table A pointer to the flattened hash table - * @param bin_coefficients A pointer to the hashing parameters for each bin in the hash table. - * @param bin_offsets A pointer to the start of each bin in the hash table. - * @return -1 if key is not in the hash table. If the key is in the table returns an index in - * [0, vocab_size) indicating the index for the token in the bert model. - */ -__device__ int retrieve(uint64_t const key, - uint32_t const outer_table_a, - uint32_t const outer_table_b, - uint16_t const num_bins, - uint64_t const* hash_table, - uint64_t const* bin_coefficients, - uint16_t const* bin_offsets) -{ - auto const hash_bin = hash(key, outer_table_a, outer_table_b, num_bins); - auto const bin_params = bin_coefficients[hash_bin]; - auto const start_ht_offset = bin_offsets[hash_bin]; - - // The shift constants are due to how the hash coefficients are packed and are - // obtained from the python script perfect_hash.py which generates the expected tables. - auto const inner_bin_a = bin_params >> 16; - auto const inner_bin_b = (bin_params >> 9) & ((1 << 7) - 1); - auto const bin_size = static_cast(bin_params); - - if (bin_size == 0) { return -1; } // key hash has no bin parameters - - auto const inner_offset = hash(key, inner_bin_a, inner_bin_b, bin_size); - auto const kv_pair = hash_table[start_ht_offset + inner_offset]; - - auto const expected_key = kv_pair >> 16; - // extract value from encoded key-value - int value = kv_pair & ((1 << 16) - 1); - return key == expected_key ? value : -1; -} - -} // namespace detail -} // namespace nvtext diff --git a/cpp/src/text/subword/detail/tokenizer_utils.cuh b/cpp/src/text/subword/detail/tokenizer_utils.cuh deleted file mode 100644 index 01df910d420..00000000000 --- a/cpp/src/text/subword/detail/tokenizer_utils.cuh +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "text/subword/detail/cp_data.h" - -#include - -#include -#include - -#include - -namespace nvtext { -namespace detail { - -constexpr int THREADS_PER_BLOCK = 64; - -/** - * @brief In-place update of offsets values. - * - * In the `d_chars_up_to_idx`, the last character of each string is basically - * the offset (i.e. the number of characters) in that string. 
- * - * Example - * @code{.pseudo} - * // 3 strings with sizes 5,4,2 - * d_offsets = [0,5,9,11] - * // code points generated per character (as offsets) - * // 2nd string has an extra code point at its first char - * d_chars_up_to_idx = [1,2,3,4,5,6,8,9,10,11,12] - * d_chars_up_to_idx[d_offsets[1-3]] is [5,10,12] - * => d_offsets becomes [0,5,10,12] - * @endcode - */ -struct update_strings_lengths_fn { - uint32_t const* d_chars_up_to_idx; - int64_t* d_offsets; - - __device__ void operator()(cudf::size_type idx) - { - auto const offset = d_offsets[idx]; - d_offsets[idx] = offset > 0 ? d_chars_up_to_idx[offset - 1] : 0; - } -}; - -/** - * @brief Retrieve the code point metadata table. - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -rmm::device_uvector get_codepoint_metadata(rmm::cuda_stream_view stream); - -/** - * @brief Retrieve the auxiliary code point metadata table. - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -rmm::device_uvector get_aux_codepoint_data(rmm::cuda_stream_view stream); - -} // namespace detail -} // namespace nvtext diff --git a/cpp/src/text/subword/detail/wordpiece_tokenizer.hpp b/cpp/src/text/subword/detail/wordpiece_tokenizer.hpp deleted file mode 100644 index 244fe5092e7..00000000000 --- a/cpp/src/text/subword/detail/wordpiece_tokenizer.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "text/subword/detail/data_normalizer.hpp" - -#include - -#include - -namespace nvtext { - -struct hashed_vocabulary; - -namespace detail { - -/** - * @brief This splits words into tokens contained in the model vocabulary file. - * - * The tokenizer first normalizes the character bytes, identifies the words in - * each string, and then converts each word in to a integer token-id per the - * provided vocabulary hash table. - * - * The `tokenize()` function produces two device vectors `uvector_pair`. - * The first is the token-ids for each word identified in the input strings. - * The second is the offsets to identify which ids go with each string. - * - * Temporary buffers are created equal to 3 uint32 values plus 1 byte per input byte. - * Also the normalize step allocates an additional 16x bytes per input byte but 8x - * of this memory is reused by the `tokenize()` function. - * This means 13x + 8x = 21x the number bytes of the input strings buffer must be - * available to call the `tokenize()` function in this class. - */ -class wordpiece_tokenizer { - public: - /** - * @brief Creates a full tokenizer that cleans the text and splits it into tokens. - * - * @param vocab_table The preprocessed hashed vocabulary data. - * @param max_sequence_length Limit the number of token-ids per row in the output - * @param stride Each row in tensor-token-ids will replicate `max_sequence_length - stride` - * token-ids from the previous row, unless it is the first string. 
- * @param do_truncate If true, the tokenizer will discard all the token-ids after - * `max_sequence_length` for each input string. If false, it will use a - * new row in the tensor-token-ids to continue generating the output. - * @param do_lower_case If true, the tokenizer will convert uppercase characters in the - * input stream to lowercase and strip accents from those characters. - * If false, accented and uppercase characters are not transformed. - * @param max_word_length The length of the longest word that will be tokenized. Words - * longer than this will simply be replaced by the unknown token - * specified in the `vocab_file`. - */ - wordpiece_tokenizer(hashed_vocabulary const& vocab_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_truncate, - bool do_lower_case, - uint32_t max_word_length = 200); - - /** - * @brief Splits the input text into token ids. - * - * This class is simply a wrapper around the basic and word piece tokenizers. - * - * @param input Strings to tokenize - * @param stream CUDA stream used for device memory operations and kernel launches - * @return Pointer to token-ids and token-id offsets - */ - uvector_pair tokenize(cudf::strings_column_view const& input, rmm::cuda_stream_view stream); - - private: - /** - * @brief Splits the code points from the normalizer into tokens. - * - * @param[in,out] cps_and_offsets The output code points and offsets - * from the normalizer. - * The data is modified to contain the token ids and token counts - * per string. - * @param stream CUDA stream used for device memory operations and kernel launches. - */ - void tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stream_view stream); - - hashed_vocabulary const& vocab_table; - data_normalizer normalizer; // removes punctuation, accents, etc - uint32_t const max_sequence_length; - uint32_t const stride; - bool const do_truncate; - uint32_t const max_word_length; -}; - -} // namespace detail -} // namespace nvtext diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu deleted file mode 100644 index 55f1a19381c..00000000000 --- a/cpp/src/text/subword/load_hash_file.cu +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "text/subword/detail/codepoint_metadata.ah" -#include "text/subword/detail/tokenizer_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace nvtext { -namespace detail { - -/** - * @brief Retrieve the code point metadata table. 
- * - * Build the code point metadata table in device memory - * using the vector pieces from codepoint_metadata.ah - */ -rmm::device_uvector get_codepoint_metadata(rmm::cuda_stream_view stream) -{ - auto table_vector = rmm::device_uvector(codepoint_metadata_size, stream); - auto table = table_vector.data(); - thrust::fill(rmm::exec_policy(stream), - table + cp_section1_end, - table + codepoint_metadata_size, - codepoint_metadata_default_value); - CUDF_CUDA_TRY(cudaMemcpyAsync(table, - codepoint_metadata, - cp_section1_end * sizeof(codepoint_metadata[0]), // 1st section - cudaMemcpyDefault, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync( - table + cp_section2_begin, - cp_metadata_917505_917999, - (cp_section2_end - cp_section2_begin + 1) * sizeof(codepoint_metadata[0]), // 2nd section - cudaMemcpyDefault, - stream.value())); - return table_vector; -} - -/** - * @brief Retrieve the aux code point data table. - * - * Build the aux code point data table in device memory - * using the vector pieces from codepoint_metadata.ah - */ -rmm::device_uvector get_aux_codepoint_data(rmm::cuda_stream_view stream) -{ - auto table_vector = rmm::device_uvector(aux_codepoint_data_size, stream); - auto table = table_vector.data(); - thrust::fill(rmm::exec_policy(stream), - table + aux_section1_end, - table + aux_codepoint_data_size, - aux_codepoint_default_value); - CUDF_CUDA_TRY(cudaMemcpyAsync(table, - aux_codepoint_data, - aux_section1_end * sizeof(aux_codepoint_data[0]), // 1st section - cudaMemcpyDefault, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync( - table + aux_section2_begin, - aux_cp_data_44032_55203, - (aux_section2_end - aux_section2_begin + 1) * sizeof(aux_codepoint_data[0]), // 2nd section - cudaMemcpyDefault, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync( - table + aux_section3_begin, - aux_cp_data_70475_71099, - (aux_section3_end - aux_section3_begin + 1) * sizeof(aux_codepoint_data[0]), // 3rd section - cudaMemcpyDefault, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync( - table + aux_section4_begin, - aux_cp_data_119134_119232, - (aux_section4_end - aux_section4_begin + 1) * sizeof(aux_codepoint_data[0]), // 4th section - cudaMemcpyDefault, - stream.value())); - return table_vector; -} - -namespace { -/** - * @brief Convert string to uint32. - * - * This just wraps the std::stoi but provides a nice error message - * in case the hash file format is incorrect. - */ -uint32_t str_to_uint32(std::string const& str, uint64_t line_no) -{ - try { - return std::stoi(str); // there is no std::stoui - } catch (std::exception const& exc) { - std::string message("Line "); - message += std::to_string(line_no) + ": "; - message += "cannot convert integer from '"; - message += str; - message += "': "; - message += exc.what(); - std::cerr << message << std::endl; - throw; - } -} - -/** - * @brief Convert string to uint64. - * - * This just wraps the std::stoul but provides a nice error message - * in case the hash file format is incorrect. - */ -uint64_t str_to_uint64(std::string const& str, uint64_t line_no) -{ - try { - return std::stoul(str); - } catch (std::exception const& exc) { - std::string message("Line "); - message += std::to_string(line_no) + ": "; - message += "cannot convert integer from '"; - message += str; - message += "': "; - message += exc.what(); - std::cerr << message << std::endl; - throw; - } -} -} // namespace - -/** - * @brief Loads a text file representing the hashed vocabulary into hashed_vocabulary struct. 
- * - * @code{.pseudo} - * Format of the file (ASCII text file with numbers): - * First 3 lines have the following values: - * outer_hash_a - * outer_hash_b - * number-of-bins - * The next number-of-bins lines has two values in each line separated by a space - * coefficient offset - * ... - * Next line has the size (number of lines) of the table followed - * by the table values -- one value per line. - * The last three lines: - * unknown_token_id - * first_token_id - * separator_token_id - * @endcode - * - * @param filename_hashed_vocabulary Path to text file containing hashed vocabulary - * @return object containing hash table elements for the wordpiece tokenizer - */ -std::unique_ptr load_vocabulary_file( - std::string const& filename_hashed_vocabulary, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - hashed_vocabulary result; - std::ifstream hash_file(filename_hashed_vocabulary); - CUDF_EXPECTS(hash_file.good(), "Could not open " + filename_hashed_vocabulary); - - uint64_t line_no = 1; - std::string line; - std::getline(hash_file, line); - result.outer_hash_a = str_to_uint32(line, line_no++); - - std::getline(hash_file, line); - result.outer_hash_b = str_to_uint32(line, line_no++); - - std::getline(hash_file, line); - result.num_bins = str_to_uint32(line, line_no++); - - auto bin_coefficients = cudf::detail::make_host_vector(result.num_bins, stream); - auto bin_offsets = cudf::detail::make_host_vector(result.num_bins, stream); - - for (int i = 0; i < result.num_bins; ++i) { - std::getline(hash_file, line); - size_t loc_of_space = line.find(' '); - CUDF_EXPECTS(loc_of_space != line.npos, "invalid hash file format"); - - std::string first_num = line.substr(0, loc_of_space); - std::string second_num = line.substr(loc_of_space + 1, line.length()); - - bin_coefficients[i] = str_to_uint64(first_num, line_no); - bin_offsets[i] = str_to_uint32(second_num, line_no); - ++line_no; - } - - std::getline(hash_file, line); - uint64_t hash_table_length = str_to_uint64(line, line_no++); - auto table = cudf::detail::make_host_vector(hash_table_length, stream); - - std::generate(table.begin(), table.end(), [&hash_file, &line_no]() { - std::string line; - std::getline(hash_file, line); - return str_to_uint64(line, line_no++); - }); - - std::getline(hash_file, line); - result.unknown_token_id = str_to_uint32(line, line_no++); - - std::getline(hash_file, line); - result.first_token_id = str_to_uint32(line, line_no++); - - std::getline(hash_file, line); - result.separator_token_id = str_to_uint32(line, line_no++); - - // Transfer hash table to columns - result.table = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT64}, - table.size(), - cudf::mask_state::UNALLOCATED, - stream, - mr); - cudf::detail::cuda_memcpy_async( - cudf::device_span(result.table->mutable_view().data(), table.size()), - table, - stream); - - result.bin_coefficients = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT64}, - bin_coefficients.size(), - cudf::mask_state::UNALLOCATED, - stream, - mr); - cudf::detail::cuda_memcpy_async( - cudf::device_span(result.bin_coefficients->mutable_view().data(), - bin_coefficients.size()), - bin_coefficients, - stream); - - result.bin_offsets = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT16}, - bin_offsets.size(), - cudf::mask_state::UNALLOCATED, - stream, - mr); - cudf::detail::cuda_memcpy_async( - cudf::device_span(result.bin_offsets->mutable_view().data(), - bin_offsets.size()), - bin_offsets, - stream); - - auto cp_metadata = 
detail::get_codepoint_metadata(stream); - auto const cp_metadata_size = static_cast(cp_metadata.size()); - result.cp_metadata = std::make_unique(cudf::data_type{cudf::type_id::UINT32}, - cp_metadata_size, - cp_metadata.release(), - rmm::device_buffer{}, - 0); - - auto aux_cp_table = detail::get_aux_codepoint_data(stream); - auto const aux_cp_table_size = static_cast(aux_cp_table.size()); - result.aux_cp_table = std::make_unique(cudf::data_type{cudf::type_id::UINT64}, - aux_cp_table_size, - aux_cp_table.release(), - rmm::device_buffer{}, - 0); - - return std::make_unique(std::move(result)); -} - -} // namespace detail - -std::unique_ptr load_vocabulary_file( - std::string const& filename_hashed_vocabulary, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::load_vocabulary_file(filename_hashed_vocabulary, stream, mr); -} - -} // namespace nvtext diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu deleted file mode 100644 index 58653be7dd7..00000000000 --- a/cpp/src/text/subword/subword_tokenize.cu +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "text/subword/detail/wordpiece_tokenizer.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace nvtext { -namespace detail { -namespace { - -/** - * @brief Convert tokens and row2tensor map to final tensor data. 
- * - * @param[in] token_ids Tokens from tokenizer - * @param[in] offsets Offsets to each string's output row of tokens - * @param[in] row2tensor String to tensor token counts - * @param[in] row2row_within_tensor Token counts within sub-rows of the output - * @param[in] max_sequence_length Maximum number of tokens in a row - * @param[in] nrows_tensor_token_ids Total number of output tensor rows - * @param[in] stride Number of tokens in sub-rows - * @param[in] do_truncate True if tokens should not spill into sub-rows in the output - * @param[out] final_tensor Output vector of token-ids - * @param[out] attn_mask Identifies valid token id entries - * @param[out] metadata Additional data per row - */ -CUDF_KERNEL void kernel_compute_tensor_metadata( - // input - uint32_t const* token_ids, - int64_t const* offsets, - uint32_t const* row2tensor, - uint32_t const* row2row_within_tensor, - uint32_t max_sequence_length, - uint32_t nrows_tensor_token_ids, - uint32_t stride, - bool do_truncate, - // output - uint32_t* final_tensor, - uint32_t* attn_mask, - uint32_t* metadata) -{ - auto const output_idx = cudf::detail::grid_1d::global_thread_id(); - - uint32_t const absolute_row_id = output_idx / max_sequence_length; - if (absolute_row_id >= nrows_tensor_token_ids) { return; } - uint32_t const tensor_id = row2tensor[absolute_row_id]; - uint32_t const row_within_tensor = row2row_within_tensor[absolute_row_id]; - uint32_t const offset_token_ids_tensor = offsets[tensor_id]; - uint32_t const n_tokens_tensor = offsets[tensor_id + 1] - offset_token_ids_tensor; - // check for last row within tensor - bool const last_row_of_tensor = (absolute_row_id == nrows_tensor_token_ids - 1) || - (row2tensor[absolute_row_id + 1] != tensor_id); - // compute input offset to retrieve token ids - uint32_t const token_idx = output_idx % max_sequence_length; - uint32_t const row_offset_token_ids = - offset_token_ids_tensor + token_idx + - (row_within_tensor ? (max_sequence_length + (stride * (row_within_tensor - 1))) : 0); - - if (row_within_tensor == 0) { - if (token_idx < n_tokens_tensor) { - // copy token ids - final_tensor[output_idx] = token_ids[row_offset_token_ids]; - attn_mask[output_idx] = 1; - } else { - // pad with 0 - final_tensor[output_idx] = 0; - attn_mask[output_idx] = 0; - } - } else { - uint32_t const n_replicates = max_sequence_length - stride; - if ((row_offset_token_ids - n_replicates) < (offset_token_ids_tensor + n_tokens_tensor)) { - // replicate elements from previous row or copy new tokens - final_tensor[output_idx] = token_ids[row_offset_token_ids - n_replicates]; - attn_mask[output_idx] = 1; - } else { - // pad with 0 - final_tensor[output_idx] = 0; - attn_mask[output_idx] = 0; - } - } - - // write metadata - if (token_idx == 0) { - auto const metadata_idx = absolute_row_id * 3; // three metadata values per output row - metadata[metadata_idx] = tensor_id; - metadata[metadata_idx + 1] = (row_within_tensor == 0) ? 0 : (max_sequence_length - stride) / 2; - metadata[metadata_idx + 2] = [&] { - if (!last_row_of_tensor) return max_sequence_length - (max_sequence_length - stride) / 2 - 1; - if (n_tokens_tensor <= max_sequence_length) // we fit, all good - return (n_tokens_tensor > 0) ? (n_tokens_tensor - 1) : 0; - if (do_truncate) return (max_sequence_length - 1); - - auto const final_row_value = - (max_sequence_length - stride) + (n_tokens_tensor - max_sequence_length) % stride; - return (final_row_value > 0) ? 
(final_row_value - 1) : 0; - }(); - } -} - -// this happens if there are no tokens in the input -tokenizer_result build_empty_result(cudf::size_type size, - uint32_t max_sequence_length, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto zero = cudf::numeric_scalar(0, true, stream); - auto ids = cudf::detail::sequence(size * max_sequence_length, zero, zero, stream, mr); - auto mask = cudf::detail::sequence(size * max_sequence_length, zero, zero, stream, mr); - - auto metadata = cudf::make_numeric_column( - cudf::data_type{cudf::type_id::UINT32}, size * 3, cudf::mask_state::UNALLOCATED, stream, mr); - thrust::tabulate(rmm::exec_policy(stream), - metadata->mutable_view().begin(), - metadata->mutable_view().end(), - [] __device__(auto idx) { return ((idx % 3) == 0) ? idx : 0; }); - metadata->set_null_count(0); - - return tokenizer_result{ - 0, max_sequence_length, std::move(ids), std::move(mask), std::move(metadata)}; -} - -} // namespace - -tokenizer_result subword_tokenize(cudf::strings_column_view const& strings, - hashed_vocabulary const& vocab_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower_case, - bool do_truncate, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_EXPECTS(stride <= max_sequence_length, - "stride must be less than or equal to max_sequence_length"); - auto const strings_count = strings.size(); - if (strings_count == strings.null_count()) { // empty or all-null returns empty - return tokenizer_result{0, - max_sequence_length, - cudf::make_empty_column(cudf::data_type{cudf::type_id::UINT32}), - cudf::make_empty_column(cudf::data_type{cudf::type_id::UINT32}), - cudf::make_empty_column(cudf::data_type{cudf::type_id::UINT32})}; - } - CUDF_EXPECTS( - max_sequence_length <= - (static_cast(std::numeric_limits::max()) / strings_count), - "max_sequence_length times number of input rows exceeds the column size limit", - std::overflow_error); - - // Create tokenizer - wordpiece_tokenizer tokenizer( - vocab_table, max_sequence_length, stride, do_truncate, do_lower_case); - // Run tokenizer - auto const tokens = tokenizer.tokenize(strings, stream); - // assign output components - auto device_token_ids = tokens.first->data(); - auto device_offsets = tokens.second->data(); - - // Format output from tokenizer - // Each string can create 1 or more tensor entries. - // Compute the string-per-tensor offsets values by scanning - // over the number of tokens for each string. - rmm::device_uvector offsets_per_tensor(strings_count + 1, stream); - auto d_offsets_per_tensor = offsets_per_tensor.data(); - - thrust::transform_exclusive_scan( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count + 1), - offsets_per_tensor.begin(), - [device_offsets, do_truncate, max_sequence_length, stride, strings_count] __device__( - cudf::size_type idx) { - uint32_t const num_tokens = - idx < strings_count ? 
device_offsets[idx + 1] - device_offsets[idx] : 0; - if (do_truncate || num_tokens <= max_sequence_length) return uint32_t{1}; - return 1 + ((num_tokens - max_sequence_length + stride - 1) / stride); - }, - uint32_t{0}, - cuda::std::plus()); - // last element is the total number of output rows - uint32_t const nrows_tensor_token_ids = offsets_per_tensor.element(strings_count, stream); - // if there are no tokens at all, build a specific empty result - if (nrows_tensor_token_ids == 0) { - return build_empty_result(strings_count, max_sequence_length, stream, mr); - } - - // compute global_row to tensor, and global_row to within_tensor_row correspondence - rmm::device_uvector row2tensor(nrows_tensor_token_ids, stream); - auto d_row2tensor = row2tensor.data(); - rmm::device_uvector row2row_within_tensor(nrows_tensor_token_ids, stream); - auto d_row2row_within_tensor = row2row_within_tensor.data(); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - [d_offsets_per_tensor, d_row2tensor, d_row2row_within_tensor] __device__(auto idx) { - uint32_t offset = d_offsets_per_tensor[idx]; - uint32_t nrows = d_offsets_per_tensor[idx + 1] - offset; - for (uint32_t jdx = 0; jdx < nrows; ++jdx) { - d_row2tensor[jdx + offset] = idx; - d_row2row_within_tensor[jdx + offset] = jdx; - } - }); - - // create output data columns - auto tensor_token_ids = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT32}, - nrows_tensor_token_ids * max_sequence_length, - cudf::mask_state::UNALLOCATED, - stream, - mr); - auto tensor_attention_mask = - cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT32}, - nrows_tensor_token_ids * max_sequence_length, - cudf::mask_state::UNALLOCATED, - stream, - mr); - auto tensor_metadata = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT32}, - nrows_tensor_token_ids * 3, - cudf::mask_state::UNALLOCATED, - stream, - mr); - - // compute final-tensor, mask, and metadata - constexpr int block_size = 256; - cudf::detail::grid_1d const grid{ - static_cast(nrows_tensor_token_ids * max_sequence_length), block_size}; - kernel_compute_tensor_metadata<<>>( - device_token_ids, - device_offsets, - d_row2tensor, - d_row2row_within_tensor, - max_sequence_length, - nrows_tensor_token_ids, - stride, - do_truncate, - tensor_token_ids->mutable_view().data(), - tensor_attention_mask->mutable_view().data(), - tensor_metadata->mutable_view().data()); - - return tokenizer_result{nrows_tensor_token_ids, - max_sequence_length, - std::move(tensor_token_ids), - std::move(tensor_attention_mask), - std::move(tensor_metadata)}; -} - -} // namespace detail - -tokenizer_result subword_tokenize(cudf::strings_column_view const& strings, - hashed_vocabulary const& vocabulary_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower_case, - bool do_truncate, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::subword_tokenize( - strings, vocabulary_table, max_sequence_length, stride, do_lower_case, do_truncate, stream, mr); -} - -} // namespace nvtext diff --git a/cpp/src/text/subword/wordpiece_tokenizer.cu b/cpp/src/text/subword/wordpiece_tokenizer.cu deleted file mode 100644 index a2de52b5659..00000000000 --- a/cpp/src/text/subword/wordpiece_tokenizer.cu +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "text/subword/detail/hash_utils.cuh" -#include "text/subword/detail/tokenizer_utils.cuh" -#include "text/subword/detail/wordpiece_tokenizer.hpp" - -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace nvtext { -namespace detail { -namespace { -/** - * @brief Initializes the token-ids, word-indices, and token counts vectors. - * - * Each thread process a single code point from `code_points`. - * This also locates the start and end of each word within the `code_points` buffer. - * A word start is identified as a non-space character that appears right after a space. - * A word end is identified as a space character that appears right after a non-space one. - * If the code point at this thread does not represent a word start or word end, - * a max uint32_t value is written to the appropriate vector instead. - * A post processing step is required to filter the relevant values in these - * vectors. - * - * It is guaranteed that the same number of valid values will be written to both the - * start and end indices and that after the select step, the two arrays will be aligned. - * That is, `start_word_indices[word]` and `end_word_indices[word]` are the start and - * end for the same word. - * - * Memory required is 13 bytes per code point values: - * - 4 bytes each for `start_word_indices` and `end_word_indices` - * - 4 bytes for each `token_ids` - * - 1 byte for each `tokens_per_word` - * Also, there is a code point value for each byte in the input strings. - * - * @param[in] code_points A pointer to the code points in the strings after normalization. - * @param[out] start_word_indices An array of size `num_code_points` which will contain the - * starting index for each word. - * @param[out] end_word_indices An array of size `num_code_points` which will contain the - * ending index for each word. - * @param num_code_points The total number of code_points. - * @param[out] token_ids An array of size `num_code_points` which will hold the token ids. - * This kernel just sets all the values to max uint32_t. - * @param[out] tokens_per_word An array of size `num_code_points` which hold the number of - * tokens. This kernel just sets all the values to 0. 
- */ -CUDF_KERNEL void init_data_and_mark_word_start_and_ends(uint32_t const* code_points, - uint32_t* start_word_indices, - uint32_t* end_word_indices, - size_t num_code_points, - uint32_t* token_ids, - uint8_t* tokens_per_word) -{ - auto const char_for_thread = cudf::detail::grid_1d::global_thread_id(); - - // Deal with the start_word_indices array - if (char_for_thread < num_code_points) { - uint32_t val_to_write = cuda::std::numeric_limits::max(); - if ((code_points[char_for_thread] != SPACE_CODE_POINT) && (char_for_thread > 0) && - (code_points[char_for_thread - 1] == SPACE_CODE_POINT)) { - val_to_write = char_for_thread; - } - start_word_indices[char_for_thread] = val_to_write; - - // Deal with the end_word_indices_array - val_to_write = cuda::std::numeric_limits::max(); - if ((code_points[char_for_thread] != SPACE_CODE_POINT) && - (char_for_thread + 1 < num_code_points) && - (code_points[char_for_thread + 1] == SPACE_CODE_POINT)) { - val_to_write = char_for_thread + 1; - } - end_word_indices[char_for_thread] = val_to_write; - - token_ids[char_for_thread] = cuda::std::numeric_limits::max(); - tokens_per_word[char_for_thread] = 0; - } -} - -/** - * @brief Resolves the string boundaries for the start and end words. - * - * This kernel should be called after `init_data_and_mark_word_start_and_ends` with at - * least `num_strings` total threads. - * - * The start and end indices are updated to honor the string boundaries - * within the strings array. This corrects any word ranges that span across - * individual strings. - * - * @param code_points A pointer to the code points in the strings. - * @param strings_offsets An array containing the index of the starting character of each string - * with an extra space at the end containing the total number of characters. As a result, - * this array is of length num_strings + 1. - * @param start_word_indices An array which will contain the starting index for each word scattered - * throughout. If an index does not represent a word start, the max-uint32_t value is written - * to indicate this. - * @param end_word_indices An array which will contain the one past the end index for each word - * scattered throughout. If an index does not represent a word end, the max uint32_t value is - * written to indicate this. - * @param num_strings The total number of strings to be processed. - */ -CUDF_KERNEL void mark_string_start_and_ends(uint32_t const* code_points, - int64_t const* strings_offsets, - uint32_t* start_word_indices, - uint32_t* end_word_indices, - uint32_t num_strings) -{ - auto const idx = cudf::detail::grid_1d::global_thread_id(); - // Ensure the starting character of each strings is written to the word start array. - if (idx <= num_strings) { - auto const offset = strings_offsets[idx]; - - if ((idx < num_strings) && (code_points[offset] != SPACE_CODE_POINT)) { - start_word_indices[offset] = offset; - } - - if ((offset > 0) && (code_points[offset - 1] != SPACE_CODE_POINT)) { - end_word_indices[offset - 1] = offset; - } - } -} - -/** - * @brief Currently supported special tokens. - * - * Code logic expects these to be 3 upper-case characters along - * with a single trailing space. - */ -__constant__ char special_tokens[35]{"BOS EOS UNK SEP PAD CLS MASK "}; -constexpr cudf::size_type MIN_ST_WIDTH = 4; // Min token size in special_tokens -constexpr cudf::size_type MAX_ST_WIDTH = 5; // Max token size in special_tokens - -struct mark_special_tokens { - /** - * @brief Check given code-point array to the list of known - * special tokens. 
- */ - __device__ bool is_special_token(uint32_t const* token, cudf::size_type size) const - { - if (size < MIN_ST_WIDTH || size > MAX_ST_WIDTH) return false; - char str_token[MAX_ST_WIDTH]; - // convert code-points to chars - thrust::transform(thrust::seq, token, token + size, str_token, [](uint32_t cp) { - // also upper-case them to match again special_tokens array - return static_cast(cp >= 'a' ? cp - 'a' + 'A' : cp); - }); - // search the special tokens array for the str_token - cudf::string_view tokens(special_tokens, sizeof(special_tokens)); - return tokens.find(str_token, size) != cudf::string_view::npos; - } - - /** - * @brief Check code-points for special tokens and adjust indices. - * - * Tokens will appear in the `code_points` array as: - * `_[_ttt_]_` where `_` are single space characters and - * ttt is the variable-length token name - * - * The logic below uses the following variables to represent position - * values in the `code_points` array after locating a special token: - * ``` - * _ [ _ t t t _ ] _ - * ^ ^ ^ ^ - * si sp ep ei - * ``` - * where `si` is `start_index` - * `sp` is `start_pos` - * `ep` is `end_pos` - * `ei` is `end_index` - * - * When a special token is found, the `code_points` are adjusted - * to remove the spaces and capitalize the name. - * ``` - * _ [ _ t t t _ ] _ is updated to - * _ [ T T T ] _ ] _ - * ``` - * This is required for the downstream word-piece tokenizer to - * match it to the vocabulary hash table. - * - * The `start_word_indices` and `end_word_indices` are updated to - * identify the token and to ignore the extra trailing `]` character. - */ - __device__ void operator()(size_t idx) const - { - uint32_t const start_index = start_word_indices[idx]; - if ((start_index == cuda::std::numeric_limits::max()) || - ((start_index + MIN_ST_WIDTH + 2) > num_code_points)) - return; - if (code_points[start_index] != '[') return; - - // check for matching end bracket - uint32_t const start_pos = start_index + 2; // after the space delimiter - // search for next start-word and then check it is a ']' - uint32_t const end_index = [&] { - auto const begin = start_word_indices + start_pos; - auto const width = - cuda::std::min(static_cast(MAX_ST_WIDTH + 1), (num_code_points - start_pos)); - auto const end = begin + width; - // checking the next start-word is more reliable than arbitrarily searching for ']' - // in case the text is split across string rows - auto const iter = thrust::find_if(thrust::seq, begin + 1, end, [](auto swi) { - return swi != cuda::std::numeric_limits::max(); - }); - return iter == end ? start_index : static_cast(iter - start_word_indices); - }(); - if (code_points[end_index] != ']') return; - - // check for special token - auto const size = static_cast(end_index - start_pos); - if (!is_special_token(code_points + start_pos, size)) return; - - // special token found - // adjust code-points - auto const end_pos = end_index - 2; - // change _[_ttt_]_ to _[TTT]_ - for (auto left_idx = start_pos - 1; left_idx <= end_pos; ++left_idx) { - auto const cp = code_points[left_idx + 1]; - code_points[left_idx] = cp >= 'a' ? 
cp - 'a' + 'A' : cp; - } - code_points[end_pos] = ']'; - - // erase the intermediate indices - thrust::fill(thrust::seq, - start_word_indices + start_index + 1, // keep the first one - start_word_indices + end_index + 1, - cuda::std::numeric_limits::max()); - thrust::fill(thrust::seq, - end_word_indices + start_index, - end_word_indices + end_index + 1, - cuda::std::numeric_limits::max()); - - // reset the new end-word index - end_word_indices[end_pos] = end_pos + 1; - } - - uint32_t* const code_points; - uint32_t* const start_word_indices; - uint32_t* const end_word_indices; - size_t const num_code_points; -}; - -/** - * @brief Converts words into token ids. - * - * Each thread is assigned a word to convert based on the `hash_table`. Each thread converts - * its word and writes the number of tokens it found in the `tokens_per_word` array. - * - * The `tokens_per_word` array is kept to the length `num_code_points + 1`. This means each thread - * can write its number of tokens to the `tokens_per_word` corresponding to the starting - * character of each word. Since strings must start at some word, we can prefix sum this array - * and use the strings_lengths code point offsets to directly index the number of tokens in each - * string. - * - * The `token_ids` array should be initialized to the max uint32_t before calling this kernel. - * - * @param code_points An array containing all of the code points to be processed - * @param hash_table An array containing the flattened hash table with key, value pairs - * packed in 64-bits - * @param bin_coefficients A pointer to the GPU pointer containing the hashing parameters for - * each hash bin on the GPU. - * @param bin_offsets: A pointer to the GPU pointer containing the start index of each bin in - * the flattened hash table. - * @param token_ids The index for each token found during tokenization. This is of length - * num_code_points. In most cases, multiple characters will collapse to one token. In these - * cases, the max uint32_t will be in place. Cub will be used later to filter out these - * invalid ids later. - * @param word_starts An array of length `num_code_points`. The first total word elements contains - * the index of the first character for each word. - * @param word_ends An array of length num_code_points. The first total_words elements contains the - * past the end index for each word. This array is kept aligned with the initial - * token_ids array containing the word start code points. - * `word_ends[word] - filtered_start_indices[word] = word_length` - * @param tokens_per_word An array of size num_code_points that will contain the number of tokens in - * each word in a string. This array can be exclusive summed and the result used in - * conjunction with the strings lengths array to find the tokens in each string. This is - * possible since the number of tokens in each word will be placed at the index corresponding - * to the start character of a word. If we assume prefix_summed is the prefix sum of the - * tokens_per_word array, then `prefix_summed[strings_lengths[string_idx] - 1]` is the number - * of tokens found before the start of string. - * @param unk_token_id The token id to be place for unknown tokens - * @param max_word_length The maximum length of a word. Any word longer than this length is - * replaced by the unknown token. 
- * @param total_words The total number of white space separated words - * @param outer_hash_a_param The a parameter for the outer hash - * @param outer_hash_b_param: The b parameter for the outer hash - * @param num_outer_bins: The number of bins for the outer hash - */ -CUDF_KERNEL void kernel_wordpiece_tokenizer(uint32_t const* code_points, - uint64_t const* hash_table, - uint64_t const* bin_coefficients, - uint16_t const* bin_offsets, - uint16_t unk_token_id, - uint32_t outer_hash_a_param, - uint32_t outer_hash_b_param, - uint16_t num_outer_bins, - uint32_t const* word_starts, - uint32_t const* word_ends, - uint32_t max_word_length, - uint32_t total_words, - uint32_t* token_ids, - uint8_t* tokens_per_word) -{ - auto const word_to_tokenize = cudf::detail::grid_1d::global_thread_id(); - - if (word_to_tokenize >= total_words) { return; } - // Each thread gets the start code_point offset for each word and resets the token_id memory to - // the default value. In a post processing step, all of these values will be removed. - auto const token_start = word_starts[word_to_tokenize]; - auto const token_end = word_ends[word_to_tokenize]; - auto const word_length = token_end - token_start; - - // The sdbm hash of "##" - constexpr uint32_t hashtag_hash = 2296000; - uint16_t num_values_tokenized = 0; - // initialize start, end - uint32_t start = token_start; - uint32_t end = token_end; - - if (word_length > max_word_length) { - start = token_end; - num_values_tokenized = 1; - token_ids[token_start] = unk_token_id; - tokens_per_word[token_start] = num_values_tokenized; - } - - while (start < token_end) { - end = token_end; - // init token_id to no token - int token_id = -1; - // compute current length - uint32_t const length = token_end - start; - uint64_t substr_hash = - sdbm_hash(code_points + start, length, start == token_start ? 0 : hashtag_hash); - while (start < end) { - token_id = retrieve(substr_hash, - outer_hash_a_param, - outer_hash_b_param, - num_outer_bins, - hash_table, - bin_coefficients, - bin_offsets); - if (token_id != -1) { break; } - --end; - // Pop off the last value from the substr hash - substr_hash = prev_sdbm_hash(substr_hash, code_points[end]); - } - - if (token_id == -1) { - end = token_end; - token_id = unk_token_id; - // We need to clean up the global array. This case is very uncommon. - // Only 0.016% of words cannot be resolved to a token from the squad dev set. 
- for (uint32_t i = 1; i < num_values_tokenized; ++i) { - token_ids[token_start + i] = cuda::std::numeric_limits::max(); - } - num_values_tokenized = 0; - } - - token_ids[token_start + num_values_tokenized] = token_id; - ++num_values_tokenized; - start = end; - } - - tokens_per_word[token_start] = num_values_tokenized; -} - -} // namespace - -wordpiece_tokenizer::wordpiece_tokenizer(hashed_vocabulary const& vocab_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_truncate, - bool do_lower_case, - uint32_t max_word_length) - : vocab_table(vocab_table), - normalizer(vocab_table.cp_metadata->view().data(), - vocab_table.aux_cp_table->view().data(), - do_lower_case), - max_sequence_length{max_sequence_length}, - stride(stride), - do_truncate(do_truncate), - max_word_length{max_word_length} -{ -} - -uvector_pair wordpiece_tokenizer::tokenize(cudf::strings_column_view const& input, - rmm::cuda_stream_view stream) -{ - auto cps_and_offsets = normalizer.normalize(input, stream); - tokenize(cps_and_offsets, stream); - return uvector_pair(std::move(cps_and_offsets.first), std::move(cps_and_offsets.second)); -} - -struct copy_if_fn { // inline lambda not allowed in private or protected member function - __device__ bool operator()(uint32_t cp) - { - return cp != cuda::std::numeric_limits::max(); - } -}; - -struct tranform_fn { // just converting uint8 value to uint32 - __device__ uint32_t operator()(uint8_t count) { return count; } -}; - -void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stream_view stream) -{ - auto device_code_points = cps_and_offsets.first->data(); - auto const num_code_points = cps_and_offsets.first->size(); - auto device_strings_offsets = cps_and_offsets.second->data(); - auto const num_strings = cps_and_offsets.second->size() - 1; - - size_t const four_byte_cp_chunks = 1 + (num_code_points - 1) / sizeof(uint32_t); - size_t const rounded_num_cps = sizeof(uint32_t) * four_byte_cp_chunks; - rmm::device_uvector device_tokens_per_word(rounded_num_cps, stream); - rmm::device_uvector device_token_ids(num_code_points, stream); - rmm::device_uvector device_word_indices(2 * num_code_points, stream); - - // make device_start_word_indices and device_end_word_indices contiguous - uint32_t* device_start_word_indices = device_word_indices.data(); - uint32_t* device_end_word_indices = device_start_word_indices + num_code_points; - - cudf::detail::grid_1d const grid_init{static_cast(num_code_points), - THREADS_PER_BLOCK}; - detail::init_data_and_mark_word_start_and_ends<<>>(device_code_points, - device_start_word_indices, - device_end_word_indices, - num_code_points, - device_token_ids.data(), - device_tokens_per_word.data()); - CUDF_CHECK_CUDA(stream.value()); - - cudf::detail::grid_1d const grid_mark{static_cast(num_strings + 1), - THREADS_PER_BLOCK}; - detail::mark_string_start_and_ends<<>>(device_code_points, - device_strings_offsets, - device_start_word_indices, - device_end_word_indices, - num_strings); - CUDF_CHECK_CUDA(stream.value()); - - // check for special tokens and adjust indices - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - num_code_points, - mark_special_tokens{ - device_code_points, device_start_word_indices, device_end_word_indices, num_code_points}); - - // Now start_word_indices has the word starts scattered throughout the array. We need to select - // all values not equal to the max uint32_t and place them at the start of the array. 
We leverage - // the fact that the start_word_indices and the end_word indices are contiguous to only launch one - // device select kernel. - auto itr_end = thrust::remove(rmm::exec_policy(stream), - device_word_indices.begin(), - device_word_indices.end(), - cuda::std::numeric_limits::max()); - - // The number of tokens selected will be double the number of words since we - // select from both the start and end index arrays. - uint32_t const num_words = cuda::std::distance(device_word_indices.begin(), itr_end) / 2; - - // We need to change the end_word_indices pointer after the selection is complete - device_end_word_indices = device_start_word_indices + num_words; - - if (num_words > 0) { - cudf::detail::grid_1d const grid{static_cast(num_words), THREADS_PER_BLOCK}; - detail::kernel_wordpiece_tokenizer<<>>( - device_code_points, - vocab_table.table->view().data(), - vocab_table.bin_coefficients->view().data(), - vocab_table.bin_offsets->view().data(), - vocab_table.unknown_token_id, - vocab_table.outer_hash_a, - vocab_table.outer_hash_b, - vocab_table.num_bins, - device_start_word_indices, - device_end_word_indices, - max_word_length, - num_words, - device_token_ids.data(), - device_tokens_per_word.data()); - CUDF_CHECK_CUDA(stream.value()); - } - - // Repurpose the input array for the token ids. In the worst case, each code point ends up being a - // token so this will always have enough memory to store the contiguous tokens. - uint32_t* contiguous_token_ids = device_code_points; - auto const copy_size = // thrust::copy_if limited to copying int-max values - cuda::std::min(device_token_ids.size(), - static_cast(cuda::std::numeric_limits::max())); - auto ids_itr = device_token_ids.begin(); - auto const ids_end = device_token_ids.end(); - while (ids_itr != ids_end) { - auto const copy_end = (static_cast(std::distance(ids_itr, ids_end)) <= copy_size) - ? ids_end - : ids_itr + copy_size; - contiguous_token_ids = thrust::copy_if( - rmm::exec_policy(stream), ids_itr, copy_end, contiguous_token_ids, copy_if_fn{}); - ids_itr = copy_end; - } - - // Repurpose start word indices since it is the same size and type as the required output. - uint32_t* token_id_counts = device_start_word_indices; - thrust::transform_inclusive_scan(rmm::exec_policy(stream), - device_tokens_per_word.data(), - device_tokens_per_word.data() + num_code_points, - token_id_counts, - tranform_fn{}, - cuda::std::plus()); - - // Update the device_strings_offsets using the token_id_counts - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(1), - num_strings, - update_strings_lengths_fn{token_id_counts, device_strings_offsets}); -} - -} // namespace detail -} // namespace nvtext diff --git a/docs/cudf/source/pylibcudf/api_docs/nvtext/index.rst b/docs/cudf/source/pylibcudf/api_docs/nvtext/index.rst index 9ba47fd8d70..00314bceeb9 100644 --- a/docs/cudf/source/pylibcudf/api_docs/nvtext/index.rst +++ b/docs/cudf/source/pylibcudf/api_docs/nvtext/index.rst @@ -13,5 +13,4 @@ nvtext normalize replace stemmer - subword_tokenize tokenize diff --git a/docs/cudf/source/pylibcudf/api_docs/nvtext/subword_tokenize.rst b/docs/cudf/source/pylibcudf/api_docs/nvtext/subword_tokenize.rst deleted file mode 100644 index 818714bec6a..00000000000 --- a/docs/cudf/source/pylibcudf/api_docs/nvtext/subword_tokenize.rst +++ /dev/null @@ -1,6 +0,0 @@ -================ -subword_tokenize -================ - -.. 
automodule:: pylibcudf.nvtext.subword_tokenize - :members: diff --git a/docs/cudf/source/user_guide/api_docs/wordpiece_tokenizer.rst b/docs/cudf/source/user_guide/api_docs/wordpiece_tokenizer.rst index 188f0294a86..dc78447808b 100644 --- a/docs/cudf/source/user_guide/api_docs/wordpiece_tokenizer.rst +++ b/docs/cudf/source/user_guide/api_docs/wordpiece_tokenizer.rst @@ -10,10 +10,3 @@ Constructor WordPieceVocabulary WordPieceVocabulary.tokenize - -.. currentmodule:: cudf.core.subword_tokenizer -.. autosummary:: - :toctree: api/ - - SubwordTokenizer - SubwordTokenizer.__call__ diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index f906b83d5a7..1fa05377099 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -835,32 +835,6 @@ def is_letter(self, is_vowel: bool, index: int | NumericalColumn) -> Self: ) ) - @acquire_spill_lock() - def subword_tokenize( - self, - hashed_vocabulary: plc.nvtext.subword_tokenize.HashedVocabulary, - max_sequence_length: int = 64, - stride: int = 48, - do_lower: bool = True, - do_truncate: bool = False, - ) -> tuple[ColumnBase, ColumnBase, ColumnBase]: - """ - Subword tokenizes text series by using the pre-loaded hashed vocabulary - """ - result = plc.nvtext.subword_tokenize.subword_tokenize( - self.to_pylibcudf(mode="read"), - hashed_vocabulary, - max_sequence_length, - stride, - do_lower, - do_truncate, - ) - # return the 3 tensor components - tokens = type(self).from_pylibcudf(result[0]) - masks = type(self).from_pylibcudf(result[1]) - metadata = type(self).from_pylibcudf(result[2]) - return tokens, masks, metadata - @acquire_spill_lock() def tokenize_scalar(self, delimiter: plc.Scalar) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py deleted file mode 100644 index 27976988efb..00000000000 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. - -from __future__ import annotations - -import warnings - -import cupy as cp -import numpy as np - -import pylibcudf as plc - - -def _cast_to_appropriate_type(ar, cast_type): - if cast_type == "cp": - return ar - - if cast_type == "pt": - from torch.utils.dlpack import from_dlpack - - elif cast_type == "tf": - from tensorflow.experimental.dlpack import from_dlpack - - return from_dlpack(ar.astype(np.dtype(np.int32)).__dlpack__()) - - -class SubwordTokenizer: - """ - Run CUDA BERT subword tokenizer on cuDF strings column. - Encodes words to token ids using vocabulary from a pretrained - tokenizer. - This function requires about 21x the number of character bytes - in the input strings column as working memory. - - Parameters - ---------- - hash_file : str - Path to hash file containing vocabulary of words with token-ids. - This can be created from the raw vocabulary - using the ``cudf.utils.hash_vocab_utils.hash_vocab`` function - - do_lower : bool, Default is True - If set to True, original text will be lowercased before encoding. - - Returns - ------- - SubwordTokenizer - """ - - def __init__(self, hash_file: str, do_lower_case: bool = True): - self.do_lower_case = do_lower_case - self.vocab_file = plc.nvtext.subword_tokenize.HashedVocabulary( - hash_file - ) - warnings.warn( - "SubwordTokenizer is deprecated and will be removed in a future " - "version. 
Use WordPieceVocabulary instead.", - FutureWarning, - ) - - def __call__( - self, - text, - max_length: int, - max_num_rows: int, - add_special_tokens: bool = True, - padding: str = "max_length", - truncation: bool | str = False, - stride: int = 0, - return_tensors: str = "cp", - return_token_type_ids: bool = False, - ): - """ - Run CUDA BERT subword tokenizer on cuDF strings column. - Encodes words to token ids using vocabulary from a - pretrained tokenizer. - - Parameters - ---------- - text : cudf string series - The batch of sequences to be encoded. - - max_length : int - Controls the maximum length to use or pad to. - - max_num_rows : int - Maximum number of rows for the output token-ids expected to - be generated by the tokenizer. - Used for allocating temporary working memory on the GPU device. - If the output generates a larger number of rows, - behavior is undefined. - This will vary based on stride, truncation, and max_length. - For example, for non-overlapping sequences output rows will be - the same as input rows. - A good default can be twice the max_length - - add_special_tokens : bool, optional, defaults to True - Whether or not to encode the sequences with the special tokens - of the BERT classification model - - padding : "max_length" - Pad to a maximum length specified with the argument max_length - - truncation : bool, defaults to False - True: - Truncate to a maximum length specified with the argument max_length - False or 'do_not_truncate': default - No truncation (Output differs from HuggingFace) - - stride : int, optional, defaults to 0 - The value of this argument defines the number of - overlapping tokens. - The information about the overlapping tokens is - present in the metadata outputted. - - return_tensors : str, {"cp", "pt", "tf"} defaults to "cp" - "cp" : Return cupy cp.ndarray objects - "tf" : Return TensorFlow tf.constant objects - "pt" : Return PyTorch torch.Tensor objects - - - return_token_type_ids : bool, optional - Only False currently supported - - Returns - ------- - An encoding with the following fields: - input_ids:(type defined by return_tensors) - A tensor of token ids to be fed to the model. - attention_mask: (type defined by return_tensors) - A tensor of indices specifying which tokens - should be attended to by the model - metadata: (type defined by return_tensors) - Each row contains the index id of the original string and the - first and last index of the token-ids that are non-padded and - non-overlapping - - Examples - -------- - >>> import cudf - >>> from cudf.utils.hash_vocab_utils import hash_vocab - >>> hash_vocab('bert-base-cased-vocab.txt', 'voc_hash.txt') - - - >>> from cudf.core.subword_tokenizer import SubwordTokenizer - >>> cudf_tokenizer = SubwordTokenizer('voc_hash.txt', - ... do_lower_case=True) - >>> str_series = cudf.Series(['This is the', 'best book']) - >>> tokenizer_output = cudf_tokenizer(str_series, - ... max_length=8, - ... max_num_rows=len(str_series), - ... padding='max_length', - ... return_tensors='pt', - ... 
truncation=True) - >>> tokenizer_output['input_ids'] - tensor([[ 101, 1142, 1110, 1103, 102, 0, 0, 0], - [ 101, 1436, 1520, 102, 0, 0, 0, 0]], - device='cuda:0', - dtype=torch.int32) - >>> tokenizer_output['attention_mask'] - tensor([[1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 0, 0, 0, 0]], - device='cuda:0', dtype=torch.int32) - >>> tokenizer_output['metadata'] - tensor([[0, 1, 3], - [1, 1, 2]], device='cuda:0', dtype=torch.int32) - """ - - if return_token_type_ids: - # raise not currently supported - # Can also return zeros - error_msg = "Returning token_type_ids is currently supported" - raise NotImplementedError(error_msg) - - if truncation in (False, "do_not_truncate"): - if add_special_tokens: - error_msg = ( - "Adding special tokens is not supported " - f"with truncation = {truncation}. " - ) - recommendation = ( - "Custom Cupy kernel can potentially " - "be used to add it. For reference " - "see: _bert_add_special_tokens" - ) - raise NotImplementedError(error_msg + recommendation) - - truncation = False - warning_msg = ( - "When truncation is not True, the behavior currently differs " - "from HuggingFace as cudf always returns overflowing tokens" - ) - warnings.warn(warning_msg) - - if padding != "max_length": - error_msg = ( - "Only padding to the provided max_lengthis currently supported" - ) - raise NotImplementedError(error_msg) - - if max_length <= stride: - error_msg = "Stride should be less than max_length" - raise ValueError(error_msg) - - if return_tensors not in {"cp", "pt", "tf"}: - error_msg = ( - "Only cupy(cp), pytorch(pt) and tensorflow(tf) " - "tensors are supported" - ) - raise NotImplementedError(error_msg) - - stride = max_length - stride - # behavior varies from subword_tokenize but maps with huggingface - - input_ids, attention_mask, metadata = text._column.subword_tokenize( - self.vocab_file, - max_sequence_length=max_length, - stride=stride, - do_lower=self.do_lower_case, - do_truncate=truncation, - ) - - tokenizer_output = { - "input_ids": cp.asarray(input_ids).reshape(-1, max_length), - "attention_mask": cp.asarray(attention_mask).reshape( - -1, max_length - ), - "metadata": cp.asarray(metadata).reshape(-1, 3), - } - - if add_special_tokens: - tokenizer_output = _bert_add_special_tokens(tokenizer_output) - - tokenizer_output = { - k: _cast_to_appropriate_type(v, return_tensors) - for k, v in tokenizer_output.items() - } - - return tokenizer_output - - -def _bert_add_special_tokens(token_o): - """ - Adds special tokens (CLS,SEP) which are often used by pre-trained BERT - models to input_ids and adjusts attention_mask and metadata to account - for them. 
- """ - max_length = token_o["input_ids"].shape[1] - seq_end_col = max_length - (token_o["input_ids"][:, ::-1] != 0).argmax(1) - # clipping to take overflow into account - seq_end_col = cp.clip(seq_end_col + 1, a_min=None, a_max=max_length - 1) - - _bert_add_special_tokens_input_ids(token_o["input_ids"], seq_end_col) - _bert_add_special_tokens_attention_mask( - token_o["attention_mask"], seq_end_col - ) - _bert_add_special_tokens_metadata(token_o["metadata"], max_length) - - return token_o - - -def _bert_add_special_tokens_input_ids(input_ids, seq_end_col): - """ - Add token ids for special tokens ([CLS] and [SEP]) to - the start and end of each sequence - """ - # Mark sequence start with [CLS] token mapping to the start of sequence - input_ids[:, 1:-1] = input_ids[:, 0:-2] - input_ids[:, 0] = 101 - # Mark end of sequence [SEP] - - input_ids[ - cp.arange(0, input_ids.shape[0], dtype=cp.uint32), seq_end_col - ] = 102 - - -def _bert_add_special_tokens_attention_mask(attention_mask, seq_end_col): - """ - Mark attention mask for special tokens ([CLS] and [SEP]) with 1 - """ - # Copy attention masks for all but last two - attention_mask[:, 1:-1] = attention_mask[:, 0:-2] - # Mark [CLS] token with 1 - attention_mask[:, 0] = 1 - # Mark [SEP] token with 1 - attention_mask[ - cp.arange(0, attention_mask.shape[0], dtype=cp.uint32), seq_end_col - ] = 1 - - -def _bert_add_special_tokens_metadata(metadata, max_length): - """ - Edit metadata to account for the added special tokens ([CLS] and [SEP]) - """ - # metadata seq starts from plus 1 - metadata[:, 1] = metadata[:, 1] + 1 - # clip done to take overflow into account - metadata[:, 2] = cp.clip( - metadata[:, 2] + 1, a_min=None, a_max=max_length - 2 - ) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd deleted file mode 100644 index 1ac69c87c4b..00000000000 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/subword_tokenize.pxd +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from libc.stdint cimport uint16_t, uint32_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.string cimport string -from pylibcudf.exception_handler cimport libcudf_exception_handler -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view - - -cdef extern from "nvtext/subword_tokenize.hpp" namespace "nvtext" nogil: - cdef cppclass tokenizer_result: - uint32_t nrows_tensor - uint32_t sequence_length - unique_ptr[column] tensor_token_ids - unique_ptr[column] tensor_attention_mask - unique_ptr[column] tensor_metadata - - cdef cppclass hashed_vocabulary: - uint16_t first_token_id - uint16_t separator_token_id - uint16_t unknown_token_id - uint32_t outer_hash_a - uint32_t outer_hash_b - uint16_t num_bin - unique_ptr[column] table - unique_ptr[column] bin_coefficients - unique_ptr[column] bin_offsets - unique_ptr[column] cp_metadata - unique_ptr[column] aux_cp_table - - cdef unique_ptr[hashed_vocabulary] load_vocabulary_file( - const string &filename_hashed_vocabulary - ) except +libcudf_exception_handler - - cdef tokenizer_result subword_tokenize( - const column_view & strings, - hashed_vocabulary & hashed_vocabulary_obj, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower, - bool do_truncate - ) except +libcudf_exception_handler - - cdef tokenizer_result subword_tokenize( - const column_view &strings, - const string &filename_hashed_vocabulary, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower, - bool do_truncate - ) except +libcudf_exception_handler - -cdef extern from "" namespace "std" nogil: - cdef tokenizer_result move(tokenizer_result) diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 842d6079749..5b0ecb6a7a8 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -24,7 +24,6 @@ set(cython_sources replace.pyx stemmer.pyx tokenize.pyx - subword_tokenize.pyx wordpiece_tokenize.pyx ) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 0fe6f657051..704fd03b242 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -11,7 +11,6 @@ from . cimport ( normalize, replace, stemmer, - subword_tokenize, tokenize, wordpiece_tokenize, ) @@ -27,7 +26,6 @@ __all__ = [ "normalize", "replace", "stemmer", - "subword_tokenize", "tokenize", "wordpiece_tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 4b18c2fce09..b67a5bcbff4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -11,7 +11,6 @@ normalize, replace, stemmer, - subword_tokenize, tokenize, wordpiece_tokenize, ) @@ -27,7 +26,6 @@ "normalize", "replace", "stemmer", - "subword_tokenize", "tokenize", "wordpiece_tokenize", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd deleted file mode 100644 index 091c7b897ac..00000000000 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pxd +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -from libc.stdint cimport uint32_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from pylibcudf.column cimport Column -from pylibcudf.libcudf.nvtext.subword_tokenize cimport hashed_vocabulary - - -cdef class HashedVocabulary: - cdef unique_ptr[hashed_vocabulary] c_obj - -cpdef tuple[Column, Column, Column] subword_tokenize( - Column input, - HashedVocabulary vocabulary_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower_case, - bool do_truncate, -) diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi deleted file mode 100644 index f6618e296b1..00000000000 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from pylibcudf.column import Column - -class HashedVocabulary: - def __init__(self, hash_file: str): ... - -def subword_tokenize( - input: Column, - vocabulary_table: HashedVocabulary, - max_sequence_length: int, - stride: int, - do_lower_case: bool, - do_truncate: bool, -) -> tuple[Column, Column, Column]: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx deleted file mode 100644 index 14fb6f5fe1e..00000000000 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cython.operator cimport dereference -from libc.stdint cimport uint32_t -from libcpp cimport bool -from libcpp.string cimport string -from libcpp.utility cimport move -from pylibcudf.column cimport Column -from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( - load_vocabulary_file as cpp_load_vocabulary_file, - move as tr_move, - subword_tokenize as cpp_subword_tokenize, - tokenizer_result as cpp_tokenizer_result, -) - -__all__ = ["HashedVocabulary", "subword_tokenize"] - -cdef class HashedVocabulary: - """The vocabulary data for use with the subword_tokenize function. - - For details, see :cpp:class:`cudf::nvtext::hashed_vocabulary`. - """ - def __cinit__(self, hash_file): - cdef string c_hash_file = str(hash_file).encode() - with nogil: - self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) - - __hash__ = None - -cpdef tuple[Column, Column, Column] subword_tokenize( - Column input, - HashedVocabulary vocabulary_table, - uint32_t max_sequence_length, - uint32_t stride, - bool do_lower_case, - bool do_truncate, -): - """ - Creates a tokenizer that cleans the text, splits it into - tokens and returns token-ids from an input vocabulary. - - For details, see cpp:func:`subword_tokenize` - - Parameters - ---------- - input : Column - The input strings to tokenize. - vocabulary_table : HashedVocabulary - The vocabulary table pre-loaded into this object. - max_sequence_length : uint32_t - Limit of the number of token-ids per row in final tensor for each string. - stride : uint32_t - Each row in the output token-ids will replicate - ``max_sequence_length`` - ``stride`` the token-ids - from the previous row, unless it is the first string. - do_lower_case : bool - If true, the tokenizer will convert uppercase characters in the - input stream to lower-case and strip accents from those characters. - If false, accented and uppercase characters are not transformed. - do_truncate : bool - If true, the tokenizer will discard all the token-ids after - ``max_sequence_length`` for each input string. 
If false, it - will use a new row in the output token-ids to continue - generating the output. - - Returns - ------- - tuple[Column, Column, Column] - A tuple of three columns containing the - tokens, masks, and metadata. - """ - cdef cpp_tokenizer_result c_result - with nogil: - c_result = tr_move( - cpp_subword_tokenize( - input.view(), - dereference(vocabulary_table.c_obj.get()), - max_sequence_length, - stride, - do_lower_case, - do_truncate, - ) - ) - cdef Column tokens = Column.from_libcudf(move(c_result.tensor_token_ids)) - cdef Column masks = Column.from_libcudf(move(c_result.tensor_attention_mask)) - cdef Column metadata = Column.from_libcudf(move(c_result.tensor_metadata)) - return tokens, masks, metadata From a3ce3a1b1bf730df7efa3a80163acd93115f84cd Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 4 Aug 2025 12:49:14 -0400 Subject: [PATCH 048/366] Fix OOB memcheck error in group_rank_to_percentage utility (#19567) Fixes out-of-bounds memory read access in `cudf::groupby::detail::group_rank_to_percentage` utility. ``` ========= Invalid __global__ read of size 4 bytes ========= at cudf::groupby::detail::group_rank_to_percentage(cudf::rank_method, cudf::rank_percentage, const cudf::column_view &, const cudf::column_view &, cudf::device_span, cudf::device_span, rmm::cuda_stream_view, cuda::mr::__4::basic_resource_ref<(cuda::mr::__4::_AllocType)1, cuda::mr::__4::device_accessible>)::[lambda(int) (instance 1)]::operator ()(int) const+0x1650 in group_rank_scan.cu:313 ========= by thread (0,0,0) in block (0,0,0) ========= Access at 0x7beb8dc01dfc is out of bounds ========= and is 4 bytes before the nearest allocation at 0x7beb8dc01e00 of size 400 bytes ========= Device Frame: thrust::cuda_cub::__tabulate::functor, cudf::device_span, rmm::cuda_stream_view, cuda::mr::__4::basic_resource_ref<(cuda::mr::__4::_AllocType)1, cuda::mr::__4::device_accessible>)::[lambda(int) (instance 1)], long>::operator ()(long)+0x1520 in tabulate.h:66 ... ``` This was found using compute-sanitizer on the following pytest command in `/cudf/python/cudf/cudf/tests`: ``` pytest test_groupby.py -k'test_groupby_2keys_rank[True-keep-True-dense-100]' ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/19567 --- cpp/src/groupby/sort/group_rank_scan.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index a0ba81bccb2..8e266d42077 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -300,7 +300,7 @@ std::unique_ptr group_rank_to_percentage(rank_method const method, double const r = is_double ? d_rank[row_index] : s_rank[row_index]; auto const count = dcount[labels[row_index]]; size_type const last_rank_index = offsets[labels[row_index]] + count - 1; - auto const last_rank = s_rank[last_rank_index]; + auto const last_rank = last_rank_index < 0 ? 1 : s_rank[last_rank_index]; return percentage == rank_percentage::ZERO_NORMALIZED ? 
r / last_rank : one_normalized(r, last_rank); From bfa6da8b6a959aef2636c9ed5cf6995a7533d730 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 4 Aug 2025 13:46:08 -0400 Subject: [PATCH 049/366] Avoid querying device memory on systems without it in dask-cudf (#19577) This PR updates dask-cudf's `_get_device_size` (which is used to determine the blocksize when reading parquet files) to catch `pynvml.NVMLError_NotSupported`, which is raised on systems without traditional dedicated GPU memory. Related PR #19575 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/cudf/pull/19577 --- python/dask_cudf/dask_cudf/io/parquet.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index e075971f01f..3fa5a81965b 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -52,12 +52,16 @@ def _get_device_size(): if index and not index.isnumeric(): # This means index is UUID. This works for both MIG and non-MIG device UUIDs. handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(index)) + if pynvml.nvmlDeviceIsMigDeviceHandle(handle): + handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle( + handle + ) else: # This is a device index handle = pynvml.nvmlDeviceGetHandleByIndex(int(index)) return pynvml.nvmlDeviceGetMemoryInfo(handle).total - except ValueError: + except (ValueError, pynvml.NVMLError_NotSupported): # Fall back to a conservative 8GiB default return 8 * 1024**3 From cdf6a3ab79e7b38d03d14b65158fd969a240eb13 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 4 Aug 2025 13:46:47 -0400 Subject: [PATCH 050/366] Avoid querying device memory on systems without it in cudf-polars benchmarks (#19575) This PR updates the cudf-polars benchmark suite to catch `pynvml.NVMLError_NotSupported`, which is raised on systems without traditional dedicated GPU memory. This should have been included in [#19444](https://github.com/rapidsai/cudf/pull/19444), which closed [#19427](https://github.com/rapidsai/cudf/issues/19427). It was an oversight on my part, since testing was done primarily on systems with dedicated device memory.
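Both this and the dask-cudf change above reduce to the same defensive pattern: treat the NVML memory query as optional. A minimal sketch of that pattern (illustrative only; the helper name and default device index are not part of either diff):

```
# Sketch: query total device memory, tolerating systems (e.g. Grace Hopper)
# where NVML does not report traditional dedicated GPU memory.
import pynvml


def total_device_memory(index: int = 0) -> int | None:
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(index)
    try:
        return pynvml.nvmlDeviceGetMemoryInfo(handle).total
    except pynvml.NVMLError_NotSupported:
        # No dedicated GPU memory to report; the caller decides whether to
        # fall back to a default (as dask-cudf does) or carry None through
        # (as the benchmark suite does).
        return None
```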
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19575 --- .../experimental/benchmarks/utils.py | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index 7e56e3c57d3..11e6442baab 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -105,23 +105,35 @@ class GPUInfo: name: str index: int - free_memory: int - used_memory: int - total_memory: int + free_memory: int | None + used_memory: int | None + total_memory: int | None @classmethod def from_index(cls, index: int) -> GPUInfo: """Create a GPUInfo from an index.""" pynvml.nvmlInit() handle = pynvml.nvmlDeviceGetHandleByIndex(index) - memory = pynvml.nvmlDeviceGetMemoryInfo(handle) - return cls( - name=pynvml.nvmlDeviceGetName(handle), - index=index, - free_memory=memory.free, - used_memory=memory.used, - total_memory=memory.total, - ) + try: + memory = pynvml.nvmlDeviceGetMemoryInfo(handle) + return cls( + name=pynvml.nvmlDeviceGetName(handle), + index=index, + free_memory=memory.free, + used_memory=memory.used, + total_memory=memory.total, + ) + except pynvml.NVMLError_NotSupported: + # Happens on systems without traditional GPU memory (e.g., Grace Hopper), + # where nvmlDeviceGetMemoryInfo is not supported. + # See: https://github.com/rapidsai/cudf/issues/19427 + return cls( + name=pynvml.nvmlDeviceGetName(handle), + index=index, + free_memory=None, + used_memory=None, + total_memory=None, + ) @dataclasses.dataclass From f1892a687cf197d8fb9785d25bb2f728be3ffbd0 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 4 Aug 2025 13:48:24 -0400 Subject: [PATCH 051/366] Expose `filter` and `columns` parquet reader builder options to python (#19566) Exposes `filter` and `columns` options so we can use a chaining style when building the options object before passing it to the reader.
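A rough sketch of the resulting chaining style (assuming pylibcudf's usual `SourceInfo` and `read_parquet` entry points; the file path and column names are placeholders):

```
import pylibcudf as plc

# Build the reader options fluently; `columns` is one of the two builder
# methods exposed by this PR.
options = (
    plc.io.parquet.ParquetReaderOptions.builder(
        plc.io.SourceInfo(["data.parquet"])
    )
    .columns(["a", "b"])
    .use_pandas_metadata(True)
    .build()
)
table_with_metadata = plc.io.parquet.read_parquet(options)
```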
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19566 --- python/pylibcudf/pylibcudf/io/parquet.pxd | 2 ++ python/pylibcudf/pylibcudf/io/parquet.pyx | 37 ++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd index 2a925b23f6e..63d96c4af3e 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/io/parquet.pxd @@ -53,6 +53,8 @@ cdef class ParquetReaderOptionsBuilder: cpdef ParquetReaderOptionsBuilder use_pandas_metadata(self, bool val) cpdef ParquetReaderOptionsBuilder allow_mismatched_pq_schemas(self, bool val) cpdef ParquetReaderOptionsBuilder use_arrow_schema(self, bool val) + cpdef ParquetReaderOptionsBuilder filter(self, Expression filter) + cpdef ParquetReaderOptionsBuilder columns(self, list col_names) cpdef build(self) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index 42803d8d8fb..29dfb8df0aa 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -171,7 +171,7 @@ cdef class ParquetReaderOptions: ------- None """ - self.c_obj.set_filter(dereference(filter.c_obj.get())) + self.c_obj.set_filter(dereference(filter.c_obj)) cdef class ParquetReaderOptionsBuilder: @@ -241,6 +241,41 @@ cdef class ParquetReaderOptionsBuilder: self.c_obj.use_arrow_schema(val) return self + cpdef ParquetReaderOptionsBuilder filter(self, Expression filter): + """ + Sets AST based filter for predicate pushdown. + + Parameters + ---------- + filter : Expression + AST expression to use as filter + + Returns + ------- + ParquetReaderOptionsBuilder + """ + self.c_obj.filter(dereference(filter.c_obj)) + return self + + cpdef ParquetReaderOptionsBuilder columns(self, list col_names): + """ + Sets names of the columns to be read. 
+ + Parameters + ---------- + col_names : list[str] + List of column names + + Returns + ------- + ParquetReaderOptionsBuilder + """ + cdef vector[string] vec + for name in col_names: + vec.push_back(str(name).encode()) + self.c_obj.columns(vec) + return self + cpdef build(self): """Create a ParquetReaderOptions object""" cdef ParquetReaderOptions parquet_options = ParquetReaderOptions.__new__( From e977f5c54e0db6648287e231cb1ee45b72979ae0 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 4 Aug 2025 14:56:30 -0400 Subject: [PATCH 052/366] Update rapids-build-backend to 0.4.0 (#19580) Issue: https://github.com/rapidsai/build-planning/issues/207 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19580 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-129_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/recipe.yaml | 2 +- conda/recipes/cudf/recipe.yaml | 2 +- conda/recipes/cudf_kafka/recipe.yaml | 2 +- conda/recipes/custreamz/recipe.yaml | 2 +- conda/recipes/dask-cudf/recipe.yaml | 2 +- conda/recipes/pylibcudf/recipe.yaml | 2 +- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/cudf_kafka/pyproject.toml | 2 +- python/cudf_polars/pyproject.toml | 2 +- python/custreamz/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- python/libcudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 15af26470a4..0e95832dddd 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -81,7 +81,7 @@ dependencies: - python-xxhash - python>=3.10,<3.14 - pytorch>=2.4.0 -- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 - rapids-dask-dependency==25.10.*,>=0.0.0a0 - rapids-logger==0.1.*,>=0.0.0a0 - rich diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index e606a1cd4c3..e96b8d81953 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -82,7 +82,7 @@ dependencies: - python-xxhash - python>=3.10,<3.14 - pytorch>=2.4.0 -- rapids-build-backend>=0.3.0,<0.4.0.dev0 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 - rapids-dask-dependency==25.10.*,>=0.0.0a0 - rapids-logger==0.1.*,>=0.0.0a0 - rich diff --git a/conda/recipes/cudf-polars/recipe.yaml b/conda/recipes/cudf-polars/recipe.yaml index 46ab07ab81c..d3c080249f1 100644 --- a/conda/recipes/cudf-polars/recipe.yaml +++ b/conda/recipes/cudf-polars/recipe.yaml @@ -43,7 +43,7 @@ requirements: host: - python =${{ py_version }} - pip - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - setuptools - cuda-version =${{ cuda_version }} run: diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 98e82c95bce..38e32b5c1f2 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -52,7 +52,7 @@ requirements: - python =${{ py_version }} - pip - cython >=3.0.3 - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - numba-cuda >=0.14.0,<0.15.0a0 diff --git a/conda/recipes/cudf_kafka/recipe.yaml b/conda/recipes/cudf_kafka/recipe.yaml index 
0d2c8cc39cc..100dc270915 100644 --- a/conda/recipes/cudf_kafka/recipe.yaml +++ b/conda/recipes/cudf_kafka/recipe.yaml @@ -55,7 +55,7 @@ requirements: - cuda-version =${{ cuda_version }} - pylibcudf =${{ version }} - libcudf_kafka =${{ version }} - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - cuda-cudart-dev run: diff --git a/conda/recipes/custreamz/recipe.yaml b/conda/recipes/custreamz/recipe.yaml index 36535c4f472..4e8644b046e 100644 --- a/conda/recipes/custreamz/recipe.yaml +++ b/conda/recipes/custreamz/recipe.yaml @@ -28,7 +28,7 @@ requirements: host: - python =${{ py_version }} - pip - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - setuptools - python-confluent-kafka >=2.8.0,<2.9.0a0 - cudf_kafka =${{ version }} diff --git a/conda/recipes/dask-cudf/recipe.yaml b/conda/recipes/dask-cudf/recipe.yaml index eaa05196c9d..fc9e20f4192 100644 --- a/conda/recipes/dask-cudf/recipe.yaml +++ b/conda/recipes/dask-cudf/recipe.yaml @@ -28,7 +28,7 @@ requirements: host: - python =${{ py_version }} - pip - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - setuptools - cuda-version =${{ cuda_version }} run: diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml index e5fec6983c4..2d2cf0a630f 100644 --- a/conda/recipes/pylibcudf/recipe.yaml +++ b/conda/recipes/pylibcudf/recipe.yaml @@ -52,7 +52,7 @@ requirements: - python =${{ py_version }} - pip - cython >=3.0.3 - - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - libcudf =${{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index 0153304a4e2..7e53ff8a959 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -465,7 +465,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 + - &rapids_build_backend rapids-build-backend>=0.4.0,<0.5.0.dev0 - output_types: conda packages: - scikit-build-core>=0.10.0 diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index f1a678e5326..bd0bf9087a0 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index f4be4552feb..c5c0837ad67 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index a69b5551b09..472520df984 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 0b961894d2d..244906cf204 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index f0d1d91fbfe..b8603849892 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 7b69543d898..bc0bf5b1edf 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -15,7 +15,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 561f9eccd64..85d8693b1c3 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -3,7 +3,7 @@ [build-system] build-backend = "rapids_build_backend.build" requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", + "rapids-build-backend>=0.4.0,<0.5.0.dev0", "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From dbb33055295e61d810f2f9e412dda445b42c572c Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Mon, 4 Aug 2025 16:10:33 -0400 Subject: [PATCH 053/366] ci(labeler): update labeler action to @v5 (#19581) Bumps the version of `actions/labeler` to `@v5` and updates the syntax in the `labeler.yml` file to account for breaking changes in that version bump. 
xref: rapidsai/ops#2968 Authors: - Gil Forsyth (https://github.com/gforsyth) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19581 --- .github/labeler.yml | 42 +++++++++++++++++++++-------------- .github/workflows/labeler.yml | 2 +- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 63ef619b64e..87077c8ea9e 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,26 +1,34 @@ # Documentation for config - https://github.com/actions/labeler#common-examples Python: - - 'python/**' - - 'notebooks/**' - + - changed-files: + any-glob-to-any-file: + - 'python/**' + - 'notebooks/**' cudf.pandas: - - 'python/cudf/cudf/pandas/**' - - 'python/cudf/cudf_pandas_tests/**' - + - changed-files: + any-glob-to-any-file: + - 'python/cudf/cudf/pandas/**' + - 'python/cudf/cudf_pandas_tests/**' cudf-polars: - - 'python/cudf_polars/**' - + - changed-files: + any-glob-to-any-file: + - 'python/cudf_polars/**' pylibcudf: - - 'python/pylibcudf/**' - + - changed-files: + any-glob-to-any-file: + - 'python/pylibcudf/**' libcudf: - - 'cpp/**' - + - changed-files: + any-glob-to-any-file: + - 'cpp/**' CMake: - - '**/CMakeLists.txt' - - '**/cmake/**' - - '**/*.cmake' - + - changed-files: + any-glob-to-any-file: + - '**/CMakeLists.txt' + - '**/cmake/**' + - '**/*.cmake' Java: - - 'java/**' + - changed-files: + any-glob-to-any-file: + - 'java/**' diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index acfefc5e4af..fa134653d49 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -13,6 +13,6 @@ jobs: persist-credentials: false sparse-checkout: .github/labeler.yml sparse-checkout-cone-mode: false - - uses: actions/labeler@v4 + - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" From 5853debea4ae85b48b99e8f556973ea244f6a71a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 4 Aug 2025 16:08:35 -0700 Subject: [PATCH 054/366] Construct cuDF classic columns with __array_interface__ through pylibcudf (#19538) Towards https://github.com/rapidsai/cudf/issues/18726 Use `pylibcudf.Column.from_array_interface` or pyarrow to convert an object implementing `__array_interface__` to a cuDF classic column instead of using the custom Buffer class. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19538 --- python/cudf/cudf/core/column/column.py | 67 ++++++++++--------- .../cudf/pandas/scripts/conftest-patch.py | 56 ++-------------- .../cudf/tests/series/test_constructors.py | 5 ++ 3 files changed, 45 insertions(+), 83 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index ac2d9d12752..f3d8a7798be 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -3132,34 +3132,37 @@ def as_column( # TODO: Or treat as scalar? arbitrary = arbitrary[np.newaxis] - if arbitrary.dtype.kind in "OSU": - if pd.isna(arbitrary).any(): - new_array = pa.array(arbitrary) + if arbitrary.dtype.kind == "O": + is_na = pd.isna(arbitrary) + if is_na.any(): + if is_na.all(): + # Avoid pyarrow converting np.ndarray[object] of all NaNs to float + raise MixedTypeError( + "Cannot have all NaN values with object dtype." + ) + arbitrary = pa.array(arbitrary) else: # Let pandas potentially infer object type # e.g.
np.array([pd.Timestamp(...)], dtype=object) -> datetime64 - new_array = pd.Series(arbitrary) - res = as_column(new_array, dtype=dtype, nan_as_null=nan_as_null) - if ( - cudf.get_option("mode.pandas_compatible") - and res.dtype.kind == "f" - and arbitrary.dtype.kind == "O" - ): - raise MixedTypeError( - "Cannot create column with mixed types, " - "pandas Series with object dtype cannot be converted to cudf Series with float dtype." - ) - return res + arbitrary = pd.Series(arbitrary) + return as_column(arbitrary, dtype=dtype, nan_as_null=nan_as_null) + elif arbitrary.dtype.kind in "SU": + result_column = ColumnBase.from_arrow(pa.array(arbitrary)) + if dtype is not None: + result_column = result_column.astype(dtype) + return result_column elif arbitrary.dtype.kind in "biuf": - from_pandas = nan_as_null is None or nan_as_null if not arbitrary.dtype.isnative: - # Not supported by pyarrow + # Not supported by pylibcudf arbitrary = arbitrary.astype(arbitrary.dtype.newbyteorder("=")) - return as_column( - pa.array(arbitrary, from_pandas=from_pandas), - dtype=dtype, - nan_as_null=nan_as_null, + result_column = ColumnBase.from_pylibcudf( + plc.Column.from_array_interface(arbitrary) ) + if nan_as_null is not False: + result_column = result_column.nans_to_nulls() + if dtype is not None: + result_column = result_column.astype(dtype) + return result_column elif arbitrary.dtype.kind in "mM": time_unit = np.datetime_data(arbitrary.dtype)[0] if time_unit in ("D", "W", "M", "Y"): @@ -3175,7 +3178,7 @@ def as_column( is_nat = np.isnat(arbitrary) mask = None if is_nat.any(): - if nan_as_null is None or nan_as_null: + if nan_as_null is not False: # Convert NaT to NA, which pyarrow does by default return as_column( pa.array(arbitrary), @@ -3184,16 +3187,16 @@ def as_column( ) # Consider NaT as NA in the mask # but maintain NaT as a value - mask = as_column(~is_nat).as_mask() - buffer = as_buffer(arbitrary.view("|u1")) - col = build_column( - data=buffer, - mask=mask, - dtype=arbitrary.dtype, - ) - if dtype: - col = col.astype(dtype) - return col + mask = plc.Column.from_array_interface(~is_nat) + plc_column = plc.Column.from_array_interface(arbitrary) + if mask is not None: + plc_column = plc_column.with_mask( + *plc.transform.bools_to_mask(mask) + ) + result_column = ColumnBase.from_pylibcudf(plc_column) + if dtype is not None: + result_column = result_column.astype(dtype) + return result_column else: raise NotImplementedError(f"{arbitrary.dtype} not supported") elif (view := as_memoryview(arbitrary)) is not None: diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 9520f09945e..4dc5b3d42ab 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -5005,10 +5005,6 @@ def pytest_unconfigure(config): "tests/frame/indexing/test_indexing.py::test_adding_new_conditional_column_with_string[string[pyarrow_numpy]-True]", "tests/frame/indexing/test_indexing.py::test_object_casting_indexing_wraps_datetimelike", "tests/frame/indexing/test_insert.py::TestDataFrameInsert::test_insert", - "tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_frame_setitem_datetime64_col_other_units[h]", - "tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_frame_setitem_datetime64_col_other_units[m]", - "tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_frame_setitem_existing_datetime64_col_other_units[h]", - 
"tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_frame_setitem_existing_datetime64_col_other_units[m]", "tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_list_missing_columns[columns3-box3-expected3]", "tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index", "tests/frame/indexing/test_setitem.py::TestDataFrameSetItemSlicing::test_setitem_slice_indexer_broadcasting_rhs[1-Series-iloc]", @@ -5089,27 +5085,9 @@ def pytest_unconfigure(config): "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64tz", "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64tz_to_str", "tests/frame/methods/test_astype.py::TestAstype::test_astype_extension_dtypes_duplicate_col[Int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_from_datetimelike_to_object[h-M8]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_from_datetimelike_to_object[h-m8]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_from_datetimelike_to_object[m-M8]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_from_datetimelike_to_object[m-m8]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_str_float", "tests/frame/methods/test_astype.py::TestAstype::test_astype_td64_to_string[DataFrame]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_td64_to_string[Series]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetime_unit[h]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetime_unit[m]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[h-M8-float64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[h-M8-int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[h-m8-float64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[h-m8-int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[m-M8-float64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[m-M8-int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[m-m8-float64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_datetimelike_unit[m-m8-int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_incorrect_datetimelike[h]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_incorrect_datetimelike[m]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_timedelta_unit[h]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_to_timedelta_unit[m]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_with_exclude_string", "tests/frame/methods/test_astype.py::TestAstypeCategorical::test_astype_categorical_to_string_missing", "tests/frame/methods/test_astype.py::test_astype_to_string_not_modifying_input[pyarrow_numpy-None]", @@ -5176,6 +5154,7 @@ def pytest_unconfigure(config): "tests/frame/methods/test_explode.py::test_duplicate_index[input_dict1-input_index1-expected_dict1-expected_index1]", "tests/frame/methods/test_explode.py::test_duplicate_index[input_dict2-input_index2-expected_dict2-expected_index2]", "tests/frame/methods/test_explode.py::test_duplicate_index[input_dict3-input_index3-expected_dict3-expected_index3]", + "tests/frame/methods/test_explode.py::test_multi_columns_nan_empty", 
"tests/frame/methods/test_fillna.py::TestFillNA::test_fillna_dict_inplace_nonunique_columns", "tests/frame/methods/test_fillna.py::TestFillNA::test_fillna_dtype_conversion", "tests/frame/methods/test_fillna.py::TestFillNA::test_fillna_on_column_view", @@ -5372,6 +5351,7 @@ def pytest_unconfigure(config): "tests/frame/methods/test_set_index.py::TestSetIndex::test_set_index_pass_single_array[True-True-test-1]", "tests/frame/methods/test_set_index.py::TestSetIndex::test_set_index_pass_single_array[True-True-test-Index]", "tests/frame/methods/test_set_index.py::TestSetIndexCustomLabelType::test_set_index_custom_label_hashable_iterable", + "tests/frame/methods/test_shift.py::TestDataFrameShift::test_shift_bool", "tests/frame/methods/test_shift.py::TestDataFrameShift::test_shift_dt64values_axis1_invalid_fill[datetime64[ns]-False]", "tests/frame/methods/test_shift.py::TestDataFrameShift::test_shift_dt64values_axis1_invalid_fill[timedelta64[ns]-False]", "tests/frame/methods/test_shift.py::TestDataFrameShift::test_shift_dt64values_int_fill_deprecated", @@ -5499,7 +5479,6 @@ def pytest_unconfigure(config): "tests/frame/test_arithmetic.py::test_pow_with_realignment[python]", "tests/frame/test_block_internals.py::TestDataFrameBlockInternals::test_consolidate", "tests/frame/test_block_internals.py::TestDataFrameBlockInternals::test_construction_with_conversions", - "tests/frame/test_block_internals.py::TestDataFrameBlockInternals::test_constructor_with_convert", "tests/frame/test_block_internals.py::TestDataFrameBlockInternals::test_modify_values", "tests/frame/test_block_internals.py::TestDataFrameBlockInternals::test_stale_cached_series_bug_473", "tests/frame/test_block_internals.py::test_update_inplace_sets_valid_block_values", @@ -5532,14 +5511,6 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_from_list_of_datetimes", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_ndarray_with_nas_and_int_dtype", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_with_two_categoricalindex_series", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[h-A]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[h-C]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[h-F]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[h-K]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[m-A]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[m-C]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[m-F]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_non_ns[m-K]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_with_nulls[arr0]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_with_nulls[arr1]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_dict", @@ -5564,14 +5535,6 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_mixed_dict_and_Series", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_ndarray_copy", 
"tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_rec", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[h-A]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[h-C]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[h-F]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[h-K]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[m-A]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[m-C]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[m-F]", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_timedelta_non_ns[m-K]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_with_datetimes1", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_with_datetimes2", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_with_datetimes3", @@ -5810,6 +5773,7 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_dict_nocopy[UInt8-uint64-False]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_dict_nocopy[UInt8-uint8-False]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_frame_string_inference", + "tests/frame/test_constructors.py::TestDataFrameConstructors::test_frame_string_inference_array_string_dtype", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_from_datetime_subclass", "tests/frame/test_constructors.py::TestFromScalar::test_from_out_of_bounds_ns_datetime[DataFrame-None-datetime64]", "tests/frame/test_constructors.py::TestFromScalar::test_from_out_of_bounds_ns_datetime[DataFrame-dict-datetime64]", @@ -11258,8 +11222,6 @@ def pytest_unconfigure(config): "tests/reshape/merge/test_merge.py::TestMerge::test_merge_on_index_with_more_values[index9-expected_index9-right]", "tests/reshape/merge/test_merge.py::TestMerge::test_merge_right_index_right", "tests/reshape/merge/test_merge.py::TestMerge::test_merge_take_missing_values_from_index_of_other_dtype", - "tests/reshape/merge/test_merge.py::TestMerge::test_other_timedelta_unit[h]", - "tests/reshape/merge/test_merge.py::TestMerge::test_other_timedelta_unit[m]", "tests/reshape/merge/test_merge.py::TestMergeCategorical::test_dtype_on_categorical_dates", "tests/reshape/merge/test_merge.py::TestMergeCategorical::test_dtype_on_merged_different[inner-1]", "tests/reshape/merge/test_merge.py::TestMergeCategorical::test_dtype_on_merged_different[left-0]", @@ -13060,6 +13022,7 @@ def pytest_unconfigure(config): "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Float64-input_data4-to_replace4-expected_data4]", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Int64-input_data2-to_replace2-expected_data2]", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[string-input_data5-to_replace5-expected_data5]", + "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_explicit_none", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_mixed_types", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_na_in_obj_column[Int64]", 
"tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_no_cast[ser0-exp0]", @@ -13235,14 +13198,6 @@ def pytest_unconfigure(config): "tests/series/test_arithmetic.py::test_none_comparison[python-uint64]", "tests/series/test_arithmetic.py::test_none_comparison[python-uint8]", "tests/series/test_constructors.py::TestSeriesConstructors::test_categorical_sideeffects_free", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[h-M-float64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[h-M-int64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[h-m-float64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[h-m-int64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[m-M-float64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[m-M-int64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[m-m-float64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_construction_to_datetimelike_unit[m-m-int64]", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_bool_dtype_missing_values", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_coerce_float_fail[int16]", @@ -13273,7 +13228,6 @@ def pytest_unconfigure(config): "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_data_aware_dtype_naive[tzutc()-True]", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_data_aware_dtype_naive[zoneinfo.ZoneInfo(key='US/Pacific')-True]", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_data_aware_dtype_naive[zoneinfo.ZoneInfo(key='UTC')-True]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_datetime64_bigendian", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_datetimes_with_nulls", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_dict_datetime64_index", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_dtype_datetime64_10", @@ -13303,6 +13257,7 @@ def pytest_unconfigure(config): "tests/series/test_constructors.py::TestSeriesConstructors::test_series_ctor_plus_datetimeindex", "tests/series/test_constructors.py::TestSeriesConstructors::test_series_from_index_dtype_equal_does_not_copy", "tests/series/test_constructors.py::TestSeriesConstructors::test_series_string_inference", + "tests/series/test_constructors.py::TestSeriesConstructors::test_series_string_inference_array_string_dtype", "tests/series/test_constructors.py::TestSeriesConstructors::test_series_string_inference_scalar", "tests/series/test_constructors.py::TestSeriesConstructors::test_series_string_inference_storage_definition", "tests/series/test_constructors.py::TestSeriesConstructors::test_series_string_with_na_inference[None]", @@ -13862,7 +13817,6 @@ def pytest_unconfigure(config): "tests/tools/test_to_numeric.py::test_to_numeric_large_float_not_downcast_to_float_32[9876543210.0]", "tests/tools/test_to_timedelta.py::TestTimedeltas::test_to_timedelta_nullable_int64_dtype[None-None]", 
"tests/tools/test_to_timedelta.py::TestTimedeltas::test_to_timedelta_nullable_int64_dtype[expected_val0-2]", - "tests/tools/test_to_timedelta.py::TestTimedeltas::test_to_timedelta_oob_non_nano", "tests/tools/test_to_timedelta.py::TestTimedeltas::test_to_timedelta_series", "tests/tools/test_to_timedelta.py::TestTimedeltas::test_to_timedelta_via_apply", "tests/tseries/frequencies/test_inference.py::test_infer_freq_tz_transition[zoneinfo.ZoneInfo(key='US/Pacific')-10min-date_pair1]", diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index 6357680b368..d5a9ea50317 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -697,6 +697,11 @@ def test_to_dense_array(): assert filled.size == len(sr) +def test_series_np_array_all_nan_object_raises(): + with pytest.raises(MixedTypeError): + cudf.Series(np.array([np.nan, np.nan], dtype=object)) + + @pytest.mark.parametrize( "ps", [ From b65d6ee26d384cd1adac438fed3a99ca24d61c79 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 5 Aug 2025 10:35:48 -0400 Subject: [PATCH 055/366] Update cudf to handle CUDA 13 changes (#19585) Required changes to support CUDA 13 Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19585 --- cpp/src/io/json/write_json.cu | 5 +++-- cpp/src/transform/row_bit_count.cu | 9 +++++++-- cpp/src/utilities/prefetch.cpp | 11 ++++++++++- python/cudf/udf_cpp/CMakeLists.txt | 8 ++++++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 01f9df2f4cc..e83ad15720b 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -714,9 +714,10 @@ struct column_to_strings_fn { host_span children_names) const requires(std::is_same_v) { + auto structs_view = structs_column_view{column}; auto const child_it = cudf::detail::make_counting_transform_iterator( - 0, [&stream = stream_, structs_view = structs_column_view{column}](auto const child_idx) { - return structs_view.get_sliced_child(child_idx, stream); + 0, [&stream = stream_, &s_v = structs_view](auto const child_idx) { + return s_v.get_sliced_child(child_idx, stream); }); auto col_string = operator()(child_it, child_it + column.num_children(), diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index ded16ef958d..c941d6bfaeb 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -230,8 +230,13 @@ struct flatten_functor { info.push_back({cur_depth, branch_depth_start, branch_depth_end}); lists_column_view lcv(col); - auto iter = cudf::detail::make_counting_transform_iterator( - 0, [col = lcv.get_sliced_child(stream)](auto) { return col; }); + auto sliced_child = lcv.get_sliced_child(stream); + + // We don't pass sliced_child by value as that will generate + // invocation of a host function ( ~column_view() ) in a host/device + // context when compiling with CUDA 13 + auto iter = + cudf::detail::make_counting_transform_iterator(0, [&](auto) { return sliced_child; }); h_info.complex_type_count++; flatten_hierarchy( diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp index 6c9f677afb3..be28d54214e 100644 --- a/cpp/src/utilities/prefetch.cpp +++ 
b/cpp/src/utilities/prefetch.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,16 @@ cudaError_t prefetch_noexcept(std::string_view key, std::cerr << "Prefetching " << size << " bytes for key " << key << " at location " << ptr << std::endl; } + +#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000 + cudaMemLocation location{ + (device_id.value() == cudaCpuDeviceId) ? cudaMemLocationTypeHost : cudaMemLocationTypeDevice, + device_id.value()}; + constexpr int flags = 0; + auto result = cudaMemPrefetchAsync(ptr, size, location, flags, stream.value()); +#else auto result = cudaMemPrefetchAsync(ptr, size, device_id.value(), stream.value()); +#endif // Need to flush the CUDA error so that the context is not corrupted. if (result == cudaErrorInvalidValue) { cudaGetLastError(); } return result; diff --git a/python/cudf/udf_cpp/CMakeLists.txt b/python/cudf/udf_cpp/CMakeLists.txt index 0c12a022f22..2f7d332b877 100644 --- a/python/cudf/udf_cpp/CMakeLists.txt +++ b/python/cudf/udf_cpp/CMakeLists.txt @@ -74,9 +74,13 @@ install(TARGETS cudf_strings_udf DESTINATION ./cudf/_lib/) # Create the shim library for each architecture. set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true) -# always build a default PTX file in case RAPIDS_NO_INITIALIZE is set and the device cc can't be +# always build a default architecture in case RAPIDS_NO_INITIALIZE is set and the device cc can't be # safely queried through a context -list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "70") +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0.0) + list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "75") +else() + list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "70") +endif() list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-real" "") list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-virtual" "") From eff56237a3927b01ec09f727f51cf5667c257545 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:52:06 -0700 Subject: [PATCH 056/366] Use more pytest fixtures and avoid GPU parameterization in test_replace/reshape/rolling.py (#19426) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids `pytest.mark.parametrize` with GPU objects Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19426 --- python/cudf/cudf/tests/test_replace.py | 122 ++++++++++++++----------- python/cudf/cudf/tests/test_reshape.py | 18 ++-- python/cudf/cudf/tests/test_rolling.py | 19 ++-- 3 files changed, 85 insertions(+), 74 deletions(-) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 8ea0d205e8b..b1efcef5d1e 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
import operator import re @@ -27,11 +27,11 @@ @pytest.mark.parametrize( - "gsr", + "gsr_data, dtype", [ - cudf.Series([5, 1, 2, 3, None, 243, None, 4]), - cudf.Series(["one", "two", "three", None, "one"], dtype="category"), - cudf.Series([*list(range(400)), None]), + [[5, 1, 2, 3, None, 243, None, 4], None], + [["one", "two", "three", None, "one"], "category"], + [[*list(range(400)), None], None], ], ) @pytest.mark.parametrize( @@ -49,7 +49,8 @@ (np.inf, 4), ], ) -def test_series_replace_all(gsr, to_replace, value): +def test_series_replace_all(gsr_data, dtype, to_replace, value): + gsr = cudf.Series(gsr_data, dtype=dtype) psr = gsr.to_pandas() gd_to_replace = to_replace @@ -183,28 +184,30 @@ def test_series_replace_with_nulls(): reason="warning introduced in pandas-2.2.0", ) @pytest.mark.parametrize( - "df", + "data, dtype", [ - cudf.DataFrame( + ( { "a": [0, 1, None, 2, 3], "b": [3, 2, 2, 3, None], "c": ["abc", "def", ".", None, None], - } + }, + None, ), - cudf.DataFrame( + ( { "a": ["one", "two", None, "three"], "b": ["one", None, "two", "three"], }, - dtype="category", + "category", ), - cudf.DataFrame( + ( { "col one": [None, 10, 11, None, 1000, 500, 600], "col two": ["abc", "def", "ghi", None, "pp", None, "a"], "a": [0.324, 0.234, 324.342, 23.32, 9.9, None, None], - } + }, + None, ), ], ) @@ -245,8 +248,8 @@ def test_series_replace_with_nulls(): ), ], ) -def test_dataframe_replace(df, to_replace, value): - gdf = df +def test_dataframe_replace(data, dtype, to_replace, value): + gdf = cudf.DataFrame(data, dtype=dtype) pdf = gdf.to_pandas() pd_value = value @@ -262,7 +265,7 @@ def test_dataframe_replace(df, to_replace, value): gd_to_replace = to_replace can_warn = ( - isinstance(df["a"].dtype, cudf.CategoricalDtype) + isinstance(gdf["a"].dtype, cudf.CategoricalDtype) and isinstance(to_replace, str) and to_replace == "two" and isinstance(value, str) @@ -394,26 +397,29 @@ def test_fillna_method_numerical(data, container, data_dtype, method, inplace): @pytest.mark.parametrize( - "gsr_data", + "gsr_data, dtype", [ - cudf.Series(["2.34", "5.2", "7.47", None, "92.29", None]).astype( - Decimal64Dtype(7, 2) + (["2.34", "5.2", "7.47", None, "92.29", None], Decimal64Dtype(7, 2)), + ( + ["-74.56", None, "-23.73", "34.55", "2.89", None], + Decimal32Dtype(7, 2), ), - cudf.Series(["-74.56", None, "-23.73", "34.55", "2.89", None]).astype( - Decimal32Dtype(7, 2) + ( + ["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan], + Decimal64Dtype(8, 3), + ), + ( + ["2.964", None, "57.432", "-989.330", None, "56.444"], + Decimal64Dtype(8, 3), + ), + ( + [np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan], + Decimal64Dtype(10, 4), + ), + ( + ["2.964", None, "54347.432", "-989.330", None, "56.444"], + Decimal128Dtype(20, 7), ), - cudf.Series( - ["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan] - ).astype(Decimal64Dtype(8, 3)), - cudf.Series( - ["2.964", None, "57.432", "-989.330", None, "56.444"] - ).astype(Decimal64Dtype(8, 3)), - cudf.Series( - [np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan] - ).astype(Decimal64Dtype(10, 4)), - cudf.Series( - ["2.964", None, "54347.432", "-989.330", None, "56.444"] - ).astype(Decimal128Dtype(20, 7)), ], ) @pytest.mark.parametrize( @@ -433,8 +439,8 @@ def test_fillna_method_numerical(data, container, data_dtype, method, inplace): ], ) @pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_decimal(gsr_data, fill_value, inplace): - gsr = gsr_data.copy(deep=True) +def test_fillna_decimal(gsr_data, dtype, fill_value, inplace): + gsr = 
cudf.Series(gsr_data).astype(dtype) psr = gsr.to_pandas() if isinstance(fill_value, cudf.Series): @@ -1306,67 +1312,73 @@ def test_series_replace_errors(): "gsr,old,new,expected", [ ( - cudf.Series(["a", "b", "c", None]), + lambda: cudf.Series(["a", "b", "c", None]), None, "a", - cudf.Series(["a", "b", "c", "a"]), + lambda: cudf.Series(["a", "b", "c", "a"]), ), ( - cudf.Series(["a", "b", "c", None]), + lambda: cudf.Series(["a", "b", "c", None]), [None, "a", "a"], ["c", "b", "d"], - cudf.Series(["d", "b", "c", "c"]), + lambda: cudf.Series(["d", "b", "c", "c"]), ), ( - cudf.Series(["a", "b", "c", None]), + lambda: cudf.Series(["a", "b", "c", None]), [None, "a"], ["b", None], - cudf.Series([None, "b", "c", "b"]), + lambda: cudf.Series([None, "b", "c", "b"]), ), ( - cudf.Series(["a", "b", "c", None]), + lambda: cudf.Series(["a", "b", "c", None]), [None, None], [None, None], - cudf.Series(["a", "b", "c", None]), + lambda: cudf.Series(["a", "b", "c", None]), + ), + ( + lambda: cudf.Series([1, 2, None, 3]), + None, + 10, + lambda: cudf.Series([1, 2, 10, 3]), ), - (cudf.Series([1, 2, None, 3]), None, 10, cudf.Series([1, 2, 10, 3])), ( - cudf.Series([1, 2, None, 3]), + lambda: cudf.Series([1, 2, None, 3]), [None, 1, 1], [3, 2, 4], - cudf.Series([4, 2, 3, 3]), + lambda: cudf.Series([4, 2, 3, 3]), ), ( - cudf.Series([1, 2, None, 3]), + lambda: cudf.Series([1, 2, None, 3]), [None, 1], [2, None], - cudf.Series([None, 2, 2, 3]), + lambda: cudf.Series([None, 2, 2, 3]), ), ( - cudf.Series(["a", "q", "t", None], dtype="category"), + lambda: cudf.Series(["a", "q", "t", None], dtype="category"), None, "z", - cudf.Series(["a", "q", "t", "z"], dtype="category"), + lambda: cudf.Series(["a", "q", "t", "z"], dtype="category"), ), ( - cudf.Series(["a", "q", "t", None], dtype="category"), + lambda: cudf.Series(["a", "q", "t", None], dtype="category"), [None, "a", "q"], ["z", None, None], - cudf.Series([None, None, "t", "z"], dtype="category"), + lambda: cudf.Series([None, None, "t", "z"], dtype="category"), ), ( - cudf.Series(["a", None, "t", None], dtype="category"), + lambda: cudf.Series(["a", None, "t", None], dtype="category"), [None, "t"], ["p", None], - cudf.Series(["a", "p", None, "p"], dtype="category"), + lambda: cudf.Series(["a", "p", None, "p"], dtype="category"), ), ], ) def test_replace_nulls(gsr, old, new, expected): + gsr = gsr() with expect_warning_if(isinstance(gsr.dtype, cudf.CategoricalDtype)): actual = gsr.replace(old, new) assert_eq( - expected.sort_values().reset_index(drop=True), + expected().sort_values().reset_index(drop=True), actual.sort_values().reset_index(drop=True), ) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index b13b0db2679..84cf6136255 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -667,10 +667,6 @@ def test_unstack_multiindex(level): ) -@pytest.mark.parametrize( - "data", - [{"A": [1.0, 2.0, 3.0, 4.0, 5.0], "B": [11.0, 12.0, 13.0, 14.0, 15.0]}], -) @pytest.mark.parametrize( "index", [ @@ -695,7 +691,11 @@ def test_unstack_multiindex(level): ), ], ) -def test_unstack_index(data, index, col_idx): +def test_unstack_index(index, col_idx): + data = { + "A": [1.0, 2.0, 3.0, 4.0, 5.0], + "B": [11.0, 12.0, 13.0, 14.0, 15.0], + } pdf = pd.DataFrame(data) gdf = cudf.from_pandas(pdf) @@ -733,9 +733,9 @@ def test_pivot_duplicate_error(): @pytest.mark.parametrize( "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] ) -@pytest.mark.parametrize("fill_value", [0]) -def 
test_pivot_table_simple(aggfunc, fill_value): +def test_pivot_table_simple(aggfunc): rng = np.random.default_rng(seed=0) + fill_value = 0 pdf = pd.DataFrame( { "A": ["one", "one", "two", "three"] * 6, @@ -768,9 +768,9 @@ def test_pivot_table_simple(aggfunc, fill_value): @pytest.mark.parametrize( "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] ) -@pytest.mark.parametrize("fill_value", [0]) -def test_dataframe_pivot_table_simple(aggfunc, fill_value): +def test_dataframe_pivot_table_simple(aggfunc): rng = np.random.default_rng(seed=0) + fill_value = 0 pdf = pd.DataFrame( { "A": ["one", "one", "two", "three"] * 6, diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 87b509ce22b..30958f29a8f 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -102,13 +102,13 @@ def test_rolling_dataframe_basic(data, agg, nulls, center): @pytest.mark.parametrize( "agg", [ - pytest.param("sum"), - pytest.param("min"), - pytest.param("max"), - pytest.param("mean"), - pytest.param("count"), - pytest.param("std"), - pytest.param("var"), + "sum", + "min", + "max", + "mean", + "count", + "std", + "var", ], ) def test_rolling_with_offset(agg): @@ -134,9 +134,8 @@ def test_rolling_with_offset(agg): @pytest.mark.parametrize("agg", ["std", "var"]) @pytest.mark.parametrize("ddof", [0, 1]) @pytest.mark.parametrize("center", [True, False]) -@pytest.mark.parametrize("seed", [100, 2000]) @pytest.mark.parametrize("window_size", [2, 10, 100]) -def test_rolling_var_std_large(agg, ddof, center, seed, window_size): +def test_rolling_var_std_large(agg, ddof, center, window_size): iupper_bound = math.sqrt(np.iinfo(np.int64).max / window_size) ilower_bound = -math.sqrt(abs(np.iinfo(np.int64).min) / window_size) @@ -170,7 +169,7 @@ def test_rolling_var_std_large(agg, ddof, center, seed, window_size): ], rows=n_rows, use_threads=False, - seed=seed, + seed=100, ) pdf = data.to_pandas() gdf = cudf.from_pandas(pdf) From cec71d1ae9f9828dc8719f2b9a34d700daf59835 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:17:58 -0700 Subject: [PATCH 057/366] Use more pytest fixtures and avoid GPU parameterization in test_groupby/index.py (#19438) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Eliminate/reduce parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19438 --- python/cudf/cudf/tests/test_groupby.py | 176 ++++----- python/cudf/cudf/tests/test_index.py | 528 +++++++++---------------- 2 files changed, 268 insertions(+), 436 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 898b056f263..f332fb37e56 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -106,8 +106,8 @@ def pdf(gdf): return gdf.to_pandas() -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) -def test_groupby_mean(nelem): +def test_groupby_mean(): + nelem = 20 got_df = make_frame(DataFrame, nelem=nelem).groupby(["x", "y"]).mean() expect_df = ( make_frame(pd.DataFrame, nelem=nelem).groupby(["x", "y"]).mean() @@ -115,8 +115,8 @@ def test_groupby_mean(nelem): assert_groupby_results_equal(got_df, expect_df) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) -def 
test_groupby_mean_3level(nelem): +def test_groupby_mean_3level(): + nelem = 20 lvls = "z" bys = list("xyz") got_df = ( @@ -132,8 +132,8 @@ def test_groupby_mean_3level(nelem): assert_groupby_results_equal(got_df, expect_df) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) -def test_groupby_agg_mean_min(nelem): +def test_groupby_agg_mean_min(): + nelem = 20 got_df = ( make_frame(DataFrame, nelem=nelem) .groupby(["x", "y"]) @@ -147,8 +147,8 @@ def test_groupby_agg_mean_min(nelem): assert_groupby_results_equal(got_df, expect_df) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) -def test_groupby_agg_min_max_dictargs(nelem): +def test_groupby_agg_min_max_dictargs(): + nelem = 20 expect_df = ( make_frame(pd.DataFrame, nelem=nelem, extra_vals="ab") .groupby(["x", "y"]) @@ -162,8 +162,8 @@ def test_groupby_agg_min_max_dictargs(nelem): assert_groupby_results_equal(expect_df, got_df) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) -def test_groupby_agg_min_max_dictlist(nelem): +def test_groupby_agg_min_max_dictlist(): + nelem = 20 expect_df = ( make_frame(pd.DataFrame, nelem=nelem, extra_vals="ab") .groupby(["x", "y"]) @@ -334,23 +334,24 @@ def foo(df): assert_groupby_results_equal(expect, got) -def create_test_groupby_apply_args_params(): - def f1(df, k): - df["out"] = df["val1"] + df["val2"] + k - return df +def f1(df, k): + df["out"] = df["val1"] + df["val2"] + k + return df - def f2(df, k, L): - df["out"] = df["val1"] - df["val2"] + (k / L) - return df - def f3(df, k, L, m): - df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m - return df +def f2(df, k, L): + df["out"] = df["val1"] - df["val2"] + (k / L) + return df - return [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] +def f3(df, k, L, m): + df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m + return df -@pytest.mark.parametrize("func,args", create_test_groupby_apply_args_params()) + +@pytest.mark.parametrize( + "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] +) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Fails in older versions of pandas", @@ -621,7 +622,6 @@ def test_groupby_apply_jit_reductions_special_vals( ) -@pytest.mark.parametrize("dtype", ["float64"]) @pytest.mark.parametrize("func", ["idxmax", "idxmin"]) @pytest.mark.parametrize( "special_val", @@ -642,21 +642,20 @@ def test_groupby_apply_jit_reductions_special_vals( reason="include_groups keyword new in pandas 2.2", ) def test_groupby_apply_jit_idx_reductions_special_vals( - func, dtype, dataset, groupby_jit_datasets, special_val + func, dataset, groupby_jit_datasets, special_val ): dataset = groupby_jit_datasets[dataset].copy(deep=True) groupby_apply_jit_idx_reductions_special_vals_inner( - func, dataset, dtype, special_val + func, dataset, "float64", special_val ) -@pytest.mark.parametrize("dtype", ["int32"]) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Fails in older versions of pandas", ) -def test_groupby_apply_jit_sum_integer_overflow(dtype): - max = np.iinfo(dtype).max +def test_groupby_apply_jit_sum_integer_overflow(): + max = np.iinfo("int32").max data = DataFrame( { @@ -821,21 +820,20 @@ def test_groupby_apply_jit_basic(func, groupby_jit_data_small): run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1", "key2"]) -def create_test_groupby_apply_jit_args_params(): - def f1(df, k): - return df["val1"].max() + df["val2"].min() + k +def f1(df, k): + return df["val1"].max() + df["val2"].min() + k + - def f2(df, k, L): - return df["val1"].sum() - 
df["val2"].var() + (k / L) +def f2(df, k, L): + return df["val1"].sum() - df["val2"].var() + (k / L) - def f3(df, k, L, m): - return ((k * df["val1"].mean()) + (L * df["val2"].std())) / m - return [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] +def f3(df, k, L, m): + return ((k * df["val1"].mean()) + (L * df["val2"].std())) / m @pytest.mark.parametrize( - "func,args", create_test_groupby_apply_jit_args_params() + "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] ) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, @@ -1002,7 +1000,6 @@ def pdf_func(df): assert_groupby_results_equal(expect, got) -@pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000]) @pytest.mark.parametrize( "func", [ @@ -1018,8 +1015,9 @@ def pdf_func(df): "prod", ], ) -def test_groupby_2keys_agg(nelem, func): +def test_groupby_2keys_agg(func): # gdf (Note: lack of multiIndex) + nelem = 20 expect_df = ( make_frame(pd.DataFrame, nelem=nelem).groupby(["x", "y"]).agg(func) ) @@ -1029,8 +1027,8 @@ def test_groupby_2keys_agg(nelem, func): assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) -@pytest.mark.parametrize("num_groups", [2, 3, 10, 50, 100]) -@pytest.mark.parametrize("nelem_per_group", [1, 10, 100]) +@pytest.mark.parametrize("num_groups", [2, 20]) +@pytest.mark.parametrize("nelem_per_group", [1, 10]) @pytest.mark.parametrize( "func", ["min", "max", "count", "sum"], @@ -1777,14 +1775,16 @@ def test_groupby_cumcount(index): ) -@pytest.mark.parametrize("nelem", [2, 3, 1000]) @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize( "agg", ["min", "max", "idxmin", "idxmax", "mean", "count"] ) -def test_groupby_datetime(nelem, as_index, agg): +def test_groupby_datetime(request, as_index, agg): + nelem = 20 if agg == "mean" and as_index is True: - return + request.applymarker( + pytest.mark.xfail(reason="Invalid type/aggregation combination") + ) check_dtype = agg not in ("mean", "count", "idxmin", "idxmax") pdf = make_frame(pd.DataFrame, nelem=nelem, with_datetime=True) gdf = make_frame(cudf.DataFrame, nelem=nelem, with_datetime=True) @@ -2457,7 +2457,11 @@ def test_groupby_nonempty_no_keys(pdf): @pytest.mark.parametrize( "by,data", [ - # ([], []), # error? 
+ pytest.param( + [], + [], + marks=pytest.mark.xfail(reason="dtype always cast to object"), + ), ([1, 1, 2, 2], [0, 0, 1, 1]), ([1, 2, 3, 4], [0, 0, 0, 0]), ([1, 2, 1, 2], [0, 1, 1, 1]), @@ -2477,11 +2481,11 @@ def test_groupby_unique(by, data, dtype): assert_groupby_results_equal(expect, got) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) @pytest.mark.parametrize( "func", ["cummin", "cummax", "cumcount", "cumsum", "cumprod"] ) -def test_groupby_2keys_scan(nelem, func): +def test_groupby_2keys_scan(func): + nelem = 20 pdf = make_frame(pd.DataFrame, nelem=nelem) expect_df = pdf.groupby(["x", "y"], sort=True).agg(func) gdf = cudf.from_pandas(pdf) @@ -2506,12 +2510,12 @@ def test_groupby_2keys_scan(nelem, func): assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) -@pytest.mark.parametrize("nelem", [100, 1000]) @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) @pytest.mark.parametrize("ascending", [True, False]) @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) @pytest.mark.parametrize("pct", [False, True]) -def test_groupby_2keys_rank(nelem, method, ascending, na_option, pct): +def test_groupby_2keys_rank(method, ascending, na_option, pct): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2592,11 +2596,11 @@ def test_groupby_mix_agg_scan(): gb.agg(func) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) @pytest.mark.parametrize("fill_value", [None, np.nan, 42]) -def test_groupby_shift_row(nelem, shift_perc, direction, fill_value): +def test_groupby_shift_row(shift_perc, direction, fill_value): + nelem = 20 pdf = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"]) gdf = cudf.from_pandas(pdf) n_shift = int(nelem * shift_perc) * direction @@ -2611,7 +2615,6 @@ def test_groupby_shift_row(nelem, shift_perc, direction, fill_value): ) -@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) @pytest.mark.parametrize( @@ -2632,9 +2635,8 @@ def test_groupby_shift_row(nelem, shift_perc, direction, fill_value): ), ], ) -def test_groupby_shift_row_mixed_numerics( - nelem, shift_perc, direction, fill_value -): +def test_groupby_shift_row_mixed_numerics(shift_perc, direction, fill_value): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2669,10 +2671,10 @@ def test_groupby_shift_row_mixed_numerics( # TODO: Shifting list columns is currently unsupported because we cannot # construct a null list scalar in python. Support once it is added. 
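# The hunks that follow drop input-size parametrization in favor of a
# single representative size, since the extra sizes multiplied runtime
# without reaching new code paths. Where several tests need the same
# size, a shared fixture is an option too; a minimal sketch, with a
# hypothetical fixture name:
import pytest

@pytest.fixture
def nelem():
    # one representative size instead of [10, 50, 100, 1000]
    return 20

def test_consumes_shared_size(nelem):
    assert nelem == 20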
-@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_shift_row_mixed(nelem, shift_perc, direction): +def test_groupby_shift_row_mixed(shift_perc, direction): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2705,7 +2707,6 @@ def test_groupby_shift_row_mixed(nelem, shift_perc, direction): ) -@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) @pytest.mark.parametrize( @@ -2719,9 +2720,8 @@ def test_groupby_shift_row_mixed(nelem, shift_perc, direction): ] ], ) -def test_groupby_shift_row_mixed_fill( - nelem, shift_perc, direction, fill_value -): +def test_groupby_shift_row_mixed_fill(shift_perc, direction, fill_value): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2763,9 +2763,9 @@ def test_groupby_shift_row_mixed_fill( ) -@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) @pytest.mark.parametrize("fill_value", [None, 0, 42]) -def test_groupby_shift_row_zero_shift(nelem, fill_value): +def test_groupby_shift_row_zero_shift(fill_value): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2796,10 +2796,10 @@ def test_groupby_shift_row_zero_shift(nelem, fill_value): ) -@pytest.mark.parametrize("nelem", [2, 3, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_diff_row(nelem, shift_perc, direction): +def test_groupby_diff_row(shift_perc, direction): + nelem = 20 pdf = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"]) gdf = cudf.from_pandas(pdf) n_shift = int(nelem * shift_perc) * direction @@ -2812,10 +2812,10 @@ def test_groupby_diff_row(nelem, shift_perc, direction): ) -@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) @pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) @pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_diff_row_mixed_numerics(nelem, shift_perc, direction): +def test_groupby_diff_row_mixed_numerics(shift_perc, direction): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2849,8 +2849,8 @@ def test_groupby_diff_row_mixed_numerics(nelem, shift_perc, direction): ) -@pytest.mark.parametrize("nelem", [10, 50, 100, 1000]) -def test_groupby_diff_row_zero_shift(nelem): +def test_groupby_diff_row_zero_shift(): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2881,12 +2881,12 @@ def test_groupby_diff_row_zero_shift(nelem): ) -@pytest.mark.parametrize("nelem", [10, 100, 1000]) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="warning not present in older pandas versions", ) -def test_groupby_fillna_multi_value(nelem): +def test_groupby_fillna_multi_value(): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -2930,12 +2930,12 @@ def test_groupby_fillna_multi_value(nelem): # TODO: cudf.fillna does not support decimal column to column fill yet -@pytest.mark.parametrize("nelem", [10, 100, 1000]) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="warning not present in older pandas versions", ) -def 
test_groupby_fillna_multi_value_df(nelem): +def test_groupby_fillna_multi_value_df(): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -3010,9 +3010,9 @@ def test_groupby_various_by_fillna(by, data, args): PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="warning not present in older pandas versions", ) -@pytest.mark.parametrize("nelem", [10, 100, 1000]) @pytest.mark.parametrize("method", ["ffill", "bfill"]) -def test_groupby_fillna_method(nelem, method): +def test_groupby_fillna_method(method): + nelem = 20 t = rand_dataframe( dtypes_meta=[ {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, @@ -3502,20 +3502,6 @@ def test_groupby_group_keys(group_keys, by): assert_eq(actual, expected) -@pytest.fixture -def df_ngroup(): - df = cudf.DataFrame( - { - "a": [2, 2, 1, 1, 2, 3], - "b": [1, 2, 1, 2, 1, 2], - "c": ["a", "a", "b", "c", "d", "c"], - }, - index=[1, 3, 5, 7, 4, 2], - ) - df.index.name = "foo" - return df - - @pytest.mark.parametrize( "by", [ @@ -3528,7 +3514,16 @@ def df_ngroup(): ], ) @pytest.mark.parametrize("ascending", [True, False]) -def test_groupby_ngroup(by, ascending, df_ngroup): +def test_groupby_ngroup(by, ascending): + df_ngroup = cudf.DataFrame( + { + "a": [2, 2, 1, 1, 2, 3], + "b": [1, 2, 1, 2, 1, 2], + "c": ["a", "a", "b", "c", "d", "c"], + }, + index=[1, 3, 5, 7, 4, 2], + ) + df_ngroup.index.name = "foo" by = by() expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending) actual = df_ngroup.groupby(by).ngroup(ascending=ascending) @@ -3928,7 +3923,6 @@ def test_group_by_value_counts(normalize, sort, ascending, dropna, as_index): assert_groupby_results_equal( actual, expected, - check_names=False, check_index_type=False, as_index=as_index, by=["gender", "education"], diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index e7e702a0b8c..9ee55790b73 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -25,12 +25,9 @@ from cudf.testing import assert_eq from cudf.testing._utils import ( ALL_TYPES, - FLOAT_TYPES, NUMERIC_TYPES, OTHER_TYPES, SERIES_OR_INDEX_NAMES, - SIGNED_INTEGER_TYPES, - UNSIGNED_TYPES, assert_column_memory_eq, assert_column_memory_ne, assert_exceptions_equal, @@ -309,121 +306,51 @@ def test_set_index_as_property(): assert_eq(head.index, idx[:5]) -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_range(name, deep=True): - cidx = cudf.RangeIndex(1, 5) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - assert_eq(pidx_copy, cidx_copy) - - -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_datetime(name, deep=True): - cidx = cudf.DatetimeIndex(["2001", "2002", "2003"]) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - assert_eq(pidx_copy, cidx_copy) - - -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_string(name, deep=True): - cidx = cudf.Index(["a", "b", "c"]) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - assert_eq(pidx_copy, cidx_copy) - - -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_integer(name, deep=True): - """Test for NumericIndex Copy Casts""" - cidx = cudf.Index([1, 2, 3]) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - 
assert_eq(pidx_copy, cidx_copy) - - -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_float(name, deep=True): - """Test for NumericIndex Copy Casts""" - cidx = cudf.Index([1.0, 2.0, 3.0]) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - assert_eq(pidx_copy, cidx_copy) - - -@pytest.mark.parametrize("name", ["x"]) -def test_index_copy_category(name, deep=True): - cidx = cudf.core.index.CategoricalIndex([1, 2, 3]) +@pytest.mark.parametrize( + "data", + [ + range(1, 5), + [1, 2, 3, 4], + pd.DatetimeIndex(["2001", "2002", "2003"]), + ["a", "b", "c"], + pd.CategoricalIndex(["a", "b", "c"]), + ], +) +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize("copy_on_write", [True, False]) +def test_index_copy(data, deep, copy_on_write): + name = "x" + cidx = cudf.Index(data) pidx = cidx.to_pandas() pidx_copy = pidx.copy(name=name, deep=deep) cidx_copy = cidx.copy(name=name, deep=deep) - assert_column_memory_ne(cidx._column, cidx_copy._column) assert_eq(pidx_copy, cidx_copy) + with cudf.option_context("copy_on_write", copy_on_write): + if not isinstance(cidx, cudf.RangeIndex): + if ( + isinstance(cidx._column, cudf.core.column.StringColumn) + or not deep + or (copy_on_write and not deep) + ): + # StringColumn is immutable hence, deep copies of a + # Index with string dtype will share the same StringColumn. -@pytest.mark.parametrize("deep", [True, False]) -@pytest.mark.parametrize( - "idx", - [ - cudf.DatetimeIndex(["2001", "2002", "2003"]), - cudf.Index(["a", "b", "c"]), - cudf.Index([1, 2, 3]), - cudf.Index([1.0, 2.0, 3.0]), - cudf.CategoricalIndex([1, 2, 3]), - cudf.CategoricalIndex(["a", "b", "c"]), - ], -) -@pytest.mark.parametrize("copy_on_write", [True, False]) -def test_index_copy_deep(idx, deep, copy_on_write): - """Test if deep copy creates a new instance for device data.""" - idx_copy = idx.copy(deep=deep) - original_cow_setting = cudf.get_option("copy_on_write") - cudf.set_option("copy_on_write", copy_on_write) - if ( - isinstance(idx._column, cudf.core.column.StringColumn) - or not deep - or (cudf.get_option("copy_on_write") and not deep) - ): - # StringColumn is immutable hence, deep copies of a - # Index with string dtype will share the same StringColumn. - - # When `copy_on_write` is turned on, Index objects will - # have unique column object but they all point to same - # data pointers. - assert_column_memory_eq(idx._column, idx_copy._column) - else: - assert_column_memory_ne(idx._column, idx_copy._column) - cudf.set_option("copy_on_write", original_cow_setting) + # When `copy_on_write` is turned on, Index objects will + # have unique column object but they all point to same + # data pointers. 
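# An aside on cudf.option_context, used in the rewritten test above: it
# restores the prior option value even when the body raises, unlike the
# set_option(...) / set_option(original) pairing it replaces, which
# leaked state whenever an assertion failed first. A minimal sketch
# (the simulated failure is illustrative only):
import cudf

original = cudf.get_option("copy_on_write")
try:
    with cudf.option_context("copy_on_write", True):
        assert cudf.get_option("copy_on_write")
        raise RuntimeError("simulated mid-test failure")
except RuntimeError:
    pass
assert cudf.get_option("copy_on_write") == original  # restored on exit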
+ assert_column_memory_eq(cidx._column, cidx_copy._column) + else: + assert_column_memory_ne(cidx._column, cidx_copy._column) -@pytest.mark.parametrize("idx", [[1, None, 3, None, 5]]) -def test_index_isna(idx): +def test_index_isna_notna(): + idx = [1, None, 3, None, 5] pidx = pd.Index(idx, name="idx") gidx = cudf.Index(idx, name="idx") assert_eq(gidx.isna(), pidx.isna()) - - -@pytest.mark.parametrize("idx", [[1, None, 3, None, 5]]) -def test_index_notna(idx): - pidx = pd.Index(idx, name="idx") - gidx = cudf.Index(idx, name="idx") assert_eq(gidx.notna(), pidx.notna()) @@ -1283,39 +1210,6 @@ def test_index_basic(data, dtype, name): assert_eq(pdi, gdi) -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize("name", [1, "a", None]) -@pytest.mark.parametrize("dtype", SIGNED_INTEGER_TYPES) -def test_integer_index_apis(data, name, dtype): - pindex = pd.Index(data, dtype=dtype, name=name) - gindex = cudf.Index(data, dtype=dtype, name=name) - - assert_eq(pindex, gindex) - assert gindex.dtype == dtype - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize("name", [1, "a", None]) -@pytest.mark.parametrize("dtype", UNSIGNED_TYPES) -def test_unsigned_integer_index_apis(data, name, dtype): - pindex = pd.Index(data, dtype=dtype, name=name) - gindex = cudf.Index(data, dtype=dtype, name=name) - - assert_eq(pindex, gindex) - assert gindex.dtype == dtype - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize("name", [1, "a", None]) -@pytest.mark.parametrize("dtype", FLOAT_TYPES) -def test_float_index_apis(data, name, dtype): - pindex = pd.Index(data, dtype=dtype, name=name) - gindex = cudf.Index(data, dtype=dtype, name=name) - - assert_eq(pindex, gindex) - assert gindex.dtype == dtype - - @pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) @pytest.mark.parametrize("categories", [[1, 2], None]) @pytest.mark.parametrize( @@ -1716,12 +1610,11 @@ def test_index_set_names(idx, names, inplace): assert_eq(expected, actual) -@pytest.mark.parametrize("idx", [pd.Index([1, 2, 3], name="abc")]) @pytest.mark.parametrize("level", [1, [0], "abc"]) @pytest.mark.parametrize("names", [None, "a"]) -def test_index_set_names_error(idx, level, names): - pi = idx.copy() - gi = cudf.from_pandas(idx) +def test_index_set_names_error(level, names): + pi = pd.Index([1, 2, 3], name="abc") + gi = cudf.from_pandas(pi) assert_exceptions_equal( lfunc=pi.set_names, @@ -1732,13 +1625,12 @@ def test_index_set_names_error(idx, level, names): @pytest.mark.parametrize( - "idx", - [pd.Index([1, 3, 6]), pd.Index([6, 1, 3])], # monotonic # non-monotonic + "data", [[1, 3, 6], [6, 1, 3]], ids=["monotonic", "non-monotonic"] ) -@pytest.mark.parametrize("key", [list(range(0, 8))]) @pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) -def test_get_indexer_single_unique_numeric(idx, key, method): - pi = idx +def test_get_indexer_single_unique_numeric(data, method): + key = list(range(0, 8)) + pi = pd.Index(data) gi = cudf.from_pandas(pi) if ( @@ -1763,22 +1655,19 @@ def test_get_indexer_single_unique_numeric(idx, key, method): @pytest.mark.parametrize( - "idx", - [pd.RangeIndex(3, 100, 4)], -) -@pytest.mark.parametrize( - "key", + "rng", [ - list(range(1, 20, 3)), - list(range(20, 35, 3)), - list(range(35, 77, 3)), - list(range(77, 110, 3)), + range(1, 20, 3), + range(20, 35, 3), + range(35, 77, 3), + range(77, 110, 3), ], ) @pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) @pytest.mark.parametrize("tolerance", [None, 0, 1, 13, 20]) 
-def test_get_indexer_rangeindex(idx, key, method, tolerance): - pi = idx +def test_get_indexer_rangeindex(rng, method, tolerance): + key = list(rng) + pi = pd.RangeIndex(3, 100, 4) gi = cudf.from_pandas(pi) expected = pi.get_indexer( @@ -1797,13 +1686,9 @@ def test_get_indexer_rangeindex(idx, key, method, tolerance): assert_eq(expected, got, check_dtype=True) -@pytest.mark.parametrize( - "idx", - [pd.RangeIndex(3, 100, 4)], -) @pytest.mark.parametrize("key", list(range(1, 110, 3))) -def test_get_loc_rangeindex(idx, key): - pi = idx +def test_get_loc_rangeindex(key): + pi = pd.RangeIndex(3, 100, 4) gi = cudf.from_pandas(pi) if ( (key not in pi) @@ -1828,14 +1713,15 @@ def test_get_loc_rangeindex(idx, key): @pytest.mark.parametrize( "idx", [ - pd.Index([1, 3, 3, 6]), # monotonic increasing - pd.Index([6, 1, 3, 3]), # non-monotonic - pd.Index([4, 3, 2, 1, 0]), # monotonic decreasing + [1, 3, 3, 6], + [6, 1, 3, 3], + [4, 3, 2, 1, 0], ], + ids=["monotonic increasing", "non-monotonic", "monotonic decreasing"], ) @pytest.mark.parametrize("key", [0, 3, 6, 7, 4]) def test_get_loc_duplicate_numeric(idx, key): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if key not in pi: @@ -1855,15 +1741,16 @@ def test_get_loc_duplicate_numeric(idx, key): @pytest.mark.parametrize( "idx", [ - pd.Index([-1, 2, 3, 6]), # monotonic - pd.Index([6, 1, 3, 4]), # non-monotonic + [-1, 2, 3, 6], + [6, 1, 3, 4], ], + ids=["monotonic", "non-monotonic"], ) @pytest.mark.parametrize("key", [[0, 3, 1], [6, 7]]) @pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) @pytest.mark.parametrize("tolerance", [None, 1, 2]) def test_get_indexer_single_duplicate_numeric(idx, key, method, tolerance): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if not pi.is_monotonic_increasing and method is not None: @@ -1884,12 +1771,10 @@ def test_get_indexer_single_duplicate_numeric(idx, key, method, tolerance): assert_eq(expected, got) -@pytest.mark.parametrize( - "idx", [pd.Index(["b", "f", "m", "q"]), pd.Index(["m", "f", "b", "q"])] -) +@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]]) @pytest.mark.parametrize("key", ["a", "f", "n", "z"]) def test_get_loc_single_unique_string(idx, key): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if key not in pi: @@ -1906,13 +1791,11 @@ def test_get_loc_single_unique_string(idx, key): assert_eq(expected, got) -@pytest.mark.parametrize( - "idx", [pd.Index(["b", "f", "m", "q"]), pd.Index(["m", "f", "b", "q"])] -) +@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]]) @pytest.mark.parametrize("key", [["a", "f", "n", "z"], ["p", "p", "b"]]) @pytest.mark.parametrize("method", [None, "ffill", "bfill"]) def test_get_indexer_single_unique_string(idx, key, method): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if not pi.is_monotonic_increasing and method is not None: @@ -1929,12 +1812,10 @@ def test_get_indexer_single_unique_string(idx, key, method): assert_eq(expected, got) -@pytest.mark.parametrize( - "idx", [pd.Index(["b", "m", "m", "q"]), pd.Index(["m", "f", "m", "q"])] -) +@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["m", "f", "m", "q"]]) @pytest.mark.parametrize("key", ["a", "f", "n", "z"]) def test_get_loc_single_duplicate_string(idx, key): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if key not in pi: @@ -1951,13 +1832,11 @@ def test_get_loc_single_duplicate_string(idx, key): assert_eq(expected, got) -@pytest.mark.parametrize( - "idx", [pd.Index(["b", "m", "m", "q"]), 
pd.Index(["a", "f", "m", "q"])] -) +@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["a", "f", "m", "q"]]) @pytest.mark.parametrize("key", [["a"], ["f", "n", "z"]]) @pytest.mark.parametrize("method", [None, "ffill", "bfill"]) def test_get_indexer_single_duplicate_string(idx, key, method): - pi = idx + pi = pd.Index(idx) gi = cudf.from_pandas(pi) if ( @@ -1984,21 +1863,16 @@ def test_get_indexer_single_duplicate_string(idx, key, method): @pytest.mark.parametrize( - "idx", + "data", [ - pd.MultiIndex.from_tuples( - [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)] - ), - pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)] - ), - pd.MultiIndex.from_tuples( - [(1, 1, 1), (1, 1, 2), (1, 1, 2), (1, 2, 3), (2, 1, 1), (2, 2, 1)] - ), + [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], + [(1, 1, 1), (1, 1, 2), (1, 1, 2), (1, 2, 3), (2, 1, 1), (2, 2, 1)], ], ) @pytest.mark.parametrize("key", [1, (1, 2), (1, 2, 3), (2, 1, 1), (9, 9, 9)]) -def test_get_loc_multi_numeric(idx, key): +def test_get_loc_multi_numeric(data, key): + idx = pd.MultiIndex.from_tuples(data) pi = idx.sort_values() gi = cudf.from_pandas(pi) @@ -2017,22 +1891,17 @@ def test_get_loc_multi_numeric(idx, key): @pytest.mark.parametrize( - "idx", + "data", [ - pd.MultiIndex.from_tuples( - [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)] - ), - pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)] - ), - pd.MultiIndex.from_tuples( - [(1, 1, 1), (1, 1, 2), (1, 1, 24), (1, 2, 3), (2, 1, 1), (2, 2, 1)] - ), + [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], + [(1, 1, 1), (1, 1, 2), (1, 1, 24), (1, 2, 3), (2, 1, 1), (2, 2, 1)], ], ) @pytest.mark.parametrize("key", [[(1, 2, 3)], [(9, 9, 9)]]) @pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_multi_numeric(idx, key, method): +def test_get_indexer_multi_numeric(data, key, method): + idx = pd.MultiIndex.from_tuples(data) pi = idx.sort_values() gi = cudf.from_pandas(pi) @@ -2047,14 +1916,6 @@ def test_get_indexer_multi_numeric(idx, key, method): assert_eq(expected, got, check_dtype=True) -@pytest.mark.parametrize( - "idx", - [ - pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 1), (1, 1, 1), (2, 2, 1)] - ) - ], -) @pytest.mark.parametrize( "key, result", [ @@ -2065,8 +1926,10 @@ def test_get_indexer_multi_numeric(idx, key, method): ((9, 9, 9), None), ], ) -def test_get_loc_multi_numeric_deviate(idx, key, result): - pi = idx +def test_get_loc_multi_numeric_deviate(key, result): + pi = pd.MultiIndex.from_tuples( + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 1), (1, 1, 1), (2, 2, 1)] + ) gi = cudf.from_pandas(pi) with expect_warning_if( @@ -2138,64 +2001,55 @@ def test_get_indexer_multi_error(method): @pytest.mark.parametrize( - "idx", + "data", [ - pd.MultiIndex.from_tuples( - [ - ("a", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", 
"a"), - ("a", "a", "b"), - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", "b"), - ("b", "a", "a"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("b", "a", "a"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], ], ) @pytest.mark.parametrize( "key", ["a", ("a", "a"), ("a", "b", "c"), ("b", "c", "a"), ("z", "z", "z")] ) -def test_get_loc_multi_string(idx, key): +def test_get_loc_multi_string(data, key): + idx = pd.MultiIndex.from_tuples(data) pi = idx.sort_values() gi = cudf.from_pandas(pi) @@ -2214,45 +2068,40 @@ def test_get_loc_multi_string(idx, key): @pytest.mark.parametrize( - "idx", + "data", [ - pd.MultiIndex.from_tuples( - [ - ("a", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), - pd.MultiIndex.from_tuples( - [ - ("a", "a", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("b", "c", "a"), + ], ], ) @pytest.mark.parametrize( "key", [[("a", "b", "c"), ("b", "c", "a")], [("z", "z", "z")]] ) @pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_multi_string(idx, key, method): +def test_get_indexer_multi_string(data, key, method): + idx = pd.MultiIndex.from_tuples(data) pi = idx.sort_values() gi = cudf.from_pandas(pi) @@ -2672,8 +2521,9 @@ def test_isin_multiindex(data, values, level, err): ) -@pytest.fixture( - params=[ +@pytest.mark.parametrize( + "rangeindex", + [ range(np.random.default_rng(seed=0).integers(0, 100)), range(9, 12, 2), range(20, 30), @@ -2681,19 +2531,14 @@ def test_isin_multiindex(data, values, level, err): range(0, 10, -2), range(0, -10, 2), range(0, -10, -2), - ] + ], ) -def rangeindex(request): - """Create a cudf RangeIndex of different `nrows`""" - return cudf.RangeIndex(request.param) - - @pytest.mark.parametrize( "func", ["nunique", "min", "max", "any", "values"], ) def test_rangeindex_methods(rangeindex, func): - gidx = rangeindex + gidx = cudf.RangeIndex(rangeindex) pidx = gidx.to_pandas() if func == "values": @@ -2856,8 +2701,9 @@ def test_rangeindex_append_return_rangeindex(): assert_eq(result, expected) -@pytest.fixture( - params=[ +@pytest.mark.parametrize( + "index", + [ range(np.random.default_rng(seed=0).integers(0, 100)), range(0, 10, -2), range(0, -10, 2), @@ -2868,13 +2714,8 @@ def test_rangeindex_append_return_rangeindex(): [None, "a", "3.2", 
"z", None, None], pd.Series(["a", "b", None], dtype="category"), np.array([1, 2, 3, None], dtype="datetime64[s]"), - ] + ], ) -def index(request): - """Create a cudf Index of different dtypes""" - return cudf.Index(request.param) - - @pytest.mark.parametrize( "func", [ @@ -2885,7 +2726,7 @@ def index(request): ], ) def test_index_methods(index, func): - gidx = index + gidx = cudf.Index(index) pidx = gidx.to_pandas() if func == "append": @@ -3032,23 +2873,18 @@ def test_index_to_pandas_nullable(data, expected_dtype): assert_eq(pi, expected) -class TestIndexScalarGetItem: - @pytest.fixture( - params=[range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]] - ) - def index_values(self, request): - return request.param - - @pytest.fixture(params=[int, np.int8, np.int32, np.int64]) - def i(self, request): - return request.param(1) - - def test_scalar_getitem(self, index_values, i): - index = cudf.Index(index_values) +@pytest.mark.parametrize( + "index_values", + [range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]], +) +@pytest.mark.parametrize("i_type", [int, np.int8, np.int32, np.int64]) +def test_scalar_getitem(index_values, i_type): + i = i_type(1) + index = cudf.Index(index_values) - assert not isinstance(index[i], cudf.Index) - assert index[i] == index_values[i] - assert_eq(index, index.to_pandas()) + assert not isinstance(index[i], cudf.Index) + assert index[i] == index_values[i] + assert_eq(index, index.to_pandas()) @pytest.mark.parametrize( @@ -3197,14 +3033,15 @@ def test_from_pandas_rangeindex_return_rangeindex(): @pytest.mark.parametrize( - "idx", + "data", [ - cudf.RangeIndex(1), - cudf.DatetimeIndex(np.array([1, 2], dtype="datetime64[ns]")), - cudf.TimedeltaIndex(np.array([1, 2], dtype="timedelta64[ns]")), + range(1), + np.array([1, 2], dtype="datetime64[ns]"), + np.array([1, 2], dtype="timedelta64[ns]"), ], ) -def test_index_to_pandas_nullable_notimplemented(idx): +def test_index_to_pandas_nullable_notimplemented(data): + idx = cudf.Index(data) with pytest.raises(NotImplementedError): idx.to_pandas(nullable=True) @@ -3340,12 +3177,13 @@ def test_index_datetime_repeat(): @pytest.mark.parametrize( "index", [ - cudf.Index([1]), - cudf.RangeIndex(1), - cudf.MultiIndex(levels=[[0]], codes=[[0]]), + lambda: cudf.Index([1]), + lambda: cudf.RangeIndex(1), + lambda: cudf.MultiIndex(levels=[[0]], codes=[[0]]), ], ) def test_index_assignment_no_shallow_copy(index): + index = index() df = cudf.DataFrame(range(1)) df.index = index assert df.index is index From ff8c4b9927db5845b27ba1b7330340943a26068d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:21:00 -0700 Subject: [PATCH 058/366] Clean testing/_utils.py (#19506) Working to have less places where we define "testing utilities" e.g. 
`conftest.py` vs `testing/*.py` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19506 --- python/cudf/cudf/testing/_utils.py | 72 ------------------- .../cudf/tests/test_cuda_array_interface.py | 2 +- python/cudf/cudf/tests/test_dataframe.py | 3 +- python/cudf/cudf/tests/test_dlpack.py | 2 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 46 +++++++++++- .../test_disable_pandas_accelerator.py | 28 ++++---- .../dask_cudf/tests/test_accessor.py | 3 +- 7 files changed, 62 insertions(+), 94 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index fa0bf52279e..41401ab4bde 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -3,7 +3,6 @@ import itertools import string -import time from collections import abc from contextlib import contextmanager from decimal import Decimal @@ -12,23 +11,11 @@ import numpy as np import pandas as pd import pytest -from numba.core.typing import signature as nb_signature -from numba.core.typing.templates import AbstractTemplate -from numba.cuda.cudadecl import registry as cuda_decl_registry -from numba.cuda.cudaimpl import lower as cuda_lower import pylibcudf as plc import cudf from cudf.core.column.column import as_column -from cudf.core.udf.strings_lowering import ( - cast_string_view_to_managed_udf_string, -) -from cudf.core.udf.strings_typing import ( - StringView, - managed_udf_string, - string_view, -) from cudf.utils import dtypes as dtypeutils from cudf.utils.temporal import unit_to_nanoseconds_conversion @@ -291,11 +278,6 @@ def _decimal_series(input, dtype): ) -@contextmanager -def does_not_raise(): - yield - - def assert_column_memory_eq(lhs: ColumnBase, rhs: ColumnBase): """Assert the memory location and size of `lhs` and `rhs` are equivalent. @@ -391,57 +373,3 @@ def expect_warning_if(condition, warning=FutureWarning, *args, **kwargs): yield else: yield - - -def sv_to_managed_udf_str(sv): - """ - Cast a string_view object to a managed_udf_string object - - This placeholder function never runs in python - It exists only for numba to have something to replace - with the typing and lowering code below - - This is similar conceptually to needing a translation - engine to emit an expression in target language "B" when - there is no equivalent in the source language "A" to - translate from. This function effectively defines the - expression in language "A" and the associated typing - and lowering describe the translation process, despite - the expression having no meaning in language "A" - """ - pass - - -@cuda_decl_registry.register_global(sv_to_managed_udf_str) -class StringViewToUDFStringDecl(AbstractTemplate): - def generic(self, args, kws): - if isinstance(args[0], StringView) and len(args) == 1: - return nb_signature(managed_udf_string, string_view) - - -@cuda_lower(sv_to_managed_udf_str, string_view) -def sv_to_udf_str_testing_lowering(context, builder, sig, args): - return cast_string_view_to_managed_udf_string( - context, builder, sig.args[0], sig.return_type, args[0] - ) - - -class cudf_timeout: - """ - Context manager to raise a TimeoutError after a specified number of seconds. 
- """ - - def __init__(self, timeout): - self.timeout = timeout - - def __enter__(self): - self.start_time = time.perf_counter() - - def __exit__(self, *args): - elapsed_time = ( - time.perf_counter() - self.start_time - ) # Calculate elapsed time - if elapsed_time >= self.timeout: - raise TimeoutError( - f"Expected to finish in {self.timeout=} seconds but took {elapsed_time=} seconds" - ) diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index 18067d4cf20..e163f62282b 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -1,7 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. import types -from contextlib import ExitStack as does_not_raise +from contextlib import nullcontext as does_not_raise import cupy import numba.cuda diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 2671c0bf0f3..328d6fbca7b 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -13,7 +13,7 @@ import textwrap import warnings from collections import OrderedDict, defaultdict, namedtuple -from contextlib import contextmanager +from contextlib import contextmanager, nullcontext as does_not_raise from copy import copy import cupy @@ -40,7 +40,6 @@ DATETIME_TYPES, NUMERIC_TYPES, assert_exceptions_equal, - does_not_raise, expect_warning_if, gen_rand, ) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index 37180871086..ffb33870323 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -1,7 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. import itertools -from contextlib import ExitStack as does_not_raise +from contextlib import nullcontext as does_not_raise import cupy import numpy as np diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 985766b59c7..958a3657abb 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -5,6 +5,10 @@ import numpy as np import pytest from numba import cuda +from numba.core.typing import signature as nb_signature +from numba.core.typing.templates import AbstractTemplate +from numba.cuda.cudadecl import registry as cuda_decl_registry +from numba.cuda.cudaimpl import lower as cuda_lower import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION @@ -16,15 +20,55 @@ unary_ops, ) from cudf.core.udf.api import Masked +from cudf.core.udf.strings_lowering import ( + cast_string_view_to_managed_udf_string, +) +from cudf.core.udf.strings_typing import ( + StringView, + managed_udf_string, + string_view, +) from cudf.core.udf.utils import precompiled from cudf.testing import assert_eq from cudf.testing._utils import ( _decimal_series, parametrize_numeric_dtypes_pairwise, - sv_to_managed_udf_str, ) +def sv_to_managed_udf_str(sv): + """ + Cast a string_view object to a managed_udf_string object + + This placeholder function never runs in python + It exists only for numba to have something to replace + with the typing and lowering code below + + This is similar conceptually to needing a translation + engine to emit an expression in target language "B" when + there is no equivalent in the source language "A" to + translate from. 
This function effectively defines the + expression in language "A" and the associated typing + and lowering describe the translation process, despite + the expression having no meaning in language "A" + """ + pass + + +@cuda_decl_registry.register_global(sv_to_managed_udf_str) +class StringViewToUDFStringDecl(AbstractTemplate): + def generic(self, args, kws): + if isinstance(args[0], StringView) and len(args) == 1: + return nb_signature(managed_udf_string, string_view) + + +@cuda_lower(sv_to_managed_udf_str, string_view) +def sv_to_udf_str_testing_lowering(context, builder, sig, args): + return cast_string_view_to_managed_udf_string( + context, builder, sig.args[0], sig.return_type, args[0] + ) + + @pytest.fixture(scope="module") def str_udf_data(): return cudf.DataFrame( diff --git a/python/cudf/cudf_pandas_tests/test_disable_pandas_accelerator.py b/python/cudf/cudf_pandas_tests/test_disable_pandas_accelerator.py index 7110da677f9..b4c2b3aff25 100644 --- a/python/cudf/cudf_pandas_tests/test_disable_pandas_accelerator.py +++ b/python/cudf/cudf_pandas_tests/test_disable_pandas_accelerator.py @@ -2,30 +2,26 @@ import os import subprocess +import sys import pytest -from cudf.testing import _utils as utils - @pytest.mark.flaky(reruns=3, reruns_delay=30) def test_disable_pandas_accelerator_multi_threaded(): data_directory = os.path.dirname(os.path.abspath(__file__)) - # Create a copy of the current environment variables - env = os.environ.copy() - with utils.cudf_timeout(20): - sp_completed = subprocess.run( - [ - "python", - "-m", - "cudf.pandas", - data_directory + "/data/disable_cudf_pandas_multi_thread.py", - ], - capture_output=True, - text=True, - env=env, - ) + sp_completed = subprocess.run( + [ + sys.executable, + "-m", + "cudf.pandas", + data_directory + "/data/disable_cudf_pandas_multi_thread.py", + ], + capture_output=True, + text=True, + timeout=20, + ) assert sp_completed.returncode == 0 output = sp_completed.stdout diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 8d69940ab84..13407f5d56f 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -1,5 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. 
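# On the test_disable_pandas_accelerator.py change above: passing
# timeout= to subprocess.run kills the child and raises
# subprocess.TimeoutExpired as soon as the budget is exceeded, whereas
# the removed cudf_timeout context manager could only measure and raise
# after the child had already run to completion. A minimal sketch (the
# sleeping child command is illustrative):
import subprocess
import sys

try:
    subprocess.run(
        [sys.executable, "-c", "import time; time.sleep(60)"],
        capture_output=True,
        text=True,
        timeout=1,  # aborts after ~1s instead of waiting the full 60s
    )
except subprocess.TimeoutExpired as exc:
    print(f"subprocess exceeded {exc.timeout}s and was terminated")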
+from contextlib import nullcontext as does_not_raise + import numpy as np import pandas as pd import pytest @@ -10,7 +12,6 @@ from cudf import DataFrame, Series, date_range from cudf.testing import assert_eq -from cudf.testing._utils import does_not_raise import dask_cudf From 4d2cd545e66bb56ce7460d71bdbae76db9a38047 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:24:27 -0700 Subject: [PATCH 059/366] Use more pytest fixtures and avoid GPU parameterization in test_query/rank/reduction/repr.py (#19434) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Uses more context managers so state doesn't get altered if a test fails * `python/cudf/cudf/tests/test_query_mask.py` contained essentially the same test multiple times, so it was moved to `test_query.py` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19434 --- python/cudf/cudf/tests/test_query.py | 105 ++++---- python/cudf/cudf/tests/test_query_mask.py | 71 ------ python/cudf/cudf/tests/test_rank.py | 29 +-- python/cudf/cudf/tests/test_reductions.py | 22 +- python/cudf/cudf/tests/test_repr.py | 279 ++++++++++++---------- 5 files changed, 224 insertions(+), 282 deletions(-) delete mode 100644 python/cudf/cudf/tests/test_query_mask.py diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index fc30132458e..ddb8a7ffd37 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -3,7 +3,6 @@ import datetime import inspect -from itertools import product import numpy as np import pandas as pd @@ -14,13 +13,15 @@ from cudf.testing import assert_eq from cudf.utils import queryutils -_params_query_parser = [] -_params_query_parser.append(("a > @b", ("a", "__CUDF_ENVREF__b"))) -_params_query_parser.append(("(a + b) <= @c", ("a", "b", "__CUDF_ENVREF__c"))) -_params_query_parser.append(("a > b if a > 0 else b > a", ("a", "b"))) - -@pytest.mark.parametrize("text,expect_args", _params_query_parser) +@pytest.mark.parametrize( + "text,expect_args", + [ + ("a > @b", ("a", "__CUDF_ENVREF__b")), + ("(a + b) <= @c", ("a", "b", "__CUDF_ENVREF__c")), + ("a > b if a > 0 else b > a", ("a", "b")), + ], +) def test_query_parser(text, expect_args): info = queryutils.query_parser(text) fn = queryutils.query_builder(info, "myfoo") @@ -29,21 +30,18 @@ def test_query_parser(text, expect_args): assert tuple(argspec.args) == tuple(expect_args) -params_query_data = list(product([1, 2, 7, 8, 9, 16, 100, 129], range(2))) -params_query_fn = [ - (lambda a, b: a < b, "a < b"), - (lambda a, b: a * 2 >= b, "a * 2 >= b"), - (lambda a, b: 2 * (a + b) > (a + b) / 2, "2 * (a + b) > (a + b) / 2"), -] -nulls = [True, False] - - +@pytest.mark.parametrize("nelem", [1, 10]) @pytest.mark.parametrize( - "data,fn,nulls", product(params_query_data, params_query_fn, nulls) + "fn", + [ + (lambda a, b: a < b, "a < b"), + (lambda a, b: a * 2 >= b, "a * 2 >= b"), + (lambda a, b: 2 * (a + b) > (a + b) / 2, "2 * (a + b) > (a + b) / 2"), + ], ) -def test_query(data, fn, nulls): +@pytest.mark.parametrize("nulls", [True, False]) +def test_query(nelem, fn, nulls): # prepare - nelem, seed = data expect_fn, query_expr = fn rng = np.random.default_rng(seed=0) pdf = pd.DataFrame() @@ -55,21 +53,19 @@ def test_query(data, fn, nulls): assert_eq(pdf.query(query_expr), 
gdf.query(query_expr)) -params_query_env_fn = [ - (lambda a, b, c, d: a * c > b + d, "a * @c > b + @d"), - ( - lambda a, b, c, d: ((a / c) < d) | ((b**c) > d), - "((a / @c) < @d) | ((b ** @c) > @d)", - ), -] - - +@pytest.mark.parametrize("nelem", [1, 10]) @pytest.mark.parametrize( - "data,fn", product(params_query_data, params_query_env_fn) + "fn", + [ + (lambda a, b, c, d: a * c > b + d, "a * @c > b + @d"), + ( + lambda a, b, c, d: ((a / c) < d) | ((b**c) > d), + "((a / @c) < @d) | ((b ** @c) > @d)", + ), + ], ) -def test_query_ref_env(data, fn): +def test_query_ref_env(nelem, fn): # prepare - nelem, seed = data expect_fn, query_expr = fn rng = np.random.default_rng(seed=0) df = DataFrame() @@ -225,16 +221,9 @@ def test_query_with_index_keyword(query, a_val, b_val, c_val): assert_eq(out, expect) -@pytest.mark.parametrize( - "data, query", - [ - # Only need to test the dtypes that pandas - # supports but that we do not - (["a", "b", "c"], "data == 'a'"), - ], -) -def test_query_unsupported_dtypes(data, query): - gdf = cudf.DataFrame({"data": data}) +def test_query_unsupported_dtypes(): + query = "data == 'a'" + gdf = cudf.DataFrame({"data": ["a", "b", "c"]}) # make sure the query works in pandas pdf = gdf.to_pandas() @@ -246,3 +235,37 @@ def test_query_unsupported_dtypes(data, query): # but fails in cuDF with pytest.raises(TypeError): gdf.query(query) + + +@pytest.mark.parametrize( + "values", + [ + [0, 1.0, 2.0, None, np.nan, None, 3, 5], + [0, 1.0, 2.0, None, 3, np.nan, None, 4], + [0, 1.0, 2.0, None, 3, np.nan, None, 4, None, 9], + ], +) +@pytest.mark.parametrize("nan_as_null", [True, False]) +@pytest.mark.parametrize( + "query", + [ + "a == 3", + pytest.param( + "a != 3", + marks=pytest.mark.xfail(reason="incompatible with pandas"), + ), + "a < 3", + "a <= 3", + "a < 3", + "a >= 3", + ], +) +def test_query_mask(values, nan_as_null, query): + data = {"a": values} + pdf = pd.DataFrame(data) + gdf = cudf.DataFrame(data, nan_as_null=nan_as_null) + + pdf_q_res = pdf.query(query) + gdf_q_res = gdf.query(query) + + assert_eq(pdf_q_res, gdf_q_res) diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py deleted file mode 100644 index 9372681187d..00000000000 --- a/python/cudf/cudf/tests/test_query_mask.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
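# The file deleted below held four tests (test_mask_0/1/2 and
# test_dataframe_initializer) sharing one body and differing mainly in
# how the frame was built; the single parametrized test_query_mask added
# to test_query.py above covers the same ground. The general idiom, as a
# minimal sketch with hypothetical constructors:
import pytest

@pytest.mark.parametrize(
    "make_data",
    [lambda: [0, 1, 2], lambda: (0, 1, 2), lambda: range(3)],
    ids=["list", "tuple", "range"],
)
def test_sum_is_invariant(make_data):
    # one body, parametrized over construction instead of copy-pasted
    assert sum(make_data()) == 3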
- -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - -_data = [ - {"a": [0, 1.0, 2.0, None, np.nan, None, 3, 5]}, - {"a": [0, 1.0, 2.0, None, 3, np.nan, None, 4]}, - {"a": [0, 1.0, 2.0, None, 3, np.nan, None, 4, None, 9]}, -] -_queries = [ - "a == 3", - # "a != 3", # incompatible with pandas - "a < 3", - "a <= 3", - "a < 3", - "a >= 3", -] - - -@pytest.mark.parametrize("data", _data) -@pytest.mark.parametrize("query", _queries) -def test_mask_0(data, query): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - pdf_q_res = pdf.query(query) - gdf_q_res = gdf.query(query) - - assert_eq(pdf_q_res, gdf_q_res) - - -@pytest.mark.parametrize("data", _data) -@pytest.mark.parametrize("nan_as_null", [False, True]) -@pytest.mark.parametrize("query", _queries) -def test_mask_1(data, nan_as_null, query): - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame.from_pandas(pdf, nan_as_null=nan_as_null) - - pdf_q_res = pdf.query(query) - gdf_q_res = gdf.query(query) - - assert_eq(pdf_q_res, gdf_q_res) - - -@pytest.mark.parametrize("data", _data) -@pytest.mark.parametrize("query", _queries) -def test_mask_2(data, query): - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - pdf_q_res = pdf.query(query) - gdf_q_res = gdf.query(query) - - assert_eq(pdf_q_res, gdf_q_res) - - -@pytest.mark.parametrize("data", _data) -@pytest.mark.parametrize("query", _queries) -def test_dataframe_initializer(data, query): - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - pdf_q_res = pdf.query(query) - gdf_q_res = gdf.query(query) - - assert_eq(pdf_q_res, gdf_q_res) diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index fdb005d0ba9..07a844333cf 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -1,7 +1,4 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. 
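# On the test_series_rank_combinations change in this file: the removed
# decorator materialized 4-element combinations of ndarray sentinels at
# collection time, C(4 + 4 - 1, 4) = 35 combinations times 4 dtypes =
# 140 cases; pairwise scalar parameters keep the interesting
# NaN/inf/finite interactions at 4 * 4 * 4 = 64 cheap cases. A sketch
# of the arithmetic:
from itertools import combinations_with_replacement

sentinels = ["nan", "rand", "inf", "-inf"]
old_cases = len(list(combinations_with_replacement(sentinels, 4))) * 4
new_cases = len(sentinels) ** 2 * 4
print(old_cases, new_cases)  # 140 64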
- -from itertools import chain, combinations_with_replacement, product - import numpy as np import pandas as pd import pytest @@ -38,8 +35,8 @@ def test_rank_all_arguments( # not supported by pandas return - pdf = pdf.copy(deep=True) # for parallel pytest if numeric_only: + pdf = pdf.copy(deep=True) # for parallel pytest pdf["str"] = np.array( ["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"] ) @@ -127,25 +124,11 @@ def test_rank_error_arguments(pdf): @pytest.mark.filterwarnings("ignore:invalid value encountered in cast") -@pytest.mark.parametrize( - "elem,dtype", - list( - product( - combinations_with_replacement( - [ - np.full((3,), np.nan), - 100 * np.random.default_rng(seed=0).random(10), - np.full((3,), np.inf), - np.full((3,), -np.inf), - ], - 4, - ), - [np.int32, np.int64, np.float32, np.float64], - ) - ), -) -def test_series_rank_combinations(elem, dtype): - aa = np.fromiter(chain.from_iterable(elem), np.float64).astype(dtype) +@pytest.mark.parametrize("elem1", [np.nan, np.inf, -np.inf, 1.43]) +@pytest.mark.parametrize("elem2", [np.nan, np.inf, -np.inf, 1.43]) +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +def test_series_rank_combinations(elem1, elem2, dtype): + aa = np.array([elem1, elem2], dtype=np.float64).astype(dtype) gdf = DataFrame({"a": aa}) df = pd.DataFrame({"a": aa}) ranked_gs = gdf["a"].rank(method="first") diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index a234f6655ba..144d79ca94a 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -1,8 +1,5 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. - - from decimal import Decimal -from itertools import product import numpy as np import pandas as pd @@ -17,14 +14,17 @@ from cudf.testing import _utils as utils, assert_eq from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if, gen_rand -params_dtype = NUMERIC_TYPES -params_sizes = [1, 2, 3, 127, 128, 129, 200, 10000] +@pytest.fixture(params=NUMERIC_TYPES) +def dtype(request): + return request.param + -params = list(product(params_dtype, params_sizes)) +@pytest.fixture(params=[1, 20]) +def nelem(request): + return request.param -@pytest.mark.parametrize("dtype,nelem", params) def test_sum(dtype, nelem): dtype = cudf.dtype(dtype).type data = gen_rand(dtype, nelem) @@ -86,7 +86,6 @@ def test_sum_string(): Decimal128Dtype(20, 7), ], ) -@pytest.mark.parametrize("nelem", params_sizes) def test_sum_decimal(dtype, nelem): data = [str(x) for x in gen_rand("int64", nelem, seed=0) / 100] @@ -96,7 +95,6 @@ def test_sum_decimal(dtype, nelem): assert_eq(expected, got) -@pytest.mark.parametrize("dtype,nelem", params) def test_product(dtype, nelem): rng = np.random.default_rng(seed=0) dtype = cudf.dtype(dtype).type @@ -162,7 +160,6 @@ def test_product_decimal(dtype): accuracy_for_dtype = {np.float64: 6, np.float32: 5} -@pytest.mark.parametrize("dtype,nelem", params) def test_sum_of_squares(dtype, nelem): dtype = cudf.dtype(dtype).type data = gen_rand(dtype, nelem) @@ -228,7 +225,6 @@ def test_sum_of_squares_decimal(dtype): assert_eq(expected, got) -@pytest.mark.parametrize("dtype,nelem", params) def test_min(dtype, nelem): dtype = cudf.dtype(dtype).type data = gen_rand(dtype, nelem) @@ -274,7 +270,6 @@ def test_min(dtype, nelem): Decimal128Dtype(20, 7), ], ) -@pytest.mark.parametrize("nelem", params_sizes) def test_min_decimal(dtype, nelem): data = [str(x) for x in gen_rand("int64", nelem) / 100] @@ -284,7 +279,6 @@ def test_min_decimal(dtype, 
nelem): assert_eq(expected, got) -@pytest.mark.parametrize("dtype,nelem", params) def test_max(dtype, nelem): dtype = cudf.dtype(dtype).type data = gen_rand(dtype, nelem) @@ -330,7 +324,6 @@ def test_max(dtype, nelem): Decimal128Dtype(20, 7), ], ) -@pytest.mark.parametrize("nelem", params_sizes) def test_max_decimal(dtype, nelem): data = [str(x) for x in gen_rand("int64", nelem) / 100] @@ -340,7 +333,6 @@ def test_max_decimal(dtype, nelem): assert_eq(expected, got) -@pytest.mark.parametrize("nelem", params_sizes) def test_sum_masked(nelem): dtype = np.float64 data = gen_rand(dtype, nelem) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index d4885c4c9fc..89fa6a0bb78 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -12,17 +12,21 @@ from cudf.testing import _utils as utils from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes -repr_categories = [ - "uint16", - "int64", - "float64", - "str", - "category", - "datetime64[ns]", -] + +@pytest.fixture( + params=[ + "uint16", + "int64", + "float64", + "str", + "category", + "datetime64[ns]", + ] +) +def dtype(request): + return request.param -@pytest.mark.parametrize("dtype", repr_categories) @pytest.mark.parametrize("nrows", [0, 5, 10]) def test_null_series(nrows, dtype): rng = np.random.default_rng(seed=0) @@ -40,57 +44,50 @@ def test_null_series(nrows, dtype): else: ps = sr.to_pandas() - pd.options.display.max_rows = int(nrows) - psrepr = repr(ps).replace("NaN", "").replace("None", "") - if "UInt" in psrepr: - psrepr = psrepr.replace("UInt", "uint") - elif "Int" in psrepr: - psrepr = psrepr.replace("Int", "int") - assert psrepr.split() == repr(sr).split() - pd.reset_option("display.max_rows") - - -dtype_categories = [ - "float32", - "float64", - "datetime64[ns]", - "str", - "category", -] + with pd.option_context("display.max_rows", int(nrows)): + psrepr = repr(ps).replace("NaN", "").replace("None", "") + if "UInt" in psrepr: + psrepr = psrepr.replace("UInt", "uint") + elif "Int" in psrepr: + psrepr = psrepr.replace("Int", "int") + assert psrepr.split() == repr(sr).split() @pytest.mark.parametrize("ncols", [1, 2, 3, 4, 5, 10]) def test_null_dataframe(ncols): + dtype_categories = [ + "float32", + "float64", + "datetime64[ns]", + "str", + "category", + ] rng = np.random.default_rng(seed=0) size = 20 gdf = cudf.DataFrame() - for idx, dtype in enumerate(dtype_categories): + for dtype in dtype_categories: sr = cudf.Series(rng.integers(0, 128, size)).astype(dtype) sr[rng.choice([False, True], size=size)] = None gdf[dtype] = sr pdf = gdf.to_pandas() - pd.options.display.max_columns = int(ncols) - pdf_repr = repr(pdf).replace("NaN", "").replace("None", "") - assert pdf_repr.split() == repr(gdf).split() - pd.reset_option("display.max_columns") + with pd.option_context("display.max_columns", int(ncols)): + pdf_repr = repr(pdf).replace("NaN", "").replace("None", "") + assert pdf_repr.split() == repr(gdf).split() -@pytest.mark.parametrize("dtype", repr_categories) @pytest.mark.parametrize("nrows", [None, 0, 1, 2, 9, 10, 11, 19, 20, 21]) def test_full_series(nrows, dtype): size = 20 rng = np.random.default_rng(seed=0) ps = pd.Series(rng.integers(0, 100, size)).astype(dtype) sr = cudf.from_pandas(ps) - pd.options.display.max_rows = nrows - assert repr(ps) == repr(sr) - pd.reset_option("display.max_rows") + with pd.option_context("display.max_rows", nrows): + assert repr(ps) == repr(sr) @pytest.mark.parametrize("nrows", [5, 10, 15]) @pytest.mark.parametrize("ncols", [5, 10, 
15]) @pytest.mark.parametrize("size", [20, 21]) -@pytest.mark.parametrize("dtype", repr_categories) def test_full_dataframe_20(dtype, size, nrows, ncols): rng = np.random.default_rng(seed=0) pdf = pd.DataFrame( @@ -195,15 +192,12 @@ def test_MI(): [0, 1, 2, 3, 0, 1, 2, 3, 0, 1], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1], ] - pd.options.display.max_rows = 999 - pd.options.display.max_columns = 0 - gdf = gdf.set_index(cudf.MultiIndex(levels=levels, codes=codes)) - pdf = gdf.to_pandas() - assert repr(gdf) == repr(pdf) - assert repr(gdf.index) == repr(pdf.index) - assert repr(gdf.T) == repr(pdf.T) - pd.reset_option("display.max_rows") - pd.reset_option("display.max_columns") + with pd.option_context("display.max_rows", 999, "display.max_columns", 0): + gdf = gdf.set_index(cudf.MultiIndex(levels=levels, codes=codes)) + pdf = gdf.to_pandas() + assert repr(gdf) == repr(pdf) + assert repr(gdf.index) == repr(pdf.index) + assert repr(gdf.T) == repr(pdf.T) @pytest.mark.parametrize("nrows", [0, 1, 3, 5, 10]) @@ -215,13 +209,12 @@ def test_groupby_MI(nrows, ncols): pdf = gdf.to_pandas() gdg = gdf.groupby(["a", "b"], sort=True).count() pdg = pdf.groupby(["a", "b"], sort=True).count() - pd.options.display.max_rows = nrows - pd.options.display.max_columns = ncols - assert repr(gdg) == repr(pdg) - assert repr(gdg.index) == repr(pdg.index) - assert repr(gdg.T) == repr(pdg.T) - pd.reset_option("display.max_rows") - pd.reset_option("display.max_columns") + with pd.option_context( + "display.max_rows", nrows, "display.max_columns", ncols + ): + assert repr(gdg) == repr(pdg) + assert repr(gdg.index) == repr(pdg.index) + assert repr(gdg.T) == repr(pdg.T) @pytest.mark.parametrize("dtype", utils.NUMERIC_TYPES) @@ -241,23 +234,25 @@ def test_generic_index(length, dtype): @pytest.mark.parametrize( "gdf", [ - cudf.DataFrame({"a": range(10000)}), - cudf.DataFrame({"a": range(10000), "b": range(10000)}), - cudf.DataFrame({"a": range(20), "b": range(20)}), - cudf.DataFrame( + lambda: cudf.DataFrame({"a": range(10000)}), + lambda: cudf.DataFrame({"a": range(10000), "b": range(10000)}), + lambda: cudf.DataFrame({"a": range(20), "b": range(20)}), + lambda: cudf.DataFrame( { "a": range(20), "b": range(20), "c": ["abc", "def", "xyz", "def", "pqr"] * 4, } ), - cudf.DataFrame(index=[1, 2, 3]), - cudf.DataFrame(index=range(10000)), - cudf.DataFrame(columns=["a", "b", "c", "d"]), - cudf.DataFrame(columns=["a"], index=range(10000)), - cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(10000)), - cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), - cudf.DataFrame( + lambda: cudf.DataFrame(index=[1, 2, 3]), + lambda: cudf.DataFrame(index=range(10000)), + lambda: cudf.DataFrame(columns=["a", "b", "c", "d"]), + lambda: cudf.DataFrame(columns=["a"], index=range(10000)), + lambda: cudf.DataFrame( + columns=["a", "col2", "...col n"], index=range(10000) + ), + lambda: cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), + lambda: cudf.DataFrame( columns=["a", "b", "c", "d"], index=cudf.Series(range(10000)).astype("str"), ), @@ -277,50 +272,52 @@ def test_generic_index(length, dtype): @pytest.mark.parametrize("max_seq_items", [1, 10, 60, 10000, None]) @pytest.mark.parametrize("max_rows", [1, 10, 60, 10000, None]) def test_dataframe_sliced(gdf, slice, max_seq_items, max_rows): - pd.options.display.max_seq_items = max_seq_items - pd.options.display.max_rows = max_rows - pdf = gdf.to_pandas() + gdf = gdf() + with pd.option_context( + "display.max_seq_items", max_seq_items, "display.max_rows", max_rows + ): + pdf = 
gdf.to_pandas() - sliced_gdf = gdf[slice] - sliced_pdf = pdf[slice] + sliced_gdf = gdf[slice] + sliced_pdf = pdf[slice] - expected_repr = repr(sliced_pdf).replace("None", "") - actual_repr = repr(sliced_gdf) + expected_repr = repr(sliced_pdf).replace("None", "") + actual_repr = repr(sliced_gdf) - assert expected_repr == actual_repr - pd.reset_option("display.max_rows") - pd.reset_option("display.max_seq_items") + assert expected_repr == actual_repr @pytest.mark.parametrize( "index,expected_repr", [ ( - cudf.Index([1, 2, 3, None]), + lambda: cudf.Index([1, 2, 3, None]), "Index([1, 2, 3, <NA>], dtype='int64')", ), ( - cudf.Index([None, 2.2, 3.324342, None]), + lambda: cudf.Index([None, 2.2, 3.324342, None]), "Index([<NA>, 2.2, 3.324342, <NA>], dtype='float64')", ), ( - cudf.Index([None, None, None], name="hello"), + lambda: cudf.Index([None, None, None], name="hello"), "Index([<NA>, <NA>, <NA>], dtype='object', name='hello')", ), ( - cudf.Index([None, None, None], dtype="float", name="hello"), + lambda: cudf.Index( + [None, None, None], dtype="float", name="hello" + ), "Index([<NA>, <NA>, <NA>], dtype='float64', name='hello')", ), ( - cudf.Index([None], dtype="float64", name="hello"), + lambda: cudf.Index([None], dtype="float64", name="hello"), "Index([<NA>], dtype='float64', name='hello')", ), ( - cudf.Index([None], dtype="int8", name="hello"), + lambda: cudf.Index([None], dtype="int8", name="hello"), "Index([<NA>], dtype='int8', name='hello')", ), ( - cudf.Index([None] * 50, dtype="object"), + lambda: cudf.Index([None] * 50, dtype="object"), "Index([<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, " "<NA>, <NA>, <NA>,\n <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, " "<NA>, <NA>, <NA>, <NA>, <NA>,\n <NA>, <NA>, <NA>, <NA>, " "<NA>,\n <NA>, <NA>],\n dtype='object')", ), ( - cudf.Index([None] * 20, dtype="uint32"), + lambda: cudf.Index([None] * 20, dtype="uint32"), "Index([<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, " "<NA>,\n <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, " "<NA>,\n <NA>, <NA>],\n dtype='uint32')", ), ( - cudf.Index( [None, 111, 22, 33, None, 23, 34, 2343, None], dtype="int16" ), "Index([<NA>, 111, 22, 33, <NA>, 23, 34, 2343, <NA>], " "dtype='int16')", ), ( - cudf.Index([1, 2, 3, None], dtype="category"), + lambda: cudf.Index([1, 2, 3, None], dtype="category"), "CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], " "ordered=False, dtype='category')", ), ( - cudf.Index([None, None], dtype="category"), + lambda: cudf.Index([None, None], dtype="category"), "CategoricalIndex([<NA>, <NA>], categories=[], ordered=False, " "dtype='category')", ), ( - cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[ns]")), + lambda: cudf.Index( + np.array([10, 20, 30, None], dtype="datetime64[ns]") + ), "DatetimeIndex([1970-01-01 00:00:00.000000010, " "1970-01-01 00:00:00.000000020," "\n 1970-01-01 00:00:00.000000030, NaT],\n " "dtype='datetime64[ns]')", ), ( - cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[s]")), + lambda: cudf.Index( + np.array([10, 20, 30, None], dtype="datetime64[s]") + ), "DatetimeIndex([1970-01-01 00:00:10, " "1970-01-01 00:00:20, 1970-01-01 00:00:30,\n" " NaT],\n dtype='datetime64[s]')", ), ( - cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[us]")), + lambda: cudf.Index( + np.array([10, 20, 30, None], dtype="datetime64[us]") + ), "DatetimeIndex([1970-01-01 00:00:00.000010, " "1970-01-01 00:00:00.000020,\n " "1970-01-01 00:00:00.000030, NaT],\n " "dtype='datetime64[us]')", ), ( - cudf.Index(np.array([10, 20, 30, None], dtype="datetime64[ms]")), + lambda: cudf.Index( + np.array([10, 20, 30, None], dtype="datetime64[ms]") + ), "DatetimeIndex([1970-01-01 00:00:00.010, " "1970-01-01 00:00:00.020,\n "
"1970-01-01 00:00:00.030, NaT],\n " "dtype='datetime64[ms]')", ), ( - cudf.Index(np.array([None] * 10, dtype="datetime64[ms]")), + lambda: cudf.Index(np.array([None] * 10, dtype="datetime64[ms]")), "DatetimeIndex([NaT, NaT, NaT, NaT, NaT, NaT, NaT, NaT, " "NaT, NaT], dtype='datetime64[ms]')", ), ], ) def test_generic_index_null(index, expected_repr): + index = index() actual_repr = repr(index) assert expected_repr == actual_repr @@ -598,7 +604,7 @@ def test_timedelta_series_s_us_repr(data, dtype): "ser, expected_repr", [ ( - cudf.Series([], dtype="timedelta64[ns]"), + lambda: cudf.Series([], dtype="timedelta64[ns]"), textwrap.dedent( """ Series([], dtype: timedelta64[ns]) @@ -606,7 +612,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series([], dtype="timedelta64[ms]"), + lambda: cudf.Series([], dtype="timedelta64[ms]"), textwrap.dedent( """ Series([], dtype: timedelta64[ms]) @@ -614,7 +620,9 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series([1000000, 200000, 3000000], dtype="timedelta64[ns]"), + lambda: cudf.Series( + [1000000, 200000, 3000000], dtype="timedelta64[ns]" + ), textwrap.dedent( """ 0 0 days 00:00:00.001000 @@ -625,7 +633,9 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series([1000000, 200000, 3000000], dtype="timedelta64[ms]"), + lambda: cudf.Series( + [1000000, 200000, 3000000], dtype="timedelta64[ms]" + ), textwrap.dedent( """ 0 0 days 00:16:40 @@ -636,7 +646,9 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series([1000000, 200000, None], dtype="timedelta64[ns]"), + lambda: cudf.Series( + [1000000, 200000, None], dtype="timedelta64[ns]" + ), textwrap.dedent( """ 0 0 days 00:00:00.001000000 @@ -647,7 +659,9 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series([1000000, 200000, None], dtype="timedelta64[ms]"), + lambda: cudf.Series( + [1000000, 200000, None], dtype="timedelta64[ms]" + ), textwrap.dedent( """ 0 0 days 00:16:40 @@ -658,7 +672,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [None, None, None, None, None], dtype="timedelta64[ns]" ), textwrap.dedent( @@ -673,7 +687,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [None, None, None, None, None], dtype="timedelta64[ms]" ), textwrap.dedent( @@ -688,7 +702,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ns]" ), textwrap.dedent( @@ -704,7 +718,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ms]" ), textwrap.dedent( @@ -720,7 +734,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], dtype="timedelta64[ns]", ), @@ -738,7 +752,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], dtype="timedelta64[ms]", ), @@ -756,7 +770,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [ 13645765432432, 134736784, @@ -782,7 +796,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [ 13645765432432, 134736784, @@ -808,7 +822,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + 
lambda: cudf.Series( [ 13645765432432, 134736784, @@ -835,7 +849,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ), ), ( - cudf.Series( + lambda: cudf.Series( [ 13645765432432, 134736784, @@ -866,7 +880,7 @@ def test_timedelta_series_s_us_repr(data, dtype): ) def test_timedelta_series_ns_ms_repr(ser, expected_repr): expected = expected_repr - actual = repr(ser) + actual = repr(ser()) assert expected.split() == actual.split() @@ -875,7 +889,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): "df,expected_repr", [ ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series( [1000000, 200000, 3000000], dtype="timedelta64[s]" @@ -892,7 +906,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series( [ @@ -923,7 +937,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series( [ @@ -954,7 +968,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series( [1, 2, 3, 4, 5, 6, 7], @@ -987,7 +1001,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series( ["a", "f", "q", "e", "w", "e", "t"], @@ -1022,7 +1036,7 @@ def test_timedelta_series_ns_ms_repr(ser, expected_repr): ], ) def test_timedelta_dataframe_repr(df, expected_repr): - actual_repr = repr(df) + actual_repr = repr(df()) assert actual_repr.split() == expected_repr.split() @@ -1031,20 +1045,22 @@ def test_timedelta_dataframe_repr(df, expected_repr): "index, expected_repr", [ ( - cudf.Index([1000000, 200000, 3000000], dtype="timedelta64[ms]"), + lambda: cudf.Index( + [1000000, 200000, 3000000], dtype="timedelta64[ms]" + ), "TimedeltaIndex(['0 days 00:16:40', " "'0 days 00:03:20', '0 days 00:50:00'], " "dtype='timedelta64[ms]')", ), ( - cudf.Index( + lambda: cudf.Index( [None, None, None, None, None], dtype="timedelta64[us]" ), "TimedeltaIndex([NaT, NaT, NaT, NaT, NaT], " "dtype='timedelta64[us]')", ), ( - cudf.Index( + lambda: cudf.Index( [ 136457654, None, @@ -1063,7 +1079,7 @@ def test_timedelta_dataframe_repr(df, expected_repr): " dtype='timedelta64[us]')", ), ( - cudf.Index( + lambda: cudf.Index( [ 136457654, None, @@ -1083,7 +1099,7 @@ def test_timedelta_dataframe_repr(df, expected_repr): ], ) def test_timedelta_index_repr(index, expected_repr): - actual_repr = repr(index) + actual_repr = repr(index()) assert actual_repr.split() == expected_repr.split() @@ -1111,18 +1127,17 @@ def test_timedelta_index_repr(index, expected_repr): ) @pytest.mark.parametrize("max_seq_items", [None, 1, 2, 5, 10, 100]) def test_multiindex_repr(pmi, max_seq_items): - pd.set_option("display.max_seq_items", max_seq_items) - gmi = cudf.from_pandas(pmi) + with pd.option_context("display.max_seq_items", max_seq_items): + gmi = cudf.from_pandas(pmi) - assert repr(gmi) == repr(pmi) - pd.reset_option("display.max_seq_items") + assert repr(gmi) == repr(pmi) @pytest.mark.parametrize( "gdi, expected_repr", [ ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": [None, 1, 2, 3], "b": ["abc", None, "xyz", None], @@ -1142,7 +1157,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series([None, np.nan, 2, 3], nan_as_null=False), "b": ["abc", None, "xyz", None], @@ -1162,7 +1177,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": 
cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"), "b": ["abc", None, "xyz", None], @@ -1182,7 +1197,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"), "b": ["abc", None, "xyz", None], @@ -1202,7 +1217,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": ["abc", None, "xyz", None], "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"), @@ -1222,7 +1237,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": ["abc", None, "xyz", None], "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"), @@ -1242,7 +1257,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": [None, None, None, None], "b": cudf.Series( @@ -1264,7 +1279,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": [1, 2, None, 3, 5], "b": [ @@ -1294,7 +1309,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": [1, 2, None, 3, 5], "b": [ @@ -1324,7 +1339,7 @@ def test_multiindex_repr(pmi, max_seq_items): ), ), ( - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": ["(abc", "2", None, "3", "5"], "b": [ @@ -1356,7 +1371,7 @@ def test_multiindex_repr(pmi, max_seq_items): ], ) def test_multiindex_null_repr(gdi, expected_repr): - actual_repr = repr(gdi) + actual_repr = repr(gdi()) assert actual_repr.split() == expected_repr.split() From e5fd4e638b26f645726c23eba86aa0de1fea1c87 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Tue, 5 Aug 2025 11:56:13 -0700 Subject: [PATCH 060/366] Increase alignment requirement for parquet bloom filter to 256 (#19573) Closes #19539. Related to https://github.com/rapidsai/rmm/issues/2002 This PR increases the alignment requirement for parquet bloom filters from `32` to `256` (== the CUDA allocation alignment) to avoid allocating invalidly sized buffers and/or corrupting data when an aligned memory-resource adapter is in use. Doing so doesn't change anything in practice, since 256-byte-aligned addresses are also 32-byte aligned.
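For intuition, that compatibility argument can be sanity-checked on the host with a small sketch (plain Python, illustrative only; `align_up` is a hypothetical helper, not cudf or rmm code):

```python
CUDA_ALLOCATION_ALIGNMENT = 256  # assumed to mirror rmm::CUDA_ALLOCATION_ALIGNMENT


def align_up(size: int, alignment: int) -> int:
    """Round size up to the next multiple of a power-of-two alignment."""
    assert alignment & (alignment - 1) == 0, "alignment must be a power of 2"
    return (size + alignment - 1) & ~(alignment - 1)


# Every size padded to a 256-byte boundary is also 32-byte aligned, so the
# stricter requirement cannot break code that assumed the old alignment.
for size in (1, 31, 32, 200, 257, 4096):
    padded = align_up(size, CUDA_ALLOCATION_ALIGNMENT)
    assert padded % 256 == 0 and padded % 32 == 0
```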
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19573 --- cpp/src/io/parquet/bloom_filter_reader.cu | 10 +++-- .../io/experimental/hybrid_scan_test.cpp | 3 +- .../parquet/bloom_filter_alignment.parquet | Bin 0 -> 1805 bytes python/cudf/cudf/tests/test_parquet.py | 41 ++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 python/cudf/cudf/tests/data/parquet/bloom_filter_alignment.parquet diff --git a/cpp/src/io/parquet/bloom_filter_reader.cu b/cpp/src/io/parquet/bloom_filter_reader.cu index f222365de18..d2d7fcac959 100644 --- a/cpp/src/io/parquet/bloom_filter_reader.cu +++ b/cpp/src/io/parquet/bloom_filter_reader.cu @@ -402,10 +402,12 @@ size_t aggregate_reader_metadata::get_bloom_filter_alignment() const // Required alignment: // https://github.com/NVIDIA/cuCollections/blob/deab5799f3e4226cb8a49acf2199c03b14941ee4/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh#L55-L67 using policy_type = cuco::arrow_filter_policy; - return alignof(cuco::bloom_filter_ref, - cuco::thread_scope_thread, - policy_type>::filter_block_type); + auto constexpr alignment = alignof(cuco::bloom_filter_ref, + cuco::thread_scope_thread, + policy_type>::filter_block_type); + static_assert((alignment & (alignment - 1)) == 0, "Alignment must be a power of 2"); + return std::max(alignment, rmm::CUDA_ALLOCATION_ALIGNMENT); } std::vector aggregate_reader_metadata::read_bloom_filters( diff --git a/cpp/tests/io/experimental/hybrid_scan_test.cpp b/cpp/tests/io/experimental/hybrid_scan_test.cpp index 6ec572d1e49..5cb787f451e 100644 --- a/cpp/tests/io/experimental/hybrid_scan_test.cpp +++ b/cpp/tests/io/experimental/hybrid_scan_test.cpp @@ -31,9 +31,10 @@ #include #include +#include #include -auto constexpr bloom_filter_alignment = 32; +auto constexpr bloom_filter_alignment = rmm::CUDA_ALLOCATION_ALIGNMENT; namespace { diff --git a/python/cudf/cudf/tests/data/parquet/bloom_filter_alignment.parquet b/python/cudf/cudf/tests/data/parquet/bloom_filter_alignment.parquet new file mode 100644 index 0000000000000000000000000000000000000000..26441e59257a3a79546a061950e0b7fdcb0fad31 GIT binary patch literal 1805 zcmb`IZ%i9y9LJw4Z7F{WE%a{fEb{QL>9($e0;3wHl-07aQaa`$nl0>l_t2j0_3-Y> zpT#A(kTKjeOy4N42$%&;G`?Jni;`?=jM)pLaanv}w#W=7rpd@MjWhn9GIS7MEM`qV z-(H`4?)&>ae}2!!Ljx_Wk$uO&&Ksg^ghdFQXY%J5_%#4UfDteOW}q0b03|>vPzIC( zR-gi?1Z;pEr~;}12S9x}U>o2BT)=kV3BV2108aw700!!SdY}Pd5j$JNCVY^cjWx57 z##hWNO)kQiLtVH$%)1XP_@cKu-WU(&Egf3wWTJ;pcBdQ|i1g6}`>!!m_MHN*d zT1`oxs%u!5G!2ZrOw6JX&D75%+;~jVQ<%qr5ON&EDSnhN``UV>VI7lkA;o8tgk$zu z&$VTwG%5HCN=8u!wj+nEh$O?t+K^4=)5H~Xq5LyrysGk9eS+?*n1VH;j!FW-8AafA z36hnvSf0SVN_d)bM9F1Eh@Bw1PE^R8@p+*Kk2PIU$*xEPa*%PIWW~DUR;gwWa+q|Z zj_bH)7z5_B6Ih0BWi{`24of<%j2Bb|ZfMd;Q;j#OXgZDcvXWrJlesLP(^HCc5@vHG zr^3KALTg3LjbtRqjj&uJttkm6Nz$0BMrESJR#MrOl++BZJ4jfA^aHJasJb>P?Ns(s z>7Wv%@>Gr0*xEv`0wDez3bwXA^mJ?c|JXr4Ow>9zmDMEUhU(n_`J1lQAAM!#jqb^{ zJGUz?Gz9Br-uPH}`xoZ^p#6qdn0~hF)qvZwwCCpa#Yul`srL-mcwE5Xvmv5E^rsG#bJAJ=BT{XC9HT^s@T)Fu9rG=H{esuTz+(Lfa zl^wO_na`@v4o-a;{N2fnZ}p3}n=jw{6?v9RU-~vC^_*Vg`IJ2{yYk+fOX1KF%lE5| z_b+@j_1dM2bD`XiyNHO9%~`~cNN@JiD#|SbGO*-bo^H>ILkH;7APeM zAAW>xH2Npih3MSb1}@^UAg?zJ%M|X$qeRtcy^pr|Tm8-0o5)ES5qGo*?Lu>hAc{NC Lb(pd^ya@jWHUP16 literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index d1e82a552ad..94eb2c794a5 100644 --- 
a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -4523,6 +4523,47 @@ def test_parquet_bloom_filters( ) +@pytest.fixture(params=["cuda", "pool", "cuda_async"]) +def memory_resource(request): + import rmm + + current_mr = rmm.mr.get_current_device_resource() + + kind = request.param + if kind == "cuda": + mr = rmm.mr.CudaMemoryResource() + elif kind == "pool": + base = rmm.mr.CudaMemoryResource() + free, _ = rmm.mr.available_device_memory() + size = int(round(free * 0.5 / 256) * 256) + mr = rmm.mr.PoolMemoryResource(base, size, size) + elif kind == "cuda_async": + mr = rmm.mr.CudaAsyncMemoryResource() + + rmm.mr.set_current_device_resource(mr) + + try: + yield mr + finally: + rmm.mr.set_current_device_resource(current_mr) + + +@pytest.mark.parametrize("columns", [["r_reason_desc"], None]) +def test_parquet_bloom_filters_alignment(datadir, columns, memory_resource): + fname = datadir / "bloom_filter_alignment.parquet" + filters = [("r_reason_desc", "==", "Did not like the color")] + + # Read expected table using pyarrow + expected = pq.read_table(fname, columns=columns, filters=filters) + + # Read with cudf using the memory resource from fixture + read = cudf.read_parquet( + fname, columns=columns, filters=filters + ).to_arrow() + + assert_eq(expected, read) + + def test_parquet_reader_unsupported_compression(datadir): fname = datadir / "hadoop_lz4_compressed.parquet" From f9b180c50cbfd7dab999a4ef60772d7eb11f94a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:39:13 -0700 Subject: [PATCH 061/366] Remove cudf/_fuzz_testing directory (#19510) Discussed offline, `_fuzz_testing` contained a set of scripts that were once used for testing major IO refactors. It appears it hasn't been used in quite some time, and aspirationally we would like to move towards using a library like `hypothesis` for continual fuzz testing (xref https://github.com/rapidsai/cudf/issues/1612).
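A property-based test in that style might look like the following minimal sketch (illustrative only, not part of this PR; assumes `hypothesis` is installed, and `test_csv_roundtrip` is a hypothetical name):

```python
from io import StringIO

from hypothesis import given, settings, strategies as st

import cudf


@given(
    data=st.lists(
        st.integers(min_value=-(2**53), max_value=2**53),
        min_size=1,
        max_size=50,
    )
)
@settings(deadline=None)  # the first GPU call can exceed hypothesis' default deadline
def test_csv_roundtrip(data):
    # Property: writing a frame to CSV and reading it back preserves the values.
    df = cudf.DataFrame({"a": data})
    result = cudf.read_csv(StringIO(df.to_csv(index=False)))
    assert result["a"].to_arrow().to_pylist() == data
```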
Marking as non-breaking, as this was "private" functionality in the `cudf` namespace. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19510 --- python/cudf/cudf/_fuzz_testing/__init__.py | 0 python/cudf/cudf/_fuzz_testing/avro.py | 117 -------- python/cudf/cudf/_fuzz_testing/csv.py | 203 -------------- python/cudf/cudf/_fuzz_testing/fuzzer.py | 114 -------- python/cudf/cudf/_fuzz_testing/io.py | 110 -------- python/cudf/cudf/_fuzz_testing/json.py | 193 ------------- python/cudf/cudf/_fuzz_testing/main.py | 45 --- python/cudf/cudf/_fuzz_testing/orc.py | 199 -------------- python/cudf/cudf/_fuzz_testing/parquet.py | 169 ------------ .../_fuzz_testing/tests/fuzz_test_avro.py | 38 --- .../cudf/_fuzz_testing/tests/fuzz_test_csv.py | 132 --------- .../_fuzz_testing/tests/fuzz_test_json.py | 94 ------- .../cudf/_fuzz_testing/tests/fuzz_test_orc.py | 98 ------- .../_fuzz_testing/tests/fuzz_test_parquet.py | 106 -------- .../cudf/cudf/_fuzz_testing/tests/readme.md | 100 ------- python/cudf/cudf/_fuzz_testing/utils.py | 257 ------------------ 16 files changed, 1975 deletions(-) delete mode 100644 python/cudf/cudf/_fuzz_testing/__init__.py delete mode 100644 python/cudf/cudf/_fuzz_testing/avro.py delete mode 100644 python/cudf/cudf/_fuzz_testing/csv.py delete mode 100644 python/cudf/cudf/_fuzz_testing/fuzzer.py delete mode 100644 python/cudf/cudf/_fuzz_testing/io.py delete mode 100644 python/cudf/cudf/_fuzz_testing/json.py delete mode 100644 python/cudf/cudf/_fuzz_testing/main.py delete mode 100644 python/cudf/cudf/_fuzz_testing/orc.py delete mode 100644 python/cudf/cudf/_fuzz_testing/parquet.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_avro.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py delete mode 100644 python/cudf/cudf/_fuzz_testing/tests/readme.md delete mode 100644 python/cudf/cudf/_fuzz_testing/utils.py diff --git a/python/cudf/cudf/_fuzz_testing/__init__.py b/python/cudf/cudf/_fuzz_testing/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_fuzz_testing/avro.py b/python/cudf/cudf/_fuzz_testing/avro.py deleted file mode 100644 index 172193aa672..00000000000 --- a/python/cudf/cudf/_fuzz_testing/avro.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION.
- -import copy -import io -import logging -import random - -import numpy as np - -import cudf -from cudf._fuzz_testing.io import IOFuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - _generate_rand_meta, - pandas_to_avro, - pyarrow_to_pandas, -) -from cudf.testing import dataset_generator as dg - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class AvroReader(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - self._df = None - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - - {"category"} - # No unsigned support in avro: - # https://avro.apache.org/docs/current/spec.html - - cudf.utils.dtypes.UNSIGNED_TYPES - # TODO: Remove DATETIME_TYPES once - # following bug is fixed: - # https://github.com/rapidsai/cudf/issues/6482 - - cudf.utils.dtypes.DATETIME_TYPES - # TODO: Remove DURATION_TYPES once - # following bug is fixed: - # https://github.com/rapidsai/cudf/issues/6604 - - cudf.utils.dtypes.TIMEDELTA_TYPES - ) - seed = random.randint(0, 2**32 - 1) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_cols"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - self._df = df - logging.info(f"Shape of DataFrame generated: {table.shape}") - - file_obj = io.BytesIO() - pandas_to_avro(df, file_io_obj=file_obj) - file_obj.seek(0) - buf = file_obj.read() - self._current_buffer = copy.copy(buf) - return (df, buf) - - def write_data(self, file_name): - if self._current_buffer is not None: - with open(file_name + "_crash.avro", "wb") as crash_dataset: - crash_dataset.write(self._current_buffer) - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if values == ALL_POSSIBLE_VALUES: - if param == "columns": - col_size = self._rand(len(self._df.columns)) - params_dict[param] = list( - np.unique(rng.choice(self._df.columns, col_size)) - ) - elif param in ("skiprows", "num_rows"): - params_dict[param] = rng.choice( - [None, self._rand(len(self._df))] - ) - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) diff --git a/python/cudf/cudf/_fuzz_testing/csv.py b/python/cudf/cudf/_fuzz_testing/csv.py deleted file mode 100644 index fa3ed40ce91..00000000000 --- a/python/cudf/cudf/_fuzz_testing/csv.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import logging -import random - -import numpy as np - -import cudf -from cudf._fuzz_testing.io import IOFuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - _generate_rand_meta, - pyarrow_to_pandas, -) -from cudf.testing import dataset_generator as dg -from cudf.utils.dtypes import pandas_dtypes_to_np_dtypes - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class CSVReader(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - seed = random.randint(0, 2**32 - 1) - random.seed(seed) - dtypes_list = list(cudf.utils.dtypes.ALL_TYPES) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_columns"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - - logging.info(f"Shape of DataFrame generated: {df.shape}") - self._current_buffer = df - return df.to_csv() - - def write_data(self, file_name): - if self._current_buffer is not None: - self._current_buffer.to_csv(file_name + "_crash.csv") - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if values == ALL_POSSIBLE_VALUES: - if param == "usecols": - col_size = self._rand(len(self._df.columns)) - col_val = rng.choice( - [ - None, - np.unique(rng.choice(self._df.columns, col_size)), - ] - ) - params_dict[param] = ( - col_val if col_val is None else list(col_val) - ) - elif param == "dtype": - dtype_val = rng.choice([None, self._df.dtypes.to_dict()]) - if dtype_val is not None: - dtype_val = { - col_name: "category" - if isinstance(dtype, cudf.CategoricalDtype) - else pandas_dtypes_to_np_dtypes[dtype] - for col_name, dtype in dtype_val.items() - } - params_dict[param] = dtype_val - elif param == "header": - header_val = rng.choice( - ["infer", rng.integers(low=0, high=len(self._df))] - ) - params_dict[param] = header_val - elif param == "skiprows": - params_dict[param] = rng.integers( - low=0, high=len(self._df) - ) - elif param == "skipfooter": - params_dict[param] = rng.integers( - low=0, high=len(self._df) - ) - elif param == "nrows": - nrows_val = rng.choice( - [None, rng.integers(low=0, high=len(self._df))] - ) - params_dict[param] = nrows_val - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) - - -class CSVWriter(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - 
max_lists_nesting_depth=max_lists_nesting_depth, - ) - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - seed = random.randint(0, 2**32 - 1) - random.seed(seed) - dtypes_list = list(cudf.utils.dtypes.ALL_TYPES) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_columns"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - - logging.info(f"Shape of DataFrame generated: {df.shape}") - self._current_buffer = df - return df - - def write_data(self, file_name): - if self._current_buffer is not None: - self._current_buffer.to_csv(file_name + "_crash.csv") - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if values == ALL_POSSIBLE_VALUES: - if param == "columns": - col_size = self._rand(len(self._current_buffer.columns)) - params_dict[param] = list( - np.unique( - rng.choice(self._current_buffer.columns, col_size) - ) - ) - elif param == "chunksize": - params_dict[param] = rng.choice( - [ - None, - rng.integers( - low=1, high=max(1, len(self._current_buffer)) - ), - ] - ) - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) diff --git a/python/cudf/cudf/_fuzz_testing/fuzzer.py b/python/cudf/cudf/_fuzz_testing/fuzzer.py deleted file mode 100644 index 4b080937a17..00000000000 --- a/python/cudf/cudf/_fuzz_testing/fuzzer.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import datetime -import json -import logging -import os -import sys -import traceback - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class Fuzzer: - def __init__( - self, - target, - data_handler_class, - dirs=None, - crash_reports_dir=None, - regression=False, - max_rows_size=100_000, - max_cols_size=1000, - runs=-1, - max_string_length=None, - params=None, - write_data_on_failure=True, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - self._target = target - self._dirs = [] if dirs is None else dirs - self._crash_dir = crash_reports_dir - self._data_handler = data_handler_class( - dirs=self._dirs, - max_rows=max_rows_size, - max_columns=max_cols_size, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - self._total_executions = 0 - self._regression = regression - self._start_time = None - self.runs = runs - self.params = params - self.write_data_on_failure = write_data_on_failure - - def log_stats(self): - end_time = datetime.datetime.now() - total_time_taken = end_time - self._start_time - - logging.info(f"Run-Time elapsed (hh:mm:ss.ms) {total_time_taken}") - - def write_crash(self, error): - error_file_name = str(datetime.datetime.now()) - if self._crash_dir: - crash_path = os.path.join( - self._crash_dir, - error_file_name + "_crash.json", - ) - crash_log_path = os.path.join( - self._crash_dir, - error_file_name + "_crash.log", - ) - else: - crash_path = error_file_name + "_crash.json" - crash_log_path = error_file_name + "_crash.log" - - with open(crash_path, "w") as f: - json.dump( - self._data_handler.current_params, f, sort_keys=True, indent=4 - ) - - logging.info(f"Crash params was written to {crash_path}") - - with open(crash_log_path, "w") as f: - f.write(str(error)) - logging.info(f"Crash exception was written to {crash_log_path}") - - if self.write_data_on_failure: - self._data_handler.write_data(error_file_name) - - def start(self): - while True: - logging.info(f"Running test {self._total_executions}") - file_name = self._data_handler.generate_input() - try: - self._start_time = datetime.datetime.now() - if self.params is None: - self._target(file_name) - else: - self._data_handler.set_rand_params(self.params) - kwargs = self._data_handler._current_params["test_kwargs"] - logging.info(f"Parameters passed: {kwargs!s}") - self._target(file_name, **kwargs) - except KeyboardInterrupt: - logging.info( - f"Keyboard Interrupt encountered, stopping after " - f"{self.runs} runs." - ) - sys.exit(0) - except Exception as e: - logging.exception(e) - self.write_crash(traceback.format_exc()) - self.log_stats() - if self.runs != -1 and self._total_executions >= self.runs: - logging.info(f"Completed {self.runs}, stopping now.") - break - - self._total_executions += 1 diff --git a/python/cudf/cudf/_fuzz_testing/io.py b/python/cudf/cudf/_fuzz_testing/io.py deleted file mode 100644 index a4b8e18d8b4..00000000000 --- a/python/cudf/cudf/_fuzz_testing/io.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import copy -import json -import logging -import os -import random -import sys - -import numpy as np - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class IOFuzz: - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - max_structs_nesting_depth=None, - max_struct_null_frequency=None, - max_struct_types_at_each_level=None, - ): - dirs = [] if dirs is None else dirs - self._inputs = [] - self._max_rows = max_rows - self._max_columns = max_columns - self._max_string_length = max_string_length - self._max_lists_length = max_lists_length - self._max_lists_nesting_depth = max_lists_nesting_depth - self._max_structs_nesting_depth = max_structs_nesting_depth - self._max_struct_null_frequency = max_struct_null_frequency - self._max_struct_types_at_each_level = max_struct_types_at_each_level - - for i, path in enumerate(dirs): - if i == 0 and not os.path.exists(path): - raise FileNotFoundError(f"No {path} exists") - - if os.path.isfile(path) and path.endswith("_crash.json"): - self._load_params(path) - else: - for i in os.listdir(path): - file_name = os.path.join(path, i) - if os.path.isfile(file_name) and file_name.endswith( - "_crash.json" - ): - self._load_params(file_name) - self._regression = bool(self._inputs) - self._idx = 0 - self._current_params = {} - self._current_buffer = None - - def _load_params(self, path): - with open(path) as f: - params = json.load(f) - self._inputs.append(params) - - @staticmethod - def _rand(n): - return random.randrange(0, n + 1) - - def generate_input(self): - raise NotImplementedError("Must be implemented by inherited class") - - @property - def current_params(self): - return self._current_params - - def get_next_regression_params(self): - if self._idx >= len(self._inputs): - logging.info( - "Reached the end of all crash.json files to run..Exiting.." - ) - sys.exit(0) - param = self._inputs[self._idx] - dtypes_meta = param["dtypes_meta"] - num_rows = param["num_rows"] - num_cols = param["num_columns"] - seed = param["seed"] - random.seed(seed) - self._idx += 1 - self._current_params = copy.copy(param) - return dtypes_meta, num_rows, num_cols, seed - - def set_rand_params(self, params): - rng = np.random.default_rng(seed=None) - params_dict = { - param: rng.choice(values) for param, values in params.items() - } - self._current_params["test_kwargs"] = self.process_kwargs( - params_dict=params_dict - ) - - def process_kwargs(self, params_dict): - return { - key: bool(value) - if isinstance(value, np.bool_) - else str(value) - if isinstance(value, np.dtype) - else value - for key, value in params_dict.items() - } diff --git a/python/cudf/cudf/_fuzz_testing/json.py b/python/cudf/cudf/_fuzz_testing/json.py deleted file mode 100644 index 45d2c8d8cf0..00000000000 --- a/python/cudf/cudf/_fuzz_testing/json.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import logging -import random -from collections import abc - -import numpy as np - -import cudf -from cudf._fuzz_testing.io import IOFuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - _generate_rand_meta, - pyarrow_to_pandas, -) -from cudf.testing import dataset_generator as dg -from cudf.utils.dtypes import pandas_dtypes_to_np_dtypes - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -def _get_dtype_param_value(dtype_val): - if dtype_val is not None and isinstance(dtype_val, abc.Mapping): - processed_dtypes = {} - for col_name, dtype in dtype_val.items(): - if isinstance(dtype, cudf.CategoricalDtype): - processed_dtypes[col_name] = "category" - else: - processed_dtypes[col_name] = str( - pandas_dtypes_to_np_dtypes.get(dtype, dtype) - ) - return processed_dtypes - return dtype_val - - -class JSONReader(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - seed = random.randint(0, 2**32 - 1) - random.seed(seed) - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - # https://github.com/pandas-dev/pandas/issues/20599 - - {"uint64"} - # TODO: Remove DATETIME_TYPES after this is fixed: - # https://github.com/rapidsai/cudf/issues/6586 - - set(cudf.utils.dtypes.DATETIME_TYPES) - ) - # TODO: Uncomment following after following - # issue is fixed: - # https://github.com/rapidsai/cudf/issues/7086 - # dtypes_list.extend(["list"]) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_columns"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - self._current_buffer = df - logging.info(f"Shape of DataFrame generated: {df.shape}") - - return df.to_json(orient="records", lines=True) - - def write_data(self, file_name): - if self._current_buffer is not None: - self._current_buffer.to_json( - file_name + "_crash_json.json", orient="records", lines=True - ) - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if param == "dtype" and values == ALL_POSSIBLE_VALUES: - dtype_val = rng.choice( - [True, self._current_buffer.dtypes.to_dict()] - ) - params_dict[param] = _get_dtype_param_value(dtype_val) - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) - - -class JSONWriter(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - ) - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = 
self.get_next_regression_params() - else: - seed = random.randint(0, 2**32 - 1) - random.seed(seed) - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - # https://github.com/pandas-dev/pandas/issues/20599 - - {"uint64"} - # TODO: Remove DATETIME_TYPES after this is fixed: - # https://github.com/rapidsai/cudf/issues/6586 - - set(cudf.utils.dtypes.DATETIME_TYPES) - ) - # TODO: Uncomment following after following - # issue is fixed: - # https://github.com/rapidsai/cudf/issues/7086 - # dtypes_list.extend(["list"]) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_columns"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - - logging.info(f"Shape of DataFrame generated: {df.shape}") - self._current_buffer = df - return df - - def write_data(self, file_name): - if self._current_buffer is not None: - self._current_buffer.to_json( - file_name + "_crash_json.json", lines=True, orient="records" - ) - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if param == "dtype" and values == ALL_POSSIBLE_VALUES: - dtype_val = rng.choice( - [True, self._current_buffer.dtypes.to_dict()] - ) - params_dict[param] = _get_dtype_param_value(dtype_val) - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) diff --git a/python/cudf/cudf/_fuzz_testing/main.py b/python/cudf/cudf/_fuzz_testing/main.py deleted file mode 100644 index 54e49b63e41..00000000000 --- a/python/cudf/cudf/_fuzz_testing/main.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -from cudf._fuzz_testing import fuzzer - - -class PythonFuzz: - def __init__(self, func, params=None, data_handle=None, **kwargs): - self.function = func - self.data_handler_class = data_handle - self.fuzz_worker = fuzzer.Fuzzer( - target=self.function, - data_handler_class=self.data_handler_class, - dirs=kwargs.get("dir", None), - crash_reports_dir=kwargs.get("crash_reports_dir", None), - regression=kwargs.get("regression", False), - max_rows_size=kwargs.get("max_rows_size", 100_000), - max_cols_size=kwargs.get("max_cols_size", 1000), - runs=kwargs.get("runs", -1), - max_string_length=kwargs.get("max_string_length", None), - params=params, - write_data_on_failure=kwargs.get("write_data_on_failure", True), - max_lists_length=kwargs.get("max_lists_length", None), - max_lists_nesting_depth=kwargs.get( - "max_lists_nesting_depth", None - ), - ) - - def __call__(self, *args, **kwargs): - self.fuzz_worker.start() - - -# wrap PythonFuzz to allow for deferred calling -def pythonfuzz(function=None, data_handle=None, params=None, **kwargs): - if function: - return PythonFuzz(function, params, **kwargs) - else: - - def wrapper(function): - return PythonFuzz(function, params, data_handle, **kwargs) - - return wrapper - - -if __name__ == "__main__": - PythonFuzz(None) diff --git a/python/cudf/cudf/_fuzz_testing/orc.py b/python/cudf/cudf/_fuzz_testing/orc.py deleted file mode 100644 index 4d9e4abb09e..00000000000 --- a/python/cudf/cudf/_fuzz_testing/orc.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import copy -import io -import logging -import random - -import numpy as np -import pyarrow as pa - -import cudf -from cudf._fuzz_testing.io import IOFuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - _generate_rand_meta, - pyarrow_to_pandas, -) -from cudf.testing import dataset_generator as dg - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class OrcReader(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - self._df = None - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - - {"category"} - # Following dtypes are not supported by orc - # https://orc.apache.org/specification/ORCv0/ - - cudf.utils.dtypes.TIMEDELTA_TYPES - - cudf.utils.dtypes.UNSIGNED_TYPES - - {"datetime64[ns]"} - ) - seed = random.randint(0, 2**32 - 1) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_cols"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - logging.info(f"Shape of DataFrame generated: {table.shape}") - self._df = df - file_obj = io.BytesIO() - pa.orc.write_table(table, file_obj, stripe_size=self._rand(len(df))) - file_obj.seek(0) - buf = file_obj.read() - self._current_buffer = copy.copy(buf) - return (df, buf) - - def write_data(self, file_name): - if self._current_buffer is not None: - with open(file_name + "_crash.orc", "wb") as crash_dataset: - crash_dataset.write(self._current_buffer) - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if values == ALL_POSSIBLE_VALUES: - if param == "columns": - col_size = self._rand(len(self._df.columns)) - params_dict[param] = list( - np.unique(rng.choice(self._df.columns, col_size)) - ) - elif param == "stripes": - f = io.BytesIO(self._current_buffer) - orcFile = pa.orc.ORCFile(f) - stripes = list(range(orcFile.nstripes)) - params_dict[param] = rng.choice( - [ - None, - list( - map( - int, - np.unique( - rng.choice(stripes, orcFile.nstripes) - ), - ) - ), - ] - ) - elif param == "use_index": - params_dict[param] = rng.choice([True, False]) - elif param in ("skiprows", "num_rows"): - params_dict[param] = rng.choice( - [None, self._rand(len(self._df))] - ) - else: - if not isinstance(values, list): - raise TypeError("values must be of type list") - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) - - -class OrcWriter(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - 
max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - self._df = None - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - # TODO: Remove "bool" from below - # list after following issue is fixed: - # https://github.com/rapidsai/cudf/issues/6763 - - {"category", "bool"} - # Following dtypes are not supported by orc - # https://orc.apache.org/specification/ORCv0/ - - cudf.utils.dtypes.TIMEDELTA_TYPES - - cudf.utils.dtypes.UNSIGNED_TYPES - # TODO: Remove `DATETIME_TYPES` once - # following bug is fixed: - # https://github.com/rapidsai/cudf/issues/7355 - - cudf.utils.dtypes.DATETIME_TYPES - ) - seed = random.randint(0, 2**32 - 1) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_cols"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - logging.info(f"Shape of DataFrame generated: {table.shape}") - self._df = df - return df - - def write_data(self, file_name): - # Due to the lack of really fast reference writer we are dumping - # the dataframe to a parquet file - if self._df is not None: - self._df.to_parquet(file_name + "_crash.parquet") diff --git a/python/cudf/cudf/_fuzz_testing/parquet.py b/python/cudf/cudf/_fuzz_testing/parquet.py deleted file mode 100644 index bd3df1b0847..00000000000 --- a/python/cudf/cudf/_fuzz_testing/parquet.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import logging -import random - -import numpy as np - -import cudf -from cudf._fuzz_testing.io import IOFuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - _generate_rand_meta, - pyarrow_to_pandas, -) -from cudf.testing import dataset_generator as dg - -logging.basicConfig( - format="%(asctime)s %(levelname)-8s %(message)s", - level=logging.INFO, - datefmt="%Y-%m-%d %H:%M:%S", -) - - -class ParquetReader(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - self._df = None - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - - {"category", "datetime64[ns]"} - - cudf.utils.dtypes.TIMEDELTA_TYPES - # TODO: Remove uint32 below after this bug is fixed - # https://github.com/pandas-dev/pandas/issues/37327 - - {"uint32"} - | {"list", "decimal64"} - ) - seed = random.randint(0, 2**32 - 1) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_cols"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - logging.info(f"Shape of DataFrame generated: {table.shape}") - - # TODO: Change this to write into - # a BytesIO object once below issue is fixed - # https://issues.apache.org/jira/browse/ARROW-10123 - - # file = io.BytesIO() - - df.to_parquet("temp_file") - # file.seek(0) - # self._current_buffer = copy.copy(file.read()) - # return self._current_buffer - self._df = df - return "temp_file" - - def write_data(self, file_name): - if self._current_buffer is not None: - with open(file_name + "_crash.parquet", "wb") as crash_dataset: - crash_dataset.write(self._current_buffer) - - def set_rand_params(self, params): - params_dict = {} - rng = np.random.default_rng(seed=None) - for param, values in params.items(): - if param == "columns" and values == ALL_POSSIBLE_VALUES: - col_size = self._rand(len(self._df.columns)) - params_dict[param] = list( - np.unique(rng.choice(self._df.columns, col_size)) - ) - else: - params_dict[param] = rng.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) - - -class ParquetWriter(IOFuzz): - def __init__( - self, - dirs=None, - max_rows=100_000, - max_columns=1000, - max_string_length=None, - max_lists_length=None, - max_lists_nesting_depth=None, - ): - super().__init__( - dirs=dirs, - max_rows=max_rows, - max_columns=max_columns, - max_string_length=max_string_length, - max_lists_length=max_lists_length, - max_lists_nesting_depth=max_lists_nesting_depth, - ) - - def generate_input(self): - if self._regression: - ( - dtypes_meta, - num_rows, - num_cols, - seed, - ) = self.get_next_regression_params() - else: - seed = random.randint(0, 2**32 - 1) - random.seed(seed) - dtypes_list = list( - cudf.utils.dtypes.ALL_TYPES - - {"category", "timedelta64[ns]", "datetime64[ns]"} - # TODO: Remove uint32 below after 
this bug is fixed - # https://github.com/pandas-dev/pandas/issues/37327 - - {"uint32"} - | {"list", "decimal64"} - ) - dtypes_meta, num_rows, num_cols = _generate_rand_meta( - self, dtypes_list, seed - ) - self._current_params["dtypes_meta"] = dtypes_meta - self._current_params["seed"] = seed - self._current_params["num_rows"] = num_rows - self._current_params["num_columns"] = num_cols - logging.info( - f"Generating DataFrame with rows: {num_rows} " - f"and columns: {num_cols}" - ) - - table = dg.rand_dataframe(dtypes_meta, num_rows, seed) - df = pyarrow_to_pandas(table) - - logging.info(f"Shape of DataFrame generated: {df.shape}") - self._current_buffer = df - return df - - def write_data(self, file_name): - if self._current_buffer is not None: - self._current_buffer.to_parquet(file_name + "_crash.parquet") diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_avro.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_avro.py deleted file mode 100644 index 5a90aec5828..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_avro.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - -import sys - -import cudf -from cudf._fuzz_testing.avro import AvroReader -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - compare_dataframe, - run_test, -) - - -@pythonfuzz( - data_handle=AvroReader, - params={ - "columns": ALL_POSSIBLE_VALUES, - "skiprows": ALL_POSSIBLE_VALUES, - "num_rows": ALL_POSSIBLE_VALUES, - }, -) -def avro_reader_test(input_tuple, columns, skiprows, num_rows): - pdf, parquet_buffer = input_tuple - expected_pdf = pdf[skiprows:] - if num_rows is not None: - expected_pdf = expected_pdf.head(num_rows) - if skiprows is not None or num_rows is not None: - expected_pdf = expected_pdf.reset_index(drop=True) - - gdf = cudf.read_avro( - parquet_buffer, columns=columns, skiprows=skiprows, num_rows=num_rows - ) - compare_dataframe(expected_pdf, gdf) - - -if __name__ == "__main__": - run_test(globals(), sys.argv) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py deleted file mode 100644 index d90f3ea1aca..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import sys -from io import StringIO - -import pandas as pd - -import cudf -from cudf._fuzz_testing.csv import CSVReader, CSVWriter -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - compare_content, - run_test, -) -from cudf.testing import assert_eq - - -@pythonfuzz(data_handle=CSVReader) -def csv_reader_test(csv_buffer): - pdf = pd.read_csv(StringIO(csv_buffer)) - gdf = cudf.read_csv(StringIO(csv_buffer)) - - assert_eq(gdf, pdf) - - -@pythonfuzz(data_handle=CSVWriter) -def csv_writer_test(pdf): - gdf = cudf.from_pandas(pdf) - - pd_buffer = pdf.to_csv() - gd_buffer = gdf.to_csv() - - compare_content(pd_buffer, gd_buffer) - actual = cudf.read_csv(StringIO(gd_buffer)) - expected = pd.read_csv(StringIO(pd_buffer)) - assert_eq(actual, expected) - - -@pythonfuzz( - data_handle=CSVWriter, - params={ - "sep": list([",", "|", "\t", "\r", "~"]), - "header": [True, False], - "na_rep": [ - "", - "", - "NA", - "_NA_", - "__", - "<<<<>>>>>", - "--<>--", - "-+><+-", - ], - "columns": ALL_POSSIBLE_VALUES, - "index": [True, False], - "lineterminator": ["\n", "\r", "\r\n"], - "chunksize": ALL_POSSIBLE_VALUES, - }, -) -def csv_writer_test_params( - pdf, sep, header, na_rep, columns, index, lineterminator, chunksize -): - gdf = cudf.from_pandas(pdf) - - pd_buffer = pdf.to_csv( - sep=sep, - header=header, - na_rep=na_rep, - columns=columns, - index=index, - lineterminator=lineterminator, - chunksize=chunksize, - ) - gd_buffer = gdf.to_csv( - sep=sep, - header=header, - na_rep=na_rep, - columns=columns, - index=index, - lineterminator=lineterminator, - chunksize=chunksize, - ) - - # TODO: Uncomment once this issue is fixed - # https://github.com/rapidsai/cudf/issues/6418 - # compare_content(pd_buffer, gd_buffer) - - actual = cudf.read_csv( - StringIO(gd_buffer), - delimiter=sep, - na_values=na_rep, - lineterminator=lineterminator, - ) - expected = pd.read_csv( - StringIO(pd_buffer), - delimiter=sep, - na_values=na_rep, - lineterminator=lineterminator, - ) - if not header: - # TODO: Remove renaming columns once the following bug is fixed: - # https://github.com/rapidsai/cudf/issues/6418 - actual.columns = expected.columns - - assert_eq(actual, expected) - - -@pythonfuzz( - data_handle=CSVReader, - params={ - "dtype": ALL_POSSIBLE_VALUES, - "usecols": ALL_POSSIBLE_VALUES, - "header": ALL_POSSIBLE_VALUES, - "skiprows": ALL_POSSIBLE_VALUES, - "skipfooter": ALL_POSSIBLE_VALUES, - "nrows": ALL_POSSIBLE_VALUES, - }, -) -def csv_reader_test_params(csv_buffer, dtype, header, skiprows): - pdf = pd.read_csv( - StringIO(csv_buffer), dtype=dtype, header=header, skiprows=skiprows - ) - gdf = cudf.read_csv( - StringIO(csv_buffer), dtype=dtype, header=header, skiprows=skiprows - ) - - assert_eq(gdf, pdf) - - -if __name__ == "__main__": - run_test(globals(), sys.argv) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py deleted file mode 100644 index 69e9437be93..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
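The removed `fuzz_test_csv.py` above round-trips frames through `to_csv`/`read_csv`; a minimal standalone sketch of the same pattern, using only public cudf APIs:

```python
# CSV round-trip sketch mirroring the removed fuzz tests: write with a custom
# separator and NA representation, then read back with matching options.
from io import StringIO

import cudf

gdf = cudf.DataFrame({"a": [1, None, 3]})
buf = gdf.to_csv(sep="|", na_rep="NA", index=False)
back = cudf.read_csv(StringIO(buf), sep="|", na_values=["NA"])
```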
- -import io -import sys - -import pandas as pd - -import cudf -from cudf._fuzz_testing.json import JSONReader, JSONWriter -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES, run_test -from cudf.testing import assert_eq - - -@pythonfuzz(data_handle=JSONReader) -def json_reader_test(json_buffer): - pdf = pd.read_json(io.StringIO(json_buffer), orient="records", lines=True) - # Difference in behaviour with pandas - # cudf reads column as strings only. - pdf.columns = pdf.columns.astype("str") - gdf = cudf.read_json(io.StringIO(json_buffer), engine="cudf", lines=True) - - assert_eq(gdf, pdf) - - -@pythonfuzz(data_handle=JSONReader, params={"dtype": ALL_POSSIBLE_VALUES}) -def json_reader_test_params(json_buffer, dtype): - pdf = pd.read_json(json_buffer, dtype=dtype, orient="records", lines=True) - pdf.columns = pdf.columns.astype("str") - - gdf = cudf.read_json(json_buffer, dtype=dtype, engine="cudf", lines=True) - - assert_eq(gdf, pdf) - - -@pythonfuzz(data_handle=JSONWriter) -def json_writer_test(pdf): - gdf = cudf.from_pandas(pdf) - - pdf_buffer = pdf.to_json(lines=True, orient="records") - gdf_buffer = gdf.to_json(lines=True, orient="records") - - # TODO: Uncomment once this is fixed: - # https://github.com/rapidsai/cudf/issues/6429 - # compare_content(pdf_buffer, gdf_buffer) - - actual = cudf.read_json( - gdf_buffer, engine="cudf", lines=True, orient="records" - ) - expected = pd.read_json(pdf_buffer, lines=True, orient="records") - expected.columns = expected.columns.astype("str") - assert_eq(actual, expected) - - -@pythonfuzz( - data_handle=JSONWriter, - params={ - "compression": ["gzip", "bz2", "zip", "xz", None], - "dtype": ALL_POSSIBLE_VALUES, - }, -) -def json_writer_test_params(pdf, compression, dtype): - gdf = cudf.from_pandas(pdf) - - pdf_buffer = pdf.to_json( - lines=True, orient="records", compression=compression - ) - gdf_buffer = gdf.to_json( - lines=True, orient="records", compression=compression - ) - - # TODO: Uncomment once this is fixed: - # https://github.com/rapidsai/cudf/issues/6429 - # compare_content(pdf_buffer, gdf_buffer) - - actual = cudf.read_json( - io.StringIO(gdf_buffer), - engine="cudf", - lines=True, - orient="records", - dtype=dtype, - ) - expected = pd.read_json( - io.StringIO(pdf_buffer), lines=True, orient="records", dtype=dtype - ) - - # Difference in behaviour with pandas - # cudf reads column as strings only. - expected.columns = expected.columns.astype("str") - assert_eq(actual, expected) - - -if __name__ == "__main__": - run_test(globals(), sys.argv) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py deleted file mode 100644 index 977038d1fcb..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
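The removed `fuzz_test_json.py` above round-trips JSON-lines buffers through the cudf engine; a minimal sketch:

```python
# JSON-lines round-trip sketch mirroring the removed fuzz tests; note the
# tests cast pandas column labels to str because cudf reads them as strings.
import io

import cudf

gdf = cudf.DataFrame({"a": [1, 2], "b": ["x", "y"]})
buf = gdf.to_json(orient="records", lines=True)
back = cudf.read_json(
    io.StringIO(buf), engine="cudf", orient="records", lines=True
)
```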
- -import io -import sys - -import cudf -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.orc import OrcReader, OrcWriter -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - compare_dataframe, - orc_to_pandas, - run_test, -) - - -@pythonfuzz( - data_handle=OrcReader, - params={ - "columns": ALL_POSSIBLE_VALUES, - "skiprows": ALL_POSSIBLE_VALUES, - "num_rows": ALL_POSSIBLE_VALUES, - "use_index": ALL_POSSIBLE_VALUES, - }, -) -def orc_reader_test(input_tuple, columns, skiprows, num_rows, use_index): - pdf, file_buffer = input_tuple - expected_pdf = pdf.iloc[skiprows:] - if num_rows is not None: - expected_pdf = expected_pdf.head(num_rows) - if skiprows is not None or num_rows is not None: - expected_pdf.reset_index(drop=True, inplace=True) - if columns is not None and len(columns) > 0: - # ORC reader picks columns if only - # there are any elements in `columns` - expected_pdf = expected_pdf[columns] - if use_index is False: - expected_pdf.reset_index(drop=True, inplace=True) - - gdf = cudf.read_orc( - io.BytesIO(file_buffer), - columns=columns, - skiprows=skiprows, - num_rows=num_rows, - use_index=use_index, - ) - - compare_dataframe(expected_pdf, gdf) - - -@pythonfuzz( - data_handle=OrcReader, - params={"columns": ALL_POSSIBLE_VALUES, "stripes": ALL_POSSIBLE_VALUES}, -) -def orc_reader_stripes_test(input_tuple, columns, stripes): - _, file_buffer = input_tuple - expected_pdf = orc_to_pandas( - file_io_obj=io.BytesIO(file_buffer), stripes=stripes - ) - - if columns is not None and len(columns) > 0: - # ORC reader picks columns if only - # there are any elements in `columns` - expected_pdf = expected_pdf[columns] - - gdf = cudf.read_orc( - io.BytesIO(file_buffer), columns=columns, stripes=stripes - ) - - compare_dataframe(expected_pdf, gdf) - - -@pythonfuzz( - data_handle=OrcWriter, - params={ - "compression": [None, "snappy"], - "enable_statistics": ["NONE", "STRIPE", "ROWGROUP"], - }, -) -def orc_writer_test(pdf, compression, enable_statistics): - file_to_strore = io.BytesIO() - - gdf = cudf.from_pandas(pdf) - - gdf.to_orc( - file_to_strore, - compression=compression, - enable_statistics=enable_statistics, - ) - file_to_strore.seek(0) - - actual_df = cudf.read_orc(file_to_strore) - - compare_dataframe(pdf, actual_df) - - -if __name__ == "__main__": - run_test(globals(), sys.argv) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py deleted file mode 100644 index bbc19dce1a4..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_parquet.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
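Likewise, the removed `fuzz_test_orc.py` above round-trips through `to_orc`/`read_orc`; a minimal sketch (the old test's `enable_statistics` keyword is omitted here, since its spelling has varied across cudf releases):

```python
# ORC round-trip sketch mirroring the removed writer test.
import io

import cudf

gdf = cudf.DataFrame({"a": [1, 2, 3]})
buf = io.BytesIO()
gdf.to_orc(buf, compression="snappy")
buf.seek(0)
assert cudf.read_orc(buf)["a"].to_arrow().to_pylist() == [1, 2, 3]
```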
- -import sys - -import numpy as np -import pandas as pd - -import cudf -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.parquet import ParquetReader, ParquetWriter -from cudf._fuzz_testing.utils import ( - ALL_POSSIBLE_VALUES, - compare_dataframe, - run_test, -) - - -@pythonfuzz(data_handle=ParquetReader) -def parquet_reader_test(parquet_buffer): - pdf = pd.read_parquet(parquet_buffer) - gdf = cudf.read_parquet(parquet_buffer) - - compare_dataframe(gdf, pdf) - - -@pythonfuzz( - data_handle=ParquetReader, - params={ - "columns": ALL_POSSIBLE_VALUES, - "use_pandas_metadata": [True, False], - }, -) -def parquet_reader_columns(parquet_buffer, columns, use_pandas_metadata): - pdf = pd.read_parquet( - parquet_buffer, - columns=columns, - use_pandas_metadata=use_pandas_metadata, - ) - - gdf = cudf.read_parquet( - parquet_buffer, - columns=columns, - use_pandas_metadata=use_pandas_metadata, - ) - - compare_dataframe(gdf, pdf) - - -@pythonfuzz(data_handle=ParquetWriter) -def parquet_writer_test(pdf): - pd_file_name = "cpu_pdf.parquet" - gd_file_name = "gpu_pdf.parquet" - - gdf = cudf.from_pandas(pdf) - - pdf.to_parquet(pd_file_name) - gdf.to_parquet(gd_file_name) - - actual = cudf.read_parquet(gd_file_name) - expected = pd.read_parquet(pd_file_name) - compare_dataframe(actual, expected) - - actual = cudf.read_parquet(pd_file_name) - expected = pd.read_parquet(gd_file_name) - compare_dataframe(actual, expected) - - -@pythonfuzz( - data_handle=ParquetWriter, - params={ - "row_group_size": np.random.default_rng(seed=0).integers( - 1, 10000, 100 - ), - "compression": ["snappy", None], - }, -) -def parquet_writer_test_rowgroup_index_compression( - pdf, compression, row_group_size -): - pd_file_name = "cpu_pdf.parquet" - gd_file_name = "gpu_pdf.parquet" - - gdf = cudf.from_pandas(pdf) - - pdf.to_parquet( - pd_file_name, - compression=compression, - row_group_size=row_group_size, - ) - gdf.to_parquet( - gd_file_name, - compression=compression, - row_group_size=row_group_size, - ) - - actual = cudf.read_parquet(gd_file_name) - expected = pd.read_parquet(pd_file_name) - compare_dataframe(actual, expected) - - actual = cudf.read_parquet(pd_file_name) - expected = pd.read_parquet(gd_file_name) - compare_dataframe(actual, expected, nullable=False) - - -if __name__ == "__main__": - run_test(globals(), sys.argv) diff --git a/python/cudf/cudf/_fuzz_testing/tests/readme.md b/python/cudf/cudf/_fuzz_testing/tests/readme.md deleted file mode 100644 index f9ef1119a21..00000000000 --- a/python/cudf/cudf/_fuzz_testing/tests/readme.md +++ /dev/null @@ -1,100 +0,0 @@ -# Fuzz Tests - -This directory contains all the Fuzz tests for cudf library. - - -## Steps to write a fuzz test - -1. Add a Data Handler class which actually generates the necessary random data according to your requirements. This class should be added in `cudf/cudf/testing/`. A sample data handler class is: `CSVWriter`: https://github.com/rapidsai/cudf/blob/branch-0.16/python/cudf/cudf/testing/csv.py -2. Data Handlers are registered by the `pythonfuzz` decorator. At runtime, the Fuzzer will continuously run registered fuzz tests. - -```python -from cudf.testing.csv import CSVWriter - -@pythonfuzz(data_handle=CSVWriter) -def csv_writer_test(data_from_generate_input): - ... - ... - ... - -if __name__ == "__main__": - ... - ... - -``` -## Steps to run fuzz tests - -1. 
To run a fuzz test, for example a test(method) is in `write_csv.py`: - -```bash -python write_csv.py your_function_name -``` - -To run a basic csv write test in `write_csv.py`: -```bash -python write_csv.py csv_writer_test -``` - -## Tips to run specific crash file/files - -Using the `pythonfuzz` decorator pass in `regression=True` with `dirs` having list of directories -```python -@pythonfuzz(data_handle=CSVWriter, regression=True, dir=["/cudf/python/cudf/cudf/_fuzz_testing"]) -``` - - -## Tips to run for varying parameter combinations - -In the `pythonfuzz` decorator you can pass in the function parameters you would like to pass to the -fuzz-test being written via `params` as a dictionary. The values in dictionary are sampled randomly -and passed to the `your_custom_fuzz_test`. - -If a parameter value depends the kind of input generated by the `data_handle`(in this case `CSVReader`), -then you can assign `ALL_POSSIBLE_VALUES` constant to it. This constant is used as an identifier by the -`data_handle` to generate random parameter values for that specific parameter purely based on data. -To perform this customization `set_rand_params` should be implemented as shown in the below example. -```python -from cudf._fuzz_testing.main import pythonfuzz -from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES -@pythonfuzz( - data_handle=CSVWriter, - params={ - "columns": ALL_POSSIBLE_VALUES, - "is_folder": [True, False, None], - "chunksize": ALL_POSSIBLE_VALUES, - }, -) -def your_custom_fuzz_test(data_from_data_handle, dtype, is_folder, header): - ... - ... - ... -``` - -A sample implementation of `set_rand_params` in a `data_handle` class: -``` -def set_rand_params(self, params): - params_dict = {} - for param, values in params.items(): - if values == ALL_POSSIBLE_VALUES: - if param == "columns": - col_size = self._rand(len(self._current_buffer.columns)) - params_dict[param] = list( - np.unique( - np.random.choice( - self._current_buffer.columns, col_size - ) - ) - ) - elif param == "chunksize": - params_dict[param] = np.random.choice( - [ - None, - np.random.randint( - low=1, high=max(1, len(self._current_buffer)) - ), - ] - ) - else: - params_dict[param] = np.random.choice(values) - self._current_params["test_kwargs"] = self.process_kwargs(params_dict) -``` diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py deleted file mode 100644 index 4a8aea22184..00000000000 --- a/python/cudf/cudf/_fuzz_testing/utils.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
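One caveat in the removed readme above: its prose says `dirs` while its snippet passes `dir=`; the handler base class (`IOFuzz.__init__`, removed earlier in this patch) accepts `dirs`. A hypothetical replay sketch under that assumption:

```python
# Hypothetical regression-replay sketch based on the removed readme; the
# keyword is assumed to be `dirs` (a list of directories holding saved
# *_crash files), matching IOFuzz.__init__, and the path is a placeholder.
from cudf._fuzz_testing.csv import CSVWriter
from cudf._fuzz_testing.main import pythonfuzz


@pythonfuzz(
    data_handle=CSVWriter,
    regression=True,
    dirs=["/cudf/python/cudf/cudf/_fuzz_testing"],  # assumed crash-file dir
)
def csv_writer_regression_test(pdf):
    # Replayed crash inputs arrive here exactly like freshly generated ones.
    ...
```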
- -import random - -import fastavro -import numpy as np -import pandas as pd -import pyarrow as pa - -import cudf -from cudf.testing import assert_eq -from cudf.utils.dtypes import ( - pandas_dtypes_to_np_dtypes, - pyarrow_dtypes_to_pandas_dtypes, -) - -ALL_POSSIBLE_VALUES = "ALL_POSSIBLE_VALUES" - -_PANDAS_TO_AVRO_SCHEMA_MAP = { - cudf.dtype("int8"): "int", - pd.Int8Dtype(): ["int", "null"], - pd.Int16Dtype(): ["int", "null"], - pd.Int32Dtype(): ["int", "null"], - pd.Int64Dtype(): ["long", "null"], - pd.Float32Dtype(): ["float", "null"], - pd.Float64Dtype(): ["double", "null"], - pd.BooleanDtype(): ["boolean", "null"], - pd.StringDtype(): ["string", "null"], - cudf.dtype("bool_"): "boolean", - cudf.dtype("int16"): "int", - cudf.dtype("int32"): "int", - cudf.dtype("int64"): "long", - cudf.dtype("O"): "string", - cudf.dtype("str"): "string", - cudf.dtype("float32"): "float", - cudf.dtype("float64"): "double", - cudf.dtype(" Date: Tue, 5 Aug 2025 13:58:15 -0700 Subject: [PATCH 062/366] Use more pytest fixtures and avoid GPU parameterization in test_csv/cuda_*/cut.py and more (#19463) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Eliminate/reduce parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19463 --- python/cudf/cudf/tests/test_csv.py | 467 ++++++++---------- python/cudf/cudf/tests/test_cuda_apply.py | 36 +- .../cudf/cudf/tests/test_custom_accessor.py | 48 +- python/cudf/cudf/tests/test_cut.py | 102 ++-- python/cudf/cudf/tests/test_dataframe_copy.py | 48 +- 5 files changed, 289 insertions(+), 412 deletions(-) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index dbf00d7887e..9494d22a158 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -5,9 +5,7 @@ import os import re import shutil -from collections import OrderedDict from io import BytesIO, StringIO -from pathlib import Path import cupy as cp import numpy as np @@ -25,41 +23,36 @@ from cudf.testing._utils import assert_exceptions_equal, expect_warning_if -def make_numeric_dataframe(nrows, dtype): - df = pd.DataFrame() - df["col1"] = np.arange(nrows, dtype=dtype) - df["col2"] = np.arange(1, 1 + nrows, dtype=dtype) - return df - - -def make_datetime_dataframe(include_non_standard=False): - df = pd.DataFrame() - df["col1"] = np.array( - [ - "31/10/2010", - "05/03/2001", - "20/10/1994", - "18/10/1990", - "1/1/1970", - "2016-04-30T01:02:03.000", - "2038-01-19 03:14:07", - ] - ) - df["col2"] = np.array( - [ - "18/04/1995", - "14 / 07 / 1994", - "07/06/2006", - "16/09/2005", - "2/2/1970", - "2007-4-30 1:6:40.000PM", - "2038-01-19 03:14:08", - ] - ) - if include_non_standard: - # Last column contains non-standard date formats - df["col3"] = np.array( - [ +@pytest.fixture +def numeric_dataframe(): + return pd.DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + ) + + +@pytest.fixture +def datetime_dataframe(): + return pd.DataFrame( + { + "col1": [ + "31/10/2010", + "05/03/2001", + "20/10/1994", + "18/10/1990", + "1/1/1970", + "2016-04-30T01:02:03.000", + "2038-01-19 03:14:07", + ], + "col2": [ + "18/04/1995", + "14 / 07 / 1994", + "07/06/2006", + "16/09/2005", + "2/2/1970", + "2007-4-30 1:6:40.000PM", + "2038-01-19 03:14:08", + ], + "col3": [ "1 Jan", "2 January 1994", "Feb 2002", @@ -67,38 +60,33 @@ def 
make_datetime_dataframe(include_non_standard=False): "1-1-1996", "15-May-2009", "21-Dec-3262", - ] - ) - return df - - -def make_numpy_mixed_dataframe(): - df = pd.DataFrame() - df["Integer"] = np.array([2345, 11987, 9027, 9027]) - df["Date"] = np.array( - ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"] + ], + } ) - df["Float"] = np.array([9.001, 8.343, 6, 2.781]) - df["Integer2"] = np.array([2345, 106, 2088, 789277]) - df["Category"] = np.array(["M", "F", "F", "F"]) - df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - df["Boolean"] = np.array([True, False, True, False]) - return df @pytest.fixture def pd_mixed_dataframe(): - return make_numpy_mixed_dataframe() + return pd.DataFrame( + { + "Integer": [2345, 11987, 9027, 9027], + "Date": ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"], + "Float": [9.001, 8.343, 6, 2.781], + "Integer2": [2345, 106, 2088, 789277], + "Category": ["M", "F", "F", "F"], + "String": ["Alpha", "Beta", "Gamma", "Delta"], + "Boolean": [True, False, True, False], + } + ) @pytest.fixture -def cudf_mixed_dataframe(): - return cudf.from_pandas(make_numpy_mixed_dataframe()) +def cudf_mixed_dataframe(pd_mixed_dataframe): + return cudf.from_pandas(pd_mixed_dataframe) -def make_all_numeric_dataframe(): - df = pd.DataFrame() - +@pytest.fixture +def gdf_np_dtypes(): gdf_dtypes = [ "float", "float32", @@ -134,29 +122,17 @@ def make_all_numeric_dataframe(): np.uint32, np.uint64, ] + return dict(zip(gdf_dtypes, np_dtypes)) - for i in range(len(gdf_dtypes)): - df[gdf_dtypes[i]] = np.arange(10, dtype=np_dtypes[i]) - return ( - df, - OrderedDict(zip(gdf_dtypes, gdf_dtypes)), - OrderedDict(zip(gdf_dtypes, np_dtypes)), - ) - - -def make_all_numeric_extremes_dataframe(): - # integers 0,+1,-1,min,max - # float 0.0, -0.0,+1,-1,min,max, nan, esp, espneg, tiny, [-ve values] - df, gdf_dtypes, pdf_dtypes = make_all_numeric_dataframe() - df = pd.DataFrame() - - for gdf_dtype in gdf_dtypes: - np_type = pdf_dtypes[gdf_dtype] +@pytest.fixture +def numeric_extremes_dataframe(gdf_np_dtypes): + data = {} + for typ, np_type in gdf_np_dtypes.items(): if np.dtype(np_type).kind in "iu": itype = np.iinfo(np_type) extremes = [0, +1, -1, itype.min, itype.max] - df[gdf_dtype] = np.array(extremes * 4).astype(np_type)[:20] + data[typ] = np.array(extremes * 4).astype(np_type)[:20] else: ftype = np.finfo(np_type) extremes = [ @@ -177,60 +153,21 @@ def make_all_numeric_extremes_dataframe(): -ftype.epsneg, -ftype.tiny, ] - df[gdf_dtype] = np.array(extremes * 4, dtype=np_type)[:20] - return ( - df, - gdf_dtypes, - pdf_dtypes, - ) - - -@pytest.fixture -def pandas_extreme_numeric_dataframe(): - return make_all_numeric_extremes_dataframe()[0] - - -@pytest.fixture -def cudf_extreme_numeric_dataframe(pandas_extreme_numeric_dataframe): - return cudf.from_pandas(pandas_extreme_numeric_dataframe) - - -@pytest.fixture -def path_or_buf(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_path_or_buf.csv") - df = make_numeric_dataframe(10, np.int32) - - df.to_csv(fname, index=False, header=False) - buffer = df.to_csv(index=False, header=False) - - def _make_path_or_buf(src): - if src == "filepath": - return str(fname) - if src == "pathobj": - return fname - if src == "bytes_io": - return BytesIO(buffer.encode()) - if src == "string_io": - return StringIO(buffer) - if src == "url": - return Path(fname).as_uri() - - raise ValueError("Invalid source type") + data[typ] = np.array(extremes * 4, dtype=np_type)[:20] + return pd.DataFrame(data) - yield _make_path_or_buf - -dtypes = 
[np.float64, np.float32, np.int64, np.int32, np.uint64, np.uint32] -dtypes_dict = {"1": np.float64, "2": np.float32, "3": np.int64, "4": np.int32} -nelem = [5, 25, 100] +@pytest.fixture( + params=[np.float64, np.float32, np.int64, np.int32, np.uint64, np.uint32] +) +def dtype(request): + return request.param -@pytest.mark.parametrize("dtype", dtypes) -@pytest.mark.parametrize("nelem", nelem) -def test_csv_reader_numeric_data(dtype, nelem, tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file1.csv") +def test_csv_reader_numeric_data(dtype, numeric_dataframe, tmp_path): + fname = tmp_path / "tmp_csvreader_file1.csv" - df = make_numeric_dataframe(nelem, dtype) + df = numeric_dataframe.astype(dtype) df.to_csv(fname, index=False, header=False) dtypes = [df[k].dtype for k in df.columns] @@ -241,8 +178,8 @@ def test_csv_reader_numeric_data(dtype, nelem, tmpdir): @pytest.mark.parametrize("parse_dates", [["date2"], [0], ["date1", 1, "bad"]]) -def test_csv_reader_datetime(parse_dates): - df = make_datetime_dataframe(include_non_standard=True) +def test_csv_reader_datetime(datetime_dataframe, parse_dates): + df = datetime_dataframe buffer = df.to_csv(index=False, header=False) gdf = read_csv( @@ -264,12 +201,14 @@ def test_csv_reader_datetime(parse_dates): assert_eq(gdf, pdf) -@pytest.mark.parametrize("pandas_arg", [{"delimiter": "|"}, {"sep": "|"}]) -@pytest.mark.parametrize("cudf_arg", [{"sep": "|"}, {"delimiter": "|"}]) +@pytest.mark.parametrize("p_arg", ["delimiter", "sep"]) +@pytest.mark.parametrize("c_arg", ["sep", "delimiter"]) def test_csv_reader_mixed_data_delimiter_sep( - tmpdir, pandas_arg, cudf_arg, pd_mixed_dataframe + tmp_path, p_arg, c_arg, pd_mixed_dataframe ): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file3.csv") + pandas_arg = {p_arg: "|"} + cudf_arg = {c_arg: "|"} + fname = tmp_path / "tmp_csvreader_file3.csv" pd_mixed_dataframe.to_csv(fname, sep="|", index=False, header=False) @@ -318,8 +257,8 @@ def test_csv_reader_mixed_data_delimiter_sep( @pytest.mark.parametrize("use_list", [False, True]) -def test_csv_reader_dtype_list(use_list): - df = make_numeric_dataframe(10, dtype=np.float32) +def test_csv_reader_dtype_list(numeric_dataframe, use_list): + df = numeric_dataframe.astype(np.float32) buffer = df.to_csv(index=False, header=False) # PANDAS doesn't list but cudf does (treated as implied ordered dict) @@ -335,31 +274,34 @@ def test_csv_reader_dtype_list(use_list): @pytest.mark.parametrize("use_names", [False, True]) -def test_csv_reader_dtype_dict(use_names): +def test_csv_reader_dtype_dict(use_names, gdf_np_dtypes): # Save with the column header if not explicitly specifying a list of names - df, gdf_dtypes, pdf_dtypes = make_all_numeric_dataframe() - buffer = df.to_csv(index=False, header=(not use_names)) + df = pd.DataFrame( + { + typ: np.zeros(3, dtype=np_type) + for typ, np_type in gdf_np_dtypes.items() + } + ) + buffer = df.to_csv(index=False, header=not use_names) dtypes = df.dtypes.to_dict() - gdf_names = list(gdf_dtypes.keys()) if use_names else None - pdf_names = list(pdf_dtypes.keys()) if use_names else None - gdf = read_csv(StringIO(buffer), dtype=dtypes, names=gdf_names) - pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=pdf_names) + names = list(gdf_np_dtypes.keys()) if use_names else None + gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) + pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) assert_eq(gdf, pdf) @pytest.mark.filterwarnings("ignore:invalid value encountered in cast") 
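Patch 062's conversion above replaces module-level dtype lists (and their per-test `parametrize` decorators) with a single parametrized fixture; a standalone illustration of that pytest pattern, with a hypothetical test not taken from the patch:

```python
import numpy as np
import pandas as pd
import pytest


# A parametrized fixture stands in for a module-level `dtypes` list: every
# test that requests `dtype` runs once per parameter, with no decorator of
# its own.
@pytest.fixture(params=[np.float64, np.float32, np.int64, np.int32])
def dtype(request):
    return request.param


def test_astype_roundtrip(dtype):  # hypothetical example test
    df = pd.DataFrame({"col1": [1, 2, 3]}).astype(dtype)
    assert df["col1"].dtype == dtype
```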
@pytest.mark.parametrize("use_names", [True, False]) -def test_csv_reader_dtype_extremes(use_names): +def test_csv_reader_dtype_extremes(use_names, numeric_extremes_dataframe): # Save with the column header if not explicitly specifying a list of names - df, gdf_dtypes, pdf_dtypes = make_all_numeric_extremes_dataframe() - buffer = df.to_csv(index=False, header=(not use_names)) + df = numeric_extremes_dataframe + buffer = df.to_csv(index=False, header=not use_names) dtypes = df.dtypes.to_dict() - gdf_names = list(gdf_dtypes.keys()) if use_names else None - pdf_names = list(pdf_dtypes.keys()) if use_names else None + names = df.columns.to_list() if use_names else None - gdf = read_csv(StringIO(buffer), dtype=dtypes, names=gdf_names) - pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=pdf_names) + gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) + pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) assert_eq(gdf, pdf) @@ -368,8 +310,8 @@ def test_csv_reader_dtype_extremes(use_names): PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="https://github.com/pandas-dev/pandas/issues/52449", ) -def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file5.csv") +def test_csv_reader_skiprows_skipfooter(tmp_path, pd_mixed_dataframe): + fname = tmp_path / "tmp_csvreader_file5.csv" pd_mixed_dataframe.to_csv( fname, columns=["Integer", "Date", "Float"], index=False, header=False @@ -400,8 +342,8 @@ def test_csv_reader_skiprows_skipfooter(tmpdir, pd_mixed_dataframe): assert_eq(df_out, out, check_dtype=False) -def test_csv_reader_negative_vals(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file6.csv") +def test_csv_reader_negative_vals(tmp_path): + fname = tmp_path / "tmp_csvreader_file6.csv" names = ["0", "1", "2"] dtypes = ["float32", "float32", "float32"] @@ -425,8 +367,8 @@ def test_csv_reader_negative_vals(tmpdir): np.testing.assert_allclose(two, df["2"].to_numpy()) -def test_csv_reader_strings(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file7.csv") +def test_csv_reader_strings(tmp_path): + fname = tmp_path / "tmp_csvreader_file7.csv" names = ["text", "int"] dtypes = ["str", "int"] @@ -453,8 +395,8 @@ def test_csv_reader_strings(tmpdir): assert df["text"][3] == "d" -def test_csv_reader_strings_quotechars(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file8.csv") +def test_csv_reader_strings_quotechars(tmp_path): + fname = tmp_path / "tmp_csvreader_file8.csv" names = ["text", "int"] dtypes = ["str", "int"] @@ -481,8 +423,8 @@ def test_csv_reader_strings_quotechars(tmpdir): assert df["text"][3] == "f,,!.," -def test_csv_reader_usecols_int_char(tmpdir, pd_mixed_dataframe): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file10.csv") +def test_csv_reader_usecols_int_char(tmp_path, pd_mixed_dataframe): + fname = tmp_path / "tmp_csvreader_file10.csv" pd_mixed_dataframe.to_csv( fname, columns=["Integer", "Date", "Float", "Integer2"], @@ -520,8 +462,8 @@ def test_csv_reader_mangle_dupe_cols(tmpdir, buffer, mangle_dupe_cols): assert_eq(cu_df, pd_df) -def test_csv_reader_float_decimal(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file12.csv") +def test_csv_reader_float_decimal(tmp_path): + fname = tmp_path / "tmp_csvreader_file12.csv" names = ["basic_32", "basic_64", "round", "decimal_only", "precision"] dtypes = ["float32", "float64", "float64", "float32", "float64"] @@ -623,8 +565,8 @@ def test_csv_reader_NaN_values(): assert 
gdf.dtypes.iloc[0] == np.dtype("object") -def test_csv_reader_thousands(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file13.csv") +def test_csv_reader_thousands(tmp_path): + fname = tmp_path / "tmp_csvreader_file13.csv" names = dtypes = [ "float32", @@ -748,8 +690,8 @@ def test_csv_reader_compression( (["x", "y"], None, "True,1\nFalse,0", None, None), ], ) -def test_csv_reader_bools(tmpdir, names, dtypes, data, trues, falses): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file11.csv") +def test_csv_reader_bools(tmp_path, names, dtypes, data, trues, falses): + fname = tmp_path / "tmp_csvreader_file11.csv" lines = [",".join(names), data] @@ -852,8 +794,8 @@ def test_csv_reader_bools_NA(): assert_eq(df, expected) -def test_csv_quotednumbers(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file12.csv") +def test_csv_quotednumbers(tmp_path): + fname = tmp_path / "tmp_csvreader_file12.csv" names = ["integer", "decimal"] dtypes = ["int32", "float32"] @@ -881,8 +823,8 @@ def test_csv_quotednumbers(tmpdir): np.testing.assert_allclose(decimal_ref, df2["decimal"].to_numpy()) -def test_csv_reader_nrows(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file14.csv") +def test_csv_reader_nrows(tmp_path): + fname = tmp_path / "tmp_csvreader_file14.csv" names = ["int1", "int2"] dtypes = ["int32", "int32"] @@ -946,10 +888,9 @@ def test_csv_reader_nrows(tmpdir): read_csv(str(fname), nrows=read_rows, skipfooter=1) -def test_csv_reader_gzip_compression_strings(tmpdir): - fnamebase = tmpdir.mkdir("gdf_csv") - fname = fnamebase.join("tmp_csvreader_file15.csv") - fnamez = fnamebase.join("tmp_csvreader_file15.csv.gz") +def test_csv_reader_gzip_compression_strings(tmp_path): + fname = tmp_path / "tmp_csvreader_file15.csv" + fnamez = tmp_path / "tmp_csvreader_file15.csv.gz" names = ["text", "int"] dtypes = ["str", "int"] @@ -980,7 +921,7 @@ def test_csv_reader_gzip_compression_strings(tmpdir): assert df["text"][3] == "d" -@pytest.mark.parametrize("skip_rows", [0, 2, 4]) +@pytest.mark.parametrize("skip_rows", [0, 4]) @pytest.mark.parametrize("header_row", [0, 2]) def test_csv_reader_skiprows_header(skip_rows, header_row): names = ["float_point", "integer"] @@ -1074,16 +1015,27 @@ def test_csv_reader_filenotfound(tmpdir): @pytest.mark.parametrize( - "src", ["filepath", "pathobj", "bytes_io", "string_io", "url"] + "src", + [ + lambda path: str(path), + lambda path: path, + lambda path: BytesIO(path.read_bytes()), + lambda path: StringIO(path.read_text()), + lambda path: path.as_uri(), + ], + ids=["filepath", "pathlib.Path", "ByteIO", "StringIO", "url"], ) -def test_csv_reader_filepath_or_buffer(tmpdir, path_or_buf, src): - expect = pd.read_csv(path_or_buf("filepath")) - got = cudf.read_csv(path_or_buf(src)) +def test_csv_reader_filepath_or_buffer(tmp_path, src): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=np.int32) + csv_path = tmp_path / "tmp.csv" + df.to_csv(csv_path, index=False, header=False) + expect = pd.read_csv(csv_path) + got = cudf.read_csv(src(csv_path)) assert_eq(expect, got) -def test_small_zip(tmpdir): +def test_small_zip(tmp_path): df = pd.DataFrame( { "a": [1997] * 2, @@ -1092,14 +1044,14 @@ def test_small_zip(tmpdir): } ) - fname = tmpdir.join("small_zip_file.zip") + fname = tmp_path / "small_zip_file.zip" df.to_csv(fname, index=False) got = cudf.read_csv(fname) assert_eq(df, got) -def test_csv_reader_carriage_return(tmpdir): +def test_csv_reader_carriage_return(): rows = 100 names = ["int_row", "int_double_row"] buffer = 
",".join(names) + "\r\n" @@ -1145,8 +1097,8 @@ def test_csv_reader_tabs(): @pytest.mark.parametrize("segment_bytes", [10000, 19999, 30001, 36000]) -def test_csv_reader_byte_range(tmpdir, segment_bytes): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file16.csv") +def test_csv_reader_byte_range(tmp_path, segment_bytes): + fname = tmp_path / "tmp_csvreader_file16.csv" names = ["int1", "int2"] @@ -1173,8 +1125,8 @@ def test_csv_reader_byte_range(tmpdir, segment_bytes): assert list(df["int2"]) == list(ref_df["int2"]) -def test_csv_reader_byte_range_type_corner_case(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file17.csv") +def test_csv_reader_byte_range_type_corner_case(tmp_path): + fname = tmp_path / "tmp_csvreader_file17.csv" cudf.datasets.timeseries( start="2000-01-01", @@ -1396,7 +1348,7 @@ def test_csv_reader_repeated_column_name(): assert_eq(pdf.columns, gdf.columns) -def test_csv_reader_bools_false_positives(tmpdir): +def test_csv_reader_bools_false_positives(): # values that are equal to ["True", "TRUE", "False", "FALSE"] # when using ints to detect bool values items = [3977, 4329, 24015, 27567] @@ -1408,8 +1360,8 @@ def test_csv_reader_bools_false_positives(tmpdir): np.testing.assert_array_equal(items, df["0"].to_numpy()) -def test_csv_reader_aligned_byte_range(tmpdir): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file19.csv") +def test_csv_reader_aligned_byte_range(tmp_path): + fname = tmp_path / "tmp_csvreader_file19.csv" nelem = 1000 input_df = pd.DataFrame( @@ -1560,8 +1512,8 @@ def test_csv_blank_first_row(lineterminator): @pytest.mark.parametrize("contents", ["", "\n"]) -def test_csv_empty_file(tmpdir, contents): - fname = tmpdir.mkdir("gdf_csv").join("test_csv_empty_file.csv") +def test_csv_empty_file(tmp_path, contents): + fname = tmp_path / "test_csv_empty_file.csv" with open(fname, "w") as f: f.write(contents) @@ -1580,7 +1532,7 @@ def test_csv_empty_file(tmpdir, contents): @pytest.mark.parametrize("contents", ["", "\n"]) -def test_csv_empty_buffer(tmpdir, contents): +def test_csv_empty_buffer(contents): col_names = ["col1", "col2", "col3", "col4"] in_dtypes = ["int", "str", "float", "short"] out_dtypes = ["int64", "object", "float64", "int16"] @@ -1613,11 +1565,11 @@ def test_csv_reader_partial_dtype(dtype): assert all(names_df.dtypes == ["int16", "int64"]) -def test_csv_writer_file_handle(tmpdir): +def test_csv_writer_file_handle(tmp_path): df = pd.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) gdf = cudf.from_pandas(df) - gdf_df_fname = tmpdir.join("gdf_df_1.csv") + gdf_df_fname = tmp_path / "gdf_df_1.csv" with open(gdf_df_fname, "w") as f: gdf.to_csv(path_or_buf=f, index=False) assert os.path.exists(gdf_df_fname) @@ -1626,11 +1578,11 @@ def test_csv_writer_file_handle(tmpdir): assert_eq(gdf, gdf2) -def test_csv_writer_file_append(tmpdir): +def test_csv_writer_file_append(tmp_path): gdf1 = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) gdf2 = cudf.DataFrame({"a": [4, 5, 6], "b": ["foo", "bar", "baz"]}) - gdf_df_fname = tmpdir.join("gdf_df_append.csv") + gdf_df_fname = tmp_path / "gdf_df_append.csv" with open(gdf_df_fname, "w") as f: gdf1.to_csv(f, index=False) with open(gdf_df_fname, "a") as f: @@ -1641,7 +1593,7 @@ def test_csv_writer_file_append(tmpdir): assert_eq(result, expected, check_index_type=True) -def test_csv_writer_buffer(tmpdir): +def test_csv_writer_buffer(): gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) buffer = BytesIO() @@ -1651,13 +1603,11 @@ def 
test_csv_writer_buffer(tmpdir): assert_eq(result, gdf) -@pytest.mark.parametrize("dtype", dtypes) -@pytest.mark.parametrize("nelem", nelem) -def test_csv_writer_numeric_data(dtype, nelem, tmpdir): - pdf_df_fname = tmpdir.join("pdf_df_1.csv") - gdf_df_fname = tmpdir.join("gdf_df_1.csv") +def test_csv_writer_numeric_data(dtype, numeric_dataframe, tmp_path): + pdf_df_fname = tmp_path / "pdf_df_1.csv" + gdf_df_fname = tmp_path / "gdf_df_1.csv" - df = make_numeric_dataframe(nelem, dtype) + df = numeric_dataframe.astype(dtype) gdf = cudf.from_pandas(df) df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") gdf.to_csv(path_or_buf=gdf_df_fname, index=False) @@ -1670,11 +1620,11 @@ def test_csv_writer_numeric_data(dtype, nelem, tmpdir): assert_eq(expect, got) -def test_csv_writer_datetime_data(tmpdir): - pdf_df_fname = tmpdir.join("pdf_df_2.csv") - gdf_df_fname = tmpdir.join("gdf_df_2.csv") +def test_csv_writer_datetime_data(datetime_dataframe, tmp_path): + pdf_df_fname = tmp_path / "pdf_df_2.csv" + gdf_df_fname = tmp_path / "gdf_df_2.csv" - df = make_datetime_dataframe() + df = datetime_dataframe gdf = cudf.from_pandas(df) df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") gdf.to_csv(path_or_buf=gdf_df_fname, index=False) @@ -1727,15 +1677,14 @@ def test_csv_writer_multichar_terminator(lineterminator, cudf_mixed_dataframe): None, ], ) -@pytest.mark.parametrize( - "header", [True, False, np.bool_(True), np.bool_(False)] -) -@pytest.mark.parametrize( - "index", [True, False, np.bool_(True), np.bool_(False)] -) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("bool_box", [bool, np.bool_]) def test_csv_writer_column_and_header_options( - columns, header, index, pd_mixed_dataframe + columns, header, index, bool_box, pd_mixed_dataframe ): + header = bool_box(header) + index = bool_box(index) pdf = pd_mixed_dataframe df = cudf.from_pandas(pdf) @@ -1755,14 +1704,13 @@ def test_csv_writer_column_and_header_options( def test_csv_writer_empty_columns_parameter(cudf_mixed_dataframe): - df = cudf_mixed_dataframe - write_str = df.to_csv(columns=[], index=False) + write_str = cudf_mixed_dataframe.to_csv(columns=[], index=False) assert_eq(write_str, "\n") -def test_csv_writer_multiindex(tmpdir): - pdf_df_fname = tmpdir.join("pdf_df_3.csv") - gdf_df_fname = tmpdir.join("gdf_df_3.csv") +def test_csv_writer_multiindex(tmp_path): + pdf_df_fname = tmp_path / "pdf_df_3.csv" + gdf_df_fname = tmp_path / "gdf_df_3.csv" rng = np.random.default_rng(seed=0) gdf = cudf.DataFrame( @@ -1786,10 +1734,9 @@ def test_csv_writer_multiindex(tmpdir): assert_eq(expect, got) -@pytest.mark.parametrize("chunksize", [None, 9, 1000]) -@pytest.mark.parametrize("dtype", dtypes) -def test_csv_writer_chunksize(chunksize, dtype): - cu_df = cudf.from_pandas(make_numeric_dataframe(100, dtype)) +@pytest.mark.parametrize("chunksize", [None, 2, 1000]) +def test_csv_writer_chunksize(chunksize, numeric_dataframe, dtype): + cu_df = cudf.from_pandas(numeric_dataframe.astype(dtype)) buffer = BytesIO() cu_df.to_csv(buffer, chunksize=chunksize, index=False) @@ -1799,18 +1746,15 @@ def test_csv_writer_chunksize(chunksize, dtype): @pytest.mark.parametrize( - "df", + "data", [ - cudf.DataFrame({"vals": [1, 2, 3]}), - cudf.DataFrame( - {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]} - ), - cudf.DataFrame( - {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]} - ), + {"vals": [1, 2, 3]}, + {"vals1": [1, 2, 3], "vals2": ["hello", 
"rapids", "cudf"]}, + {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, ], ) -def test_to_csv_empty_filename(df): +def test_to_csv_empty_filename(data): + df = cudf.DataFrame(data) pdf = df.to_pandas() actual = df.to_csv() @@ -1820,18 +1764,15 @@ def test_to_csv_empty_filename(df): @pytest.mark.parametrize( - "df", + "data", [ - cudf.DataFrame({"vals": [1, 2, 3]}), - cudf.DataFrame( - {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]} - ), - cudf.DataFrame( - {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]} - ), + {"vals": [1, 2, 3]}, + {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]}, + {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, ], ) -def test_to_csv_StringIO(df): +def test_to_csv_StringIO(data): + df = cudf.DataFrame(data) cudf_io = StringIO() pandas_io = StringIO() @@ -1846,8 +1787,8 @@ def test_to_csv_StringIO(df): assert cudf_io.read() == pandas_io.read() -def test_csv_writer_empty_dataframe(tmpdir): - df_fname = tmpdir.join("gdf_df_5.csv") +def test_csv_writer_empty_dataframe(tmp_path): + df_fname = tmp_path / "gdf_df_5.csv" gdf = cudf.DataFrame({"float_point": [], "integer": []}) gdf["float_point"] = gdf["float_point"].astype("float") gdf["integer"] = gdf["integer"].astype("int") @@ -1860,12 +1801,12 @@ def test_csv_writer_empty_dataframe(tmpdir): assert all(df.dtypes == ["object", "object"]) -def test_csv_write_chunksize_corner_case(tmpdir): +def test_csv_write_chunksize_corner_case(tmp_path): # With this num of rows and chunksize # libcudf splits table such a way that it # will end up creating an empty table slice # which caused the issue 5588. - df_fname = tmpdir.join("gdf_df_17.csv") + df_fname = tmp_path / "gdf_df_17.csv" df = cudf.DataFrame({"a": np.arange(10_000)}) df.to_csv(df_fname, chunksize=1000, index=False) got = cudf.read_csv(df_fname) @@ -1881,35 +1822,30 @@ def test_csv_write_no_caller_manipulation(): @pytest.mark.parametrize( - "df", + "pdf", [ - cudf.DataFrame({"a": [1, 2, 3], "": [10, 20, 40]}), - cudf.DataFrame({"": [10, 20, 40], "a": [1, 2, 3]}), - cudf.DataFrame( + pd.DataFrame({"a": [1, 2, 3], "": [10, 20, 40]}), + pd.DataFrame({"": [10, 20, 40], "a": [1, 2, 3]}), + pd.DataFrame( {"a": [1, 2, 3], "": [10, 20, 40]}, - index=cudf.Index(["a", "z", "v"], name="custom name"), + index=pd.Index(["a", "z", "v"], name="custom name"), ), ], ) @pytest.mark.parametrize("index", [True, False]) @pytest.mark.parametrize("columns", [["a"], [""], None]) -def test_csv_write_empty_column_name(df, index, columns): - pdf = df.to_pandas() +def test_csv_write_empty_column_name(pdf, index, columns): + df = cudf.DataFrame.from_pandas(pdf) expected = pdf.to_csv(index=index, columns=columns) actual = df.to_csv(index=index, columns=columns) assert expected == actual -@pytest.mark.parametrize( - "df", - [ - cudf.DataFrame(), - cudf.DataFrame(index=cudf.Index([], name="index name")), - ], -) +@pytest.mark.parametrize("idx", [None, pd.Index([], name="index name")]) @pytest.mark.parametrize("index", [True, False]) -def test_csv_write_empty_dataframe(df, index): +def test_csv_write_empty_dataframe(idx, index): + df = cudf.DataFrame(index=idx) pdf = df.to_pandas() expected = pdf.to_csv(index=index) @@ -2016,13 +1952,13 @@ def test_csv_reader_datetime_dtypes(dtype): @pytest.mark.parametrize( "df", [ - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": cudf.Series([1, 2, 3, 1, 2], dtype="category"), "b": cudf.Series(["a", "c", "a", "b", "a"], dtype="category"), } ), - cudf.DataFrame( + lambda: cudf.DataFrame( { "a": 
cudf.Series([1.1, 2, 3, 1.1, 2], dtype="category"), "b": cudf.Series( @@ -2030,7 +1966,7 @@ def test_csv_reader_datetime_dtypes(dtype): ), } ), - cudf.DataFrame( + lambda: cudf.DataFrame( { "b": cudf.Series( [1.1, 2, 3, 1.1, 2], @@ -2044,6 +1980,7 @@ def test_csv_reader_datetime_dtypes(dtype): ], ) def test_csv_writer_category(df): + df = df() pdf = df.to_pandas() expected = pdf.to_csv() @@ -2190,12 +2127,12 @@ def test_default_integer_bitwidth_partial( @pytest.mark.filterwarnings("ignore:invalid value encountered in cast") def test_default_integer_bitwidth_extremes( - cudf_extreme_numeric_dataframe, default_integer_bitwidth + numeric_extremes_dataframe, default_integer_bitwidth ): # Test that integer columns in csv are _inferred_ as user specified # bitwidth buf = BytesIO() - cudf_extreme_numeric_dataframe.to_csv(buf) + cudf.DataFrame.from_pandas(numeric_extremes_dataframe).to_csv(buf) buf.seek(0) read = cudf.read_csv(buf) @@ -2264,10 +2201,10 @@ def test_column_selection_plus_column_names(usecols, names): ) -def test_read_compressed_BOM(tmpdir): +def test_read_compressed_BOM(tmp_path): buffer = 'int, string\n1, "a"\n2, "b"\n3, "c"\n' - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file20.gz") + fname = tmp_path / "tmp_csvreader_file20.gz" with gzip.open(fname, "wt", encoding="utf-8") as f: f.write(codecs.BOM_UTF8.decode("utf-8")) f.write(buffer) diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index d22d33e2957..80c794cb0f4 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -67,8 +67,9 @@ def test_dataframe_apply_rows(dtype, has_nulls, pessimistic): assert_eq(df_expected, df_actual) -@pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129]) -def test_df_apply_rows(nelem): +def test_df_apply_rows(): + nelem = 20 + def kernel(in1, in2, in3, out1, out2, extra1, extra2): for i, (x, y, z) in enumerate(zip(in1, in2, in3)): out1[i] = extra2 * x - extra1 * y @@ -100,9 +101,10 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): np.testing.assert_array_almost_equal(got_out2, expect_out2) -@pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129]) -@pytest.mark.parametrize("chunksize", [1, 2, 3, 4, 23]) -def test_df_apply_chunks(nelem, chunksize): +@pytest.mark.parametrize("chunksize", [1, 4, 23]) +def test_df_apply_chunks(chunksize): + nelem = 20 + def kernel(in1, in2, in3, out1, out2, extra1, extra2): for i, (x, y, z) in enumerate(zip(in1, in2, in3)): out1[i] = extra2 * x - extra1 * y + z @@ -134,8 +136,9 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): np.testing.assert_array_almost_equal(got_out2.to_numpy(), expect_out2) -@pytest.mark.parametrize("nelem", [1, 15, 30, 64, 128, 129]) -def test_df_apply_custom_chunks(nelem): +def test_df_apply_custom_chunks(): + nelem = 20 + def kernel(in1, in2, in3, out1, out2, extra1, extra2): for i, (x, y, z) in enumerate(zip(in1, in2, in3)): out1[i] = extra2 * x - extra1 * y + z @@ -172,10 +175,11 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): np.testing.assert_array_almost_equal(got_out2.to_numpy(), expect_out2) -@pytest.mark.parametrize("nelem", [1, 15, 30, 64, 128, 129]) @pytest.mark.parametrize("blkct", [None, 1, 8]) -@pytest.mark.parametrize("tpb", [1, 8, 64]) -def test_df_apply_custom_chunks_blkct_tpb(nelem, blkct, tpb): +@pytest.mark.parametrize("tpb", [1, 8]) +def test_df_apply_custom_chunks_blkct_tpb(blkct, tpb): + nelem = 20 + def kernel(in1, in2, in3, out1, out2, extra1, extra2): for i in range(cuda.threadIdx.x, 
in1.size, cuda.blockDim.x): x = in1[i] @@ -220,8 +224,9 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): np.testing.assert_array_almost_equal(got_out2.to_numpy(), expect_out2) -@pytest.mark.parametrize("nelem", [1, 2, 64, 128, 1000, 5000]) -def test_df_apply_rows_incols_mapping(nelem): +def test_df_apply_rows_incols_mapping(): + nelem = 20 + def kernel(x, y, z, out1, out2, extra1, extra2): for i, (a, b, c) in enumerate(zip(x, y, z)): out1[i] = extra2 * a - extra1 * b @@ -250,9 +255,10 @@ def kernel(x, y, z, out1, out2, extra1, extra2): assert_eq(outdf[["out1", "out2"]], expected_out) -@pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129]) -@pytest.mark.parametrize("chunksize", [1, 2, 3, 4, 23]) -def test_df_apply_chunks_incols_mapping(nelem, chunksize): +@pytest.mark.parametrize("chunksize", [1, 4, 23]) +def test_df_apply_chunks_incols_mapping(chunksize): + nelem = 20 + def kernel(q, p, r, out1, out2, extra1, extra2): for i, (a, b, c) in enumerate(zip(q, p, r)): out1[i] = extra2 * a - extra1 * b + c diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py index 278e63f3e8b..f45b85e9c8f 100644 --- a/python/cudf/cudf/tests/test_custom_accessor.py +++ b/python/cudf/cudf/tests/test_custom_accessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. import pandas as pd import pytest @@ -28,26 +28,20 @@ def bounding_box(self): return (min_x, min_y, max_x, max_y) -@pytest.mark.parametrize( - "gdf", [cudf.datasets.randomdata(nrows=6, dtypes={"x": int, "y": int})] -) -def test_dataframe_accessor(gdf): +def test_dataframe_accessor(): + gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) pdf = gdf.to_pandas() assert_eq(gdf.point.bounding_box, pdf.point.bounding_box) -@pytest.mark.parametrize( - "gdf1", [cudf.datasets.randomdata(nrows=1, dtypes={"x": int, "y": int})] -) -@pytest.mark.parametrize( - "gdf2", [cudf.datasets.randomdata(nrows=1, dtypes={"x": int, "y": int})] -) -def test_dataframe_accessor_idendity(gdf1, gdf2): +def test_dataframe_accessor_identity(): """Test for accessor identities - An object should hold persistent reference to the same accessor - Different objects should hold difference instances of the accessor """ + gdf1 = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + gdf2 = gdf1.copy() assert gdf1.point is gdf1.point assert gdf1.point is not gdf2.point @@ -65,28 +59,18 @@ def __getitem__(self, i): return self._obj[2 * i - 1] -@pytest.mark.parametrize("gidx", [cudf.Index(list(range(0, 50)))]) -def test_index_accessor(gidx): - pidx = gidx.to_pandas() +@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series]) +def test_index_series_accessor(klass): + obj = klass([1, 2, 3]) + pobj = obj.to_pandas() + assert_eq(obj.odd[1], pobj.odd[1]) - for i in range(1, 10): - assert_eq(gidx.odd[i], pidx.odd[i]) - -@pytest.mark.parametrize("gs", [cudf.Series(list(range(1, 50)))]) -def test_series_accessor(gs): - ps = gs.to_pandas() - - for i in range(1, 10): - assert_eq(gs.odd[i], ps.odd[i]) - - -@pytest.mark.parametrize( - "gdf", [cudf.datasets.randomdata(nrows=6, dtypes={"x": int, "y": int})] -) -@pytest.mark.parametrize("gidx", [cudf.Index(list(range(1, 50)))]) -@pytest.mark.parametrize("gs", [cudf.Series(list(range(1, 50)))]) -def test_accessor_space_separate(gdf, gidx, gs): +def test_accessor_space_separate(): + data = [1, 2, 3] + gdf = cudf.DataFrame(data) + gidx = cudf.Index(data) + gs = cudf.Series(data) assert not id(gdf._accessors) == id(gidx._accessors) 
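The accessor tests above rely on a registration step defined earlier in `test_custom_accessor.py`; a sketch of cudf's pandas-style extension hook, reusing the tests' `point` accessor name:

```python
# Registering a "point" DataFrame accessor via cudf's extension API; the
# bounding-box body is a sketch consistent with the tests above.
import cudf


@cudf.api.extensions.register_dataframe_accessor("point")
class PointsAccessor:
    def __init__(self, obj):
        self._obj = obj

    @property
    def bounding_box(self):
        xs, ys = self._obj["x"], self._obj["y"]
        return (xs.min(), ys.min(), xs.max(), ys.max())


df = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
assert df.point.bounding_box == (1, 4, 3, 6)
```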
assert not id(gidx._accessors) == id(gs._accessors) assert not id(gdf._accessors) == id(gs._accessors) diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/test_cut.py index 3f31da035aa..3fc05599976 100644 --- a/python/cudf/cudf/tests/test_cut.py +++ b/python/cudf/cudf/tests/test_cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. """ Test related to Cut @@ -12,19 +12,16 @@ from cudf.testing import assert_eq -@pytest.mark.parametrize( - "x", [[1, 7, 5, 4, 6, 3], [1, 7], np.array([1, 7, 5, 4, 6, 3])] -) +@pytest.mark.parametrize("box", [list, np.array]) @pytest.mark.parametrize("bins", [1, 2, 3]) @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize( - "ordered", [True] -) # if ordered is False we need labels -@pytest.mark.parametrize("precision", [1, 2, 3]) -def test_cut_basic(x, bins, right, include_lowest, ordered, precision): +@pytest.mark.parametrize("precision", [1, 3]) +def test_cut_basic(box, bins, right, include_lowest, precision): # will test optional labels, retbins and duplicates separately # they need more specific parameters to work + x = box([1, 7, 5, 4, 6, 3]) + ordered = True pcat = pd.cut( x=x, bins=bins, @@ -46,20 +43,16 @@ def test_cut_basic(x, bins, right, include_lowest, ordered, precision): assert_eq(pindex, gindex) -@pytest.mark.parametrize("x", [[1, 7, 5, 4, 6, 3]]) -@pytest.mark.parametrize("bins", [3]) # labels must be the same len as bins @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("precision", [1, 3]) @pytest.mark.parametrize( - "ordered", [True, False] -) # labels must be unique if ordered=True -@pytest.mark.parametrize("precision", [1, 2, 3]) -@pytest.mark.parametrize( - "labels", [["bad", "medium", "good"], ["A", "B", "C"], [1, 2, 3], False] + "labels", [["bad", "medium", "good"], [1, 2, 3], False] ) -def test_cut_labels( - x, bins, right, include_lowest, ordered, precision, labels -): +def test_cut_labels(right, include_lowest, ordered, precision, labels): + x = [1, 7, 5, 4, 6, 3] + bins = 3 pcat = pd.cut( x=x, bins=bins, @@ -83,20 +76,14 @@ def test_cut_labels( assert_eq(pindex, gindex) -@pytest.mark.parametrize("x", [[1, 7, 5, 4, 6, 3]]) -@pytest.mark.parametrize("bins", [3]) # labels must be the same len as bins @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize( - "ordered", [False] -) # labels must be unique if ordered=True -@pytest.mark.parametrize("precision", [1, 2, 3]) -@pytest.mark.parametrize( - "labels", [["bad", "good", "good"], ["B", "A", "B"], [1, 2, 2], False] -) -def test_cut_labels_non_unique( - x, bins, right, include_lowest, ordered, precision, labels -): +@pytest.mark.parametrize("precision", [1, 3]) +@pytest.mark.parametrize("labels", [["bad", "good", "good"], [1, 2, 2], False]) +def test_cut_labels_non_unique(right, include_lowest, precision, labels): + x = [1, 7, 5, 4, 6, 3] + bins = 3 + ordered = False pcat = pd.cut( x=x, bins=bins, @@ -134,8 +121,8 @@ def test_cut_labels_non_unique( [1, 2, 3, [1, 2, 3], [0, 2, 4, 6, 10]], ) @pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("precision", [3]) -def test_cut_right(x, bins, right, precision): +def test_cut_right(x, bins, right): + precision = 3 pcat = pd.cut( x=x, bins=bins, @@ 
-168,12 +155,10 @@ def test_cut_right(x, bins, right, precision): ) @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize("ordered", [True]) -@pytest.mark.parametrize("precision", [1, 2, 3]) -@pytest.mark.parametrize("duplicates", ["drop"]) -def test_cut_drop_duplicates( - x, bins, right, precision, duplicates, ordered, include_lowest -): +@pytest.mark.parametrize("precision", [1, 3]) +def test_cut_drop_duplicates(x, bins, right, precision, include_lowest): + ordered = True + duplicates = "drop" pcat = pd.cut( x=x, bins=bins, @@ -212,13 +197,12 @@ def test_cut_drop_duplicates( ) @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize("ordered", [True]) -@pytest.mark.parametrize("precision", [1, 2, 3]) -@pytest.mark.parametrize("duplicates", ["raises"]) -def test_cut_drop_duplicates_raises( - x, bins, right, precision, duplicates, ordered, include_lowest -): - with pytest.raises(ValueError) as excgd: +@pytest.mark.parametrize("precision", [1, 3]) +def test_cut_drop_duplicates_raises(x, bins, right, precision, include_lowest): + ordered = True + duplicates = "raise" + msg = "Bin edges must be unique" + with pytest.raises(ValueError, match=msg): cut( x=x, bins=bins, @@ -228,7 +212,7 @@ def test_cut_drop_duplicates_raises( include_lowest=include_lowest, ordered=ordered, ) - with pytest.raises(ValueError) as excpd: + with pytest.raises(ValueError, match=msg): pd.cut( x=x, bins=bins, @@ -239,8 +223,6 @@ def test_cut_drop_duplicates_raises( ordered=ordered, ) - assert_eq(str(excgd.value), str(excpd.value)) - @pytest.mark.parametrize( "x", @@ -252,14 +234,11 @@ def test_cut_drop_duplicates_raises( np.array([2, 4, 6, 8, 10]), ], ) -@pytest.mark.parametrize( - "bins", - [pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])], -) @pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("precision", [1, 2, 3]) +@pytest.mark.parametrize("precision", [1, 3]) @pytest.mark.parametrize("duplicates", ["drop", "raise"]) -def test_cut_intervalindex_bin(x, bins, right, precision, duplicates): +def test_cut_intervalindex_bin(x, right, precision, duplicates): + bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) pcat = pd.cut( x=x, bins=bins, @@ -279,16 +258,13 @@ def test_cut_intervalindex_bin(x, bins, right, precision, duplicates): assert_eq(pindex, gindex) -@pytest.mark.parametrize( - "x", - [pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"])], -) -@pytest.mark.parametrize("bins", [1, 2, 3]) +@pytest.mark.parametrize("bins", [1, 3]) @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize("ordered", [True]) -@pytest.mark.parametrize("precision", [3]) -def test_cut_series(x, bins, right, include_lowest, ordered, precision): +def test_cut_series(bins, right, include_lowest): + x = pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"]) + precision = 3 + ordered = True pcat = pd.cut( x=x, bins=bins, diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 3aedbf8365b..53257dc8f29 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
from copy import copy, deepcopy import cupy as cp @@ -23,55 +23,29 @@ @pytest.mark.parametrize( - "copy_parameters", + "fn", [ - {"fn": lambda x: x.copy(), "expected_equality": False}, - {"fn": lambda x: x.copy(deep=True), "expected_equality": False}, - {"fn": lambda x: copy(x), "expected_equality": False}, - {"fn": lambda x: deepcopy(x), "expected_equality": False}, - ], -) -def test_dataframe_deep_copy(copy_parameters): - pdf = pd.DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] - ) - gdf = DataFrame.from_pandas(pdf) - copy_pdf = copy_parameters["fn"](pdf) - copy_gdf = copy_parameters["fn"](gdf) - copy_pdf["b"] = [0, 0, 0] - copy_gdf["b"] = [0, 0, 0] - pdf_is_equal = np.array_equal(pdf["b"].values, copy_pdf["b"].values) - gdf_is_equal = np.array_equal( - gdf["b"].to_numpy(), copy_gdf["b"].to_numpy() - ) - assert pdf_is_equal == copy_parameters["expected_equality"] - assert gdf_is_equal == copy_parameters["expected_equality"] - - -@pytest.mark.parametrize( - "copy_parameters", - [ - {"fn": lambda x: x.copy(), "expected_equality": False}, - {"fn": lambda x: x.copy(deep=True), "expected_equality": False}, - {"fn": lambda x: copy(x), "expected_equality": False}, - {"fn": lambda x: deepcopy(x), "expected_equality": False}, + lambda x: x.copy(), + lambda x: x.copy(deep=True), + lambda x: copy(x), + lambda x: deepcopy(x), ], ) -def test_dataframe_deep_copy_and_insert(copy_parameters): +def test_dataframe_deep_copy(fn): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) gdf = DataFrame.from_pandas(pdf) - copy_pdf = copy_parameters["fn"](pdf) - copy_gdf = copy_parameters["fn"](gdf) + copy_pdf = fn(pdf) + copy_gdf = fn(gdf) copy_pdf["b"] = [0, 0, 0] copy_gdf["b"] = [0, 0, 0] pdf_is_equal = np.array_equal(pdf["b"].values, copy_pdf["b"].values) gdf_is_equal = np.array_equal( gdf["b"].to_numpy(), copy_gdf["b"].to_numpy() ) - assert pdf_is_equal == copy_parameters["expected_equality"] - assert gdf_is_equal == copy_parameters["expected_equality"] + assert not pdf_is_equal + assert not gdf_is_equal """ From afc0d5d9beeb1041cfdf02c168e8528c778969cf Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 5 Aug 2025 16:17:37 -0500 Subject: [PATCH 063/366] Support `cudf-polars` `str.zfill` (#19081) Closes https://github.com/rapidsai/cudf/issues/19035 Closes https://github.com/rapidsai/cudf/issues/16480 I believe this needs https://github.com/pola-rs/polars/pull/22985 to pass the one remaining failing test and the column overload described here https://github.com/rapidsai/cudf/issues/19035#issuecomment-2937332033 Authors: - https://github.com/brandon-b-miller - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19081 --- .../cudf_polars/dsl/expressions/string.py | 70 ++++++++++++ .../cudf_polars/cudf_polars/testing/plugin.py | 1 + .../tests/expressions/test_stringfunction.py | 100 ++++++++++++++++++ 3 files changed, 171 insertions(+) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 9a2cff2c37f..d47828b4175 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, ClassVar from polars.exceptions import InvalidOperationError +from polars.polars import dtype_str_repr import pylibcudf as plc 
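Before reading the implementation, it helps to fix the expected semantics. Recent polars releases appear to follow Python's own `str.zfill` convention (left-pad with `"0"`, keep a leading sign in front, never truncate), which is what the `POLARS_VERSION_LT_130` xfails further down allude to for signed input. The reference behavior, in plain Python and with nothing cudf-specific assumed:

```python
# Python's str.zfill is the reference point the polars expression mirrors.
assert "7".zfill(4) == "0007"
assert "-1".zfill(4) == "-001"        # the sign stays ahead of the padding
assert "abcdef".zfill(4) == "abcdef"  # width below the string length: no-op
```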
@@ -137,6 +138,7 @@ def from_polars(cls, obj: pl_expr.StringFunction) -> Self: Name.Reverse, Name.Tail, Name.Titlecase, + Name.ZFill, } __slots__ = ("_regex_program", "name", "options") _non_child = ("dtype", "name", "options") @@ -264,6 +266,17 @@ def _validate_input(self) -> None: raise NotImplementedError( "strip operations only support scalar patterns" ) + elif self.name is StringFunction.Name.ZFill: + if isinstance(self.children[1], Literal): + _, width = self.children + assert isinstance(width, Literal) + if width.value is not None and width.value < 0: + dtypestr = dtype_str_repr(width.dtype.polars) + raise InvalidOperationError( + f"conversion from `{dtypestr}` to `u64` " + f"failed in column 'literal' for 1 out of " + f"1 values: [{width.value}]" + ) from None @staticmethod def _create_regex_program( @@ -322,6 +335,63 @@ def do_evaluate( ), dtype=self.dtype, ) + elif self.name is StringFunction.Name.ZFill: + # TODO: expensive validation + # polars pads based on bytes, libcudf by visual width + # only pass chars if the visual width matches the byte length + column = self.children[0].evaluate(df, context=context) + col_len_bytes = plc.strings.attributes.count_bytes(column.obj) + col_len_chars = plc.strings.attributes.count_characters(column.obj) + equal = plc.binaryop.binary_operation( + col_len_bytes, + col_len_chars, + plc.binaryop.BinaryOperator.NULL_EQUALS, + plc.DataType(plc.TypeId.BOOL8), + ) + if not plc.reduce.reduce( + equal, + plc.aggregation.all(), + plc.DataType(plc.TypeId.BOOL8), + ).to_py(): + raise InvalidOperationError( + "zfill only supports ascii strings with no unicode characters" + ) + if isinstance(self.children[1], Literal): + width = self.children[1] + assert isinstance(width, Literal) + if width.value is None: + return Column( + plc.Column.from_scalar( + plc.Scalar.from_py(None, self.dtype.plc), + column.size, + ), + self.dtype, + ) + return Column( + plc.strings.padding.zfill(column.obj, width.value), self.dtype + ) + else: + col_width = self.children[1].evaluate(df, context=context) + assert isinstance(col_width, Column) + all_gt_0 = plc.binaryop.binary_operation( + col_width.obj, + plc.Scalar.from_py(0, plc.DataType(plc.TypeId.INT64)), + plc.binaryop.BinaryOperator.GREATER_EQUAL, + plc.DataType(plc.TypeId.BOOL8), + ) + + if not plc.reduce.reduce( + all_gt_0, + plc.aggregation.all(), + plc.DataType(plc.TypeId.BOOL8), + ).to_py(): + raise InvalidOperationError("fill conversion failed.") + + return Column( + plc.strings.padding.zfill_by_widths(column.obj, col_width.obj), + self.dtype, + ) + elif self.name is StringFunction.Name.Contains: child, arg = self.children column = child.evaluate(df, context=context) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index e205a7299d3..8491b2fa400 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -144,6 +144,7 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_group_by.py::test_group_by_lit_series": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins", + "tests/unit/operations/namespaces/string/test_pad.py::test_str_zfill_unicode_not_respected": "polars doesn't add zeros for unicode characters.", 
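The ASCII gate in `do_evaluate` above deserves a comment: polars pads based on byte length while libcudf's `zfill` pads by visual (character) width, so the two only agree when every string's UTF-8 byte length equals its character count, i.e. pure ASCII. That is exactly what comparing `count_bytes` against `count_characters` checks. The invariant in plain Python, using the same strings as `test_string_zfill_forbidden_chars` below:

```python
# ASCII is precisely the case where UTF-8 byte length == character count.
for s in ["345", "Café", "東京"]:
    print(s, len(s.encode("utf-8")), len(s))
# 345    3 3  -> widths agree, zfill allowed
# Café   5 4  -> multi-byte 'é', rejected
# 東京   6 2  -> multi-byte CJK, rejected
```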
"tests/unit/operations/test_rolling.py::test_rolling_group_by_empty_groups_by_take_6330": "Ordering difference, might be polars bug", "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU", diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 6515e487e7e..9923f031300 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -504,6 +504,106 @@ def test_string_join(ldf, ignore_nulls, delimiter): assert_gpu_result_equal(q) +@pytest.mark.parametrize( + "fill", + [ + 0, + 1, + 2, + 5, + 999, + -1, + None, + ], +) +@pytest.mark.parametrize( + "input_strings", + [ + ["1", "0"], + ["123", "45"], + ["", "0"], + ["abc", "def"], + ], +) +def test_string_zfill(fill, input_strings): + ldf = pl.LazyFrame({"a": input_strings}) + q = ldf.select(pl.col("a").str.zfill(fill)) + + if fill is not None and fill < 0: + assert_collect_raises( + q, + polars_except=pl.exceptions.InvalidOperationError, + cudf_except=pl.exceptions.ComputeError, + ) + else: + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "fill", + [ + 5 + if not POLARS_VERSION_LT_130 + else pytest.param(5, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), + 999 + if not POLARS_VERSION_LT_130 + else pytest.param(999, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), + ], +) +def test_string_zfill_pl_129(fill): + ldf = pl.LazyFrame({"a": ["-1", "+2"]}) + q = ldf.select(pl.col("a").str.zfill(fill)) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "fill", + [ + 0, + 1, + 2, + 5 + if not POLARS_VERSION_LT_130 + else pytest.param(5, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), + 999 + if not POLARS_VERSION_LT_130 + else pytest.param(999, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), + -1, + pytest.param(None, marks=pytest.mark.xfail(reason="None dtype")), + ], +) +def test_string_zfill_column(fill): + ldf = pl.DataFrame( + { + "input_strings": ["1", "0", "123", "45", "", "0", "-1", "+2", "abc", "def"], + "fill": [fill] * 10, + } + ).lazy() + q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill"))) + if fill is not None and fill < 0: + assert_collect_raises( + q, + polars_except=pl.exceptions.InvalidOperationError, + cudf_except=pl.exceptions.InvalidOperationError + if not POLARS_VERSION_LT_130 + else pl.exceptions.ComputeError, + ) + else: + assert_gpu_result_equal(q) + + +def test_string_zfill_forbidden_chars(): + ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]}) + q = ldf.select(pl.col("a").str.zfill(3)) + assert_collect_raises( + q, + polars_except=(), + cudf_except=pl.exceptions.InvalidOperationError + if not POLARS_VERSION_LT_130 + else pl.exceptions.ComputeError, + ) + + @pytest.mark.parametrize( "width", [ From 25a24203bd93b47a72be079afccc39105068eb3e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 17:22:07 -0700 Subject: [PATCH 064/366] Use more pytest fixtures and avoid GPU parameterization in test_dropna/factorize.py and more (#19449) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Eliminate/reduce 
parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19449 --- python/cudf/cudf/tests/test_dlpack.py | 22 ++++++----------- python/cudf/cudf/tests/test_doctests.py | 24 ++++++------------ python/cudf/cudf/tests/test_dropna.py | 30 +++++++++++++---------- python/cudf/cudf/tests/test_duplicates.py | 8 +++--- python/cudf/cudf/tests/test_factorize.py | 6 ++--- python/cudf/cudf/tests/test_feather.py | 27 +++++++------------- python/cudf/cudf/tests/test_gcs.py | 5 ++-- 7 files changed, 48 insertions(+), 74 deletions(-) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index ffb33870323..44c819cbd68 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -6,27 +6,19 @@ import cupy import numpy as np import pytest -from packaging import version import cudf from cudf.testing import assert_eq -nelems = [0, 3, 10] +nelems = [0, 10] dtype = [np.uint16, np.int32, np.float64] nulls = ["some", "none"] params_1d = itertools.product(nelems, dtype, nulls) -ncols = [0, 1, 2] +ncols = [0, 2] params_2d = itertools.product(ncols, nelems, dtype, nulls) -if version.parse(cupy.__version__) < version.parse("10"): - # fromDlpack deprecated in cupy version 10, replaced by from_dlpack - cupy_from_dlpack = cupy.fromDlpack -else: - cupy_from_dlpack = cupy.from_dlpack - - def data_size_expectation_builder(data, nan_null_param=False): if nan_null_param and np.isnan(data).any(): return pytest.raises((ValueError,)) @@ -117,7 +109,7 @@ def test_to_dlpack_cupy_1d(data_1d): cudf_host_array = gs.to_numpy(na_value=np.nan) dlt = gs.to_dlpack() - cupy_array = cupy_from_dlpack(dlt) + cupy_array = cupy.from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) @@ -131,7 +123,7 @@ def test_to_dlpack_cupy_2d(data_2d): cudf_host_array = np.array(gdf.to_pandas()).flatten() dlt = gdf.to_dlpack() - cupy_array = cupy_from_dlpack(dlt) + cupy_array = cupy.from_dlpack(dlt) cupy_host_array = cupy_array.get().flatten() assert_eq(cudf_host_array, cupy_host_array) @@ -167,7 +159,7 @@ def test_to_dlpack_cupy_2d_null(data_2d): cudf_host_array = np.array(gdf.to_pandas()).flatten() dlt = gdf.to_dlpack() - cupy_array = cupy_from_dlpack(dlt) + cupy_array = cupy.from_dlpack(dlt) cupy_host_array = cupy_array.get().flatten() assert_eq(cudf_host_array, cupy_host_array) @@ -181,7 +173,7 @@ def test_to_dlpack_cupy_1d_null(data_1d): cudf_host_array = gs.to_numpy(na_value=np.nan) dlt = gs.to_dlpack() - cupy_array = cupy_from_dlpack(dlt) + cupy_array = cupy.from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) @@ -193,7 +185,7 @@ def test_to_dlpack_mixed_dtypes(): cudf_host_array = df.to_numpy() dlt = df.to_dlpack() - cupy_array = cupy_from_dlpack(dlt) + cupy_array = cupy.from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py index 5d3d18cbe95..b196b5314ac 100644 --- a/python/cudf/cudf/tests/test_doctests.py +++ b/python/cudf/cudf/tests/test_doctests.py @@ -1,10 +1,9 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. 
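One consequence of the `test_dlpack.py` hunks above: with CuPy < 10 no longer supported, the `fromDlpack` compatibility shim is gone and the interchange is a single call. A minimal sketch of the round trip these tests exercise (`to_dlpack` is the cudf method already used throughout this file):

```python
import cupy as cp
import cudf

gs = cudf.Series([1.0, 2.0, 3.0])
arr = cp.from_dlpack(gs.to_dlpack())  # hand the device buffer to CuPy via DLPack
assert float(arr.sum()) == 6.0
```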
import contextlib import doctest import inspect import io import itertools -import os import numpy as np import pytest @@ -34,9 +33,6 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): ---------- obj : module or class The object to search for docstring examples. - finder : doctest.DocTestFinder, optional - The DocTestFinder object to use. If not provided, a DocTestFinder is - constructed. criteria : callable, optional Callable indicating whether to recurse over members of the provided object. If not provided, names not defined in the object's ``__all__`` @@ -74,16 +70,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): class TestDoctests: @pytest.fixture(autouse=True) - def chdir_to_tmp_path(cls, tmp_path): - # Some doctests generate files, so this fixture runs the tests in a - # temporary directory. - original_directory = os.getcwd() - os.chdir(tmp_path) - yield - os.chdir(original_directory) - - @pytest.fixture(autouse=True) - def prinoptions(cls): + def printoptions(cls): # TODO: NumPy now prints scalars as `np.int8(1)`, etc. this should # be adapted evantually. if version.parse(np.__version__) >= version.parse("2.0"): @@ -94,18 +81,21 @@ def prinoptions(cls): @pytest.mark.parametrize( "docstring", - itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]), + itertools.chain.from_iterable( + _find_doctests_in_obj(mod) for mod in tests + ), ids=lambda docstring: docstring.name, ) @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Doctests not expected to pass on older versions of pandas", ) - def test_docstring(self, docstring): + def test_docstring(self, docstring, monkeypatch, tmp_path): # We ignore differences in whitespace in the doctest output, and enable # the use of an ellipsis "..." to match any string in the doctest # output. An ellipsis is useful for, e.g., memory addresses or # imprecise floating point values. + monkeypatch.chdir(tmp_path) optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE runner = doctest.DocTestRunner(optionflags=optionflags) diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index eeac78dbebc..1f927d03e95 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -1,7 +1,8 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
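The doctest fixture rewrite above swaps a hand-rolled `os.chdir` save/restore for pytest's `monkeypatch.chdir`, which undoes the directory change automatically even when the test fails. The same pattern in isolation (the test name here is illustrative, not from the patch):

```python
def test_doctest_that_writes_files(tmp_path, monkeypatch):
    monkeypatch.chdir(tmp_path)  # reverted automatically at test teardown
    with open("artifact.txt", "w") as f:  # lands in tmp_path, not the repo
        f.write("ok")
```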
import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -84,22 +85,26 @@ def test_dropna_dataframe(data, how, axis, inplace): "data", [ { - "a": cudf.Series([None, None, None], dtype="float64"), - "b": cudf.Series([1, 2, None]), + "a": pa.array([None, None, None], type=pa.float64()), + "b": [1, 2, None], }, { - "a": cudf.Series([np.nan, np.nan, np.nan], dtype="float64"), - "b": cudf.Series([1, 2, None]), + "a": pa.array([np.nan, np.nan, np.nan]), + "b": [1, 2, None], }, - cudf.Series([None, None, None], dtype="object"), + {"a": pa.array([None, None, None], type=pa.string())}, ], ) @pytest.mark.parametrize("axis", [0, 1]) def test_dropna_with_all_nulls(how, data, axis): - gdf = cudf.DataFrame({"a": data}) + gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() - assert_eq(pdf.dropna(axis=axis, how=how), gdf.dropna(axis=axis, how=how)) + assert_eq( + pdf.dropna(axis=axis, how=how), + gdf.dropna(axis=axis, how=how), + check_dtype=False, + ) def test_dropna_nan_as_null(): @@ -208,12 +213,12 @@ def test_dropna_thresh_cols(thresh, subset, inplace): [ { "key": [1, 2, 10], - "val": cudf.Series([np.nan, 3, 1], nan_as_null=False), + "val": pa.array([np.nan, 3.0, 1.0]), "abc": [np.nan, None, 1], }, { "key": [None, 2, 1], - "val": cudf.Series([3, np.nan, 0.1], nan_as_null=True), + "val": pa.array([3.0, None, 0.1]), "abc": [None, 1, None], }, ], @@ -250,10 +255,9 @@ def test_dropna_index(data, dtype): assert_eq(expect, got) -@pytest.mark.parametrize("data", [[[1, None, 2], [None, None, 2]]]) @pytest.mark.parametrize("how", ["all", "any"]) -def test_dropna_multiindex(data, how): - pi = pd.MultiIndex.from_arrays(data) +def test_dropna_multiindex(how): + pi = pd.MultiIndex.from_arrays([[1, None, 2], [None, None, 2]]) gi = cudf.from_pandas(pi) expect = pi.dropna(how) diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index 67dd7a8388b..c9967b83235 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
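Why `test_dropna.py` now builds inputs with `pa.array`: pyarrow keeps the null/NaN distinction explicit, whereas routing data through cudf or pandas constructors first can fold NaN into null (or not) depending on `nan_as_null`. The distinction is directly observable in pyarrow:

```python
import pyarrow as pa

a = pa.array([1.0, None, float("nan")])
print(a.null_count)  # 1 -- only None is a null; NaN stays an ordinary float value
```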
import itertools import random @@ -156,7 +156,7 @@ def test_drop_duplicates(): assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) -@pytest.mark.skip(reason="cudf does not support duplicate column names yet") +@pytest.mark.xfail(reason="cudf does not support duplicate column names yet") def test_drop_duplicates_with_duplicate_column_names(): df = pd.DataFrame( [[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"] @@ -267,8 +267,8 @@ def test_drop_duplicates_empty(df): assert_eq(result, df) -@pytest.mark.parametrize("num_columns", [3, 4, 5]) -def test_dataframe_drop_duplicates_numeric_method(num_columns): +def test_dataframe_drop_duplicates_numeric_method(): + num_columns = 3 comb = list(itertools.permutations(range(num_columns), num_columns)) shuf = list(comb) random.Random(num_columns).shuffle(shuf) diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index be81e92835f..d16e725088f 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -10,8 +10,7 @@ from cudf.testing import assert_eq -@pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)]) -def test_factorize_series_obj(ncats, nelem): +def test_factorize_series_obj(): df = DataFrame() rng = np.random.default_rng(seed=0) @@ -31,8 +30,7 @@ def test_factorize_series_obj(ncats, nelem): np.testing.assert_array_equal(uvals.get(), handcoded) -@pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)]) -def test_factorize_index_obj(ncats, nelem): +def test_factorize_index_obj(): df = DataFrame() rng = np.random.default_rng(seed=0) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index 6a9dd4c4a66..04c4d55afe6 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
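Note the `skip` to `xfail` flip for `test_drop_duplicates_with_duplicate_column_names` above. Unlike `skip`, `xfail` still executes the test, so the suite will report an XPASS the day cudf gains support for duplicate column names, instead of never exercising the code path:

```python
import pytest


@pytest.mark.xfail(reason="cudf does not support duplicate column names yet")
def test_duplicate_column_names():
    ...  # executed: reported XFAIL while failing, XPASS once support lands
```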
import os from string import ascii_letters @@ -13,7 +13,7 @@ from cudf.testing._utils import NUMERIC_TYPES -@pytest.fixture(params=[0, 1, 10, 100]) +@pytest.fixture(params=[0, 10]) def pdf(request): rng = np.random.default_rng(seed=0) types = [*NUMERIC_TYPES, "bool"] @@ -41,25 +41,15 @@ def pdf(request): return test_pdf -@pytest.fixture -def gdf(pdf): - return cudf.DataFrame.from_pandas(pdf) - - -@pytest.fixture -def feather_file(tmp_path_factory, pdf): - fname = tmp_path_factory.mktemp("feather") / "test.feather" - pdf.to_feather(fname) - return fname - - @pytest.mark.filterwarnings("ignore:Using CPU") @pytest.mark.filterwarnings("ignore:Strings are not yet supported") @pytest.mark.parametrize( "columns", [["col_int8"], ["col_category"], ["col_int32", "col_float32"], None], ) -def test_feather_reader(feather_file, columns): +def test_feather_reader(pdf, columns, tmp_path): + feather_file = tmp_path / "test.feather" + pdf.to_feather(feather_file) expect = pa.feather.read_table(feather_file, columns=columns).to_pandas() got = ( cudf.read_feather(feather_file, columns=columns) @@ -71,9 +61,10 @@ def test_feather_reader(feather_file, columns): @pytest.mark.filterwarnings("ignore:Using CPU") -def test_feather_writer(tmpdir, pdf, gdf): - pdf_fname = tmpdir.join("pdf.feather") - gdf_fname = tmpdir.join("gdf.feather") +def test_feather_writer(tmp_path, pdf): + gdf = cudf.DataFrame.from_pandas(pdf) + pdf_fname = tmp_path / "pdf.feather" + gdf_fname = tmp_path / "gdf.feather" pdf.to_feather(pdf_fname) gdf.to_feather(gdf_fname) diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index 82ecd356bbf..b9b0161e6c0 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
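The feather tests above fold their `gdf` and `feather_file` fixtures into the test bodies and move from `tmpdir` to the pathlib-based `tmp_path`. The core round trip they exercise, sketched minimally (file name illustrative):

```python
import pandas as pd
import cudf


def test_feather_roundtrip(tmp_path):
    fname = tmp_path / "roundtrip.feather"
    pd.DataFrame({"a": [1, 2, 3]}).to_feather(fname)
    gdf = cudf.read_feather(fname)
    assert gdf["a"].sum() == 6
```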
import io import os @@ -12,7 +12,6 @@ gcsfs = pytest.importorskip("gcsfs") -TEST_PROJECT = "cudf-gcs-test-project" TEST_BUCKET = "cudf-gcs-test-bucket" @@ -27,7 +26,7 @@ def pdf(scope="module"): return df -def test_read_csv(pdf, monkeypatch, tmpdir): +def test_read_csv(pdf, monkeypatch): # Write to buffer fpath = TEST_BUCKET + "test_csv_reader.csv" buffer = pdf.to_csv(index=False) From e08b8a3b37f85720d27d8c02426652920ab964cc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 5 Aug 2025 17:22:15 -0700 Subject: [PATCH 065/366] Move test_unaops/test_unique/test_transform.py to new cudf classic test directory structure (#19477) Towards https://github.com/rapidsai/cudf/issues/9999 * Adding more shared fixtures in `conftest.py` where applicable * Further simplify tests Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19477 --- python/cudf/cudf/tests/conftest.py | 20 +++++ .../{ => general_functions}/test_unique.py | 78 +++++++++---------- .../test_apply.py} | 10 +-- .../cudf/cudf/tests/series/test_reductions.py | 17 ++++ .../{test_unaops.py => series/test_unary.py} | 32 ++------ 5 files changed, 83 insertions(+), 74 deletions(-) rename python/cudf/cudf/tests/{ => general_functions}/test_unique.py (56%) rename python/cudf/cudf/tests/{test_transform.py => series/test_apply.py} (65%) create mode 100644 python/cudf/cudf/tests/series/test_reductions.py rename python/cudf/cudf/tests/{test_unaops.py => series/test_unary.py} (50%) diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 008867211f4..b3b48d19538 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -192,6 +192,26 @@ def set_decomp_env_vars(monkeypatch, request): yield +@pytest.fixture( + params=[ + "min", + "max", + "sum", + "product", + "quantile", + "all", + "any", + "std", + "var", + "median", + "kurtosis", + "skew", + ] +) +def reduction_methods(request): + return request.param + + signed_integer_types = ["int8", "int16", "int32", "int64"] unsigned_integer_types = ["uint8", "uint16", "uint32", "uint64"] float_types = ["float32", "float64"] diff --git a/python/cudf/cudf/tests/test_unique.py b/python/cudf/cudf/tests/general_functions/test_unique.py similarity index 56% rename from python/cudf/cudf/tests/test_unique.py rename to python/cudf/cudf/tests/general_functions/test_unique.py index 74e00d7c389..51da8f06dd3 100644 --- a/python/cudf/cudf/tests/test_unique.py +++ b/python/cudf/cudf/tests/general_functions/test_unique.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. 
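The new `reduction_methods` fixture in `conftest.py` above parametrizes by method *name* rather than by GPU object, which keeps test collection cheap and the consuming tests short. A sketch of how such a test consumes it (compare `test_series_pandas_methods` further down); for the float aggregations a tolerance is used since exact equality is not guaranteed:

```python
import cudf
import pytest


def test_reduction_matches_pandas(reduction_methods):
    sr = cudf.Series([1.0, 2.0, 3.0, 4.0, 5.0])
    got = getattr(sr, reduction_methods)()
    expected = getattr(sr.to_pandas(), reduction_methods)()
    assert got == pytest.approx(expected)
```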
import cupy as cp import numpy as np @@ -9,35 +9,23 @@ from cudf.testing import assert_eq -@pytest.fixture -def df(): - rng = np.random.default_rng(seed=0) - arr = rng.integers(2, size=10, dtype=np.int64) - return pd.DataFrame( - { - "foo": arr, - "bar": [pd.Timestamp(x) for x in arr], - } - ) - - -@pytest.fixture(params=["foo", "bar"]) -def series_test_vals(request, df): - actual = cudf.unique(cudf.Series.from_pandas(df[request.param])) - expected = pd.unique(df[request.param]) - return actual, expected - - -def test_unique_series_obj(series_test_vals): - actual, expected = series_test_vals - +@pytest.mark.parametrize( + "data", + [ + [1, 1, 2], + [pd.Timestamp(1), pd.Timestamp(1), pd.Timestamp(2)], + ], +) +def test_unique_series_obj(data): + actual = cudf.unique(cudf.Series(data)) + expected = pd.unique(pd.Series(data)) assert isinstance(expected, np.ndarray) assert isinstance(actual, cudf.Series) assert_eq(actual, pd.Series(expected, name=actual.name)) @pytest.mark.parametrize( - "index", + "cudf_index,pandas_index", [ (cudf.Index, pd.Index), (cudf.MultiIndex, pd.MultiIndex), @@ -45,21 +33,26 @@ def test_unique_series_obj(series_test_vals): (cudf.CategoricalIndex, pd.CategoricalIndex), ], ) -@pytest.mark.parametrize("col", ["foo", "bar"]) -def test_unique_index_obj(index, col, df): - df = cudf.DataFrame.from_pandas(df) - if index[0] == cudf.MultiIndex: - df.index = cudf.MultiIndex.from_arrays([df[col], df[col]]) +@pytest.mark.parametrize( + "data", + [ + [1, 1, 2], + [pd.Timestamp(1), pd.Timestamp(1), pd.Timestamp(2)], + ], +) +def test_unique_index_obj(cudf_index, pandas_index, data): + if cudf_index == cudf.MultiIndex: + idx = cudf_index.from_arrays([data, data]) else: - df.index = index[0](df[col]) - actual = cudf.unique(df.index) - expected = pd.unique(df.index.to_pandas()) + idx = cudf_index(data) + actual = cudf.unique(idx) + expected = pd.unique(idx.to_pandas()) isinstance(expected, np.ndarray) - assert isinstance(actual, index[0]) + assert isinstance(actual, cudf_index) - if index[0] == cudf.MultiIndex: - expect = index[1].from_arrays( + if cudf_index == cudf.MultiIndex: + expect = pandas_index.from_arrays( [ [x[0] for x in expected], [x[1] for x in expected], @@ -68,12 +61,13 @@ def test_unique_index_obj(index, col, df): ) assert_eq(actual, expect) else: - assert_eq(actual, index[1](expected, name=actual.name)) + assert_eq(actual, cudf_index(expected, name=actual.name)) -def test_unique_cupy_ndarray(df): - arr = np.asarray(df["foo"]) - garr = cp.asarray(df["foo"]) +def test_unique_cupy_ndarray(): + ser = pd.Series(pd.Series([1, 1, 2])) + arr = np.asarray(ser) + garr = cp.asarray(ser) expected = pd.unique(arr) actual = cudf.unique(garr) @@ -102,7 +96,8 @@ def test_category_dtype_unique(data): assert_eq(actual, pd.Series(expected)) -def test_unique_fails_value_error(df): +def test_unique_fails_value_error(): + df = pd.DataFrame({"foo": [1, 2, 3]}) with pytest.raises( ValueError, match="Must pass cudf.Series, cudf.Index, or cupy.ndarray object", @@ -111,8 +106,9 @@ def test_unique_fails_value_error(df): def test_unique_fails_not_implemented_error(): + ser = cudf.Series(["foo", "foo"], dtype="category") with cudf.option_context("mode.pandas_compatible", True): with pytest.raises( NotImplementedError, match="cudf.Categorical is not implemented" ): - cudf.unique(cudf.Series(["foo", "foo"], dtype="category")) + cudf.unique(ser) diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/series/test_apply.py similarity index 65% rename from 
python/cudf/cudf/tests/test_transform.py rename to python/cudf/cudf/tests/series/test_apply.py index 870bfad8a85..af957aeee6c 100644 --- a/python/cudf/cudf/tests/test_transform.py +++ b/python/cudf/cudf/tests/series/test_apply.py @@ -1,18 +1,16 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. import numpy as np import pytest from cudf import Series -from cudf.testing._utils import NUMERIC_TYPES def _generic_function(a): return a**3 -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) @pytest.mark.parametrize( "udf,testfunc", [ @@ -20,10 +18,10 @@ def _generic_function(a): (lambda x: x in [1, 2, 3, 4], lambda ser: np.isin(ser, [1, 2, 3, 4])), ], ) -def test_apply_python_lambda(dtype, udf, testfunc): - size = 500 +def test_apply_python_lambda(numeric_types_as_str, udf, testfunc): + size = 50 rng = np.random.default_rng(seed=0) - lhs_arr = rng.random(size).astype(dtype) + lhs_arr = rng.random(size).astype(numeric_types_as_str) lhs_ser = Series(lhs_arr) out_ser = lhs_ser.apply(udf) diff --git a/python/cudf/cudf/tests/series/test_reductions.py b/python/cudf/cudf/tests/series/test_reductions.py new file mode 100644 index 00000000000..1c924444c1e --- /dev/null +++ b/python/cudf/cudf/tests/series/test_reductions.py @@ -0,0 +1,17 @@ +# Copyright (c) 2019-2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +from cudf import Series + + +@pytest.mark.parametrize("data", [[], [1, 2, 3]]) +def test_series_pandas_methods(data, reduction_methods): + arr = np.array(data) + sr = Series(arr) + psr = pd.Series(arr) + np.testing.assert_equal( + getattr(sr, reduction_methods)(), getattr(psr, reduction_methods)() + ) diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/series/test_unary.py similarity index 50% rename from python/cudf/cudf/tests/test_unaops.py rename to python/cudf/cudf/tests/series/test_unary.py index 182e9147307..c1973b26425 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/series/test_unary.py @@ -3,26 +3,21 @@ from decimal import Decimal import numpy as np -import pandas as pd -import pytest from cudf import Series -from cudf.testing import _utils as utils, assert_eq +from cudf.testing import assert_eq -@pytest.mark.parametrize("dtype", utils.NUMERIC_TYPES) -def test_series_abs(dtype): +def test_series_abs(numeric_types_as_str): rng = np.random.default_rng(seed=0) - arr = (rng.random(1000) * 100).astype(dtype) + arr = (rng.random(100) * 100).astype(numeric_types_as_str) sr = Series(arr) np.testing.assert_equal(sr.abs().to_numpy(), np.abs(arr)) np.testing.assert_equal(abs(sr).to_numpy(), abs(arr)) -@pytest.mark.parametrize("dtype", utils.INTEGER_TYPES) -def test_series_invert(dtype): - rng = np.random.default_rng(seed=0) - arr = (rng.random(1000) * 100).astype(dtype) +def test_series_invert(integer_types_as_str): + arr = np.array([0, 1, 2], dtype=integer_types_as_str) sr = Series(arr) np.testing.assert_equal((~sr).to_numpy(), np.invert(arr)) np.testing.assert_equal((~sr).to_numpy(), ~arr) @@ -35,23 +30,6 @@ def test_series_neg(): np.testing.assert_equal((-sr).to_numpy(), -arr) -@pytest.mark.parametrize("mth", ["min", "max", "sum", "product"]) -def test_series_pandas_methods(mth): - rng = np.random.default_rng(seed=0) - arr = (1 + rng.random(5) * 100).astype(np.int64) - sr = Series(arr) - psr = pd.Series(arr) - np.testing.assert_equal(getattr(sr, mth)(), getattr(psr, mth)()) - - -@pytest.mark.parametrize("mth", ["min", "max", "sum", "product", "quantile"]) -def 
test_series_pandas_methods_empty(mth): - arr = np.array([]) - sr = Series(arr) - psr = pd.Series(arr) - np.testing.assert_equal(getattr(sr, mth)(), getattr(psr, mth)()) - - def test_series_bool_neg(): sr = Series([True, False, True, None, False, None, True, True]) psr = sr.to_pandas(nullable=True) From fd862c07548f009266a5bcb624d7de084c23c5c0 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Tue, 5 Aug 2025 19:21:57 -0700 Subject: [PATCH 066/366] Upgrade `gcc-toolset` for Java/JNI build to version 14 (#19500) As RAPIDS is moving to gcc-14, we should upgrade the rest of the build system (Java/JNI) to match it. This only changes in the build and test for Java/JNI, no implementation change is needed. Build for `spark-rapids-jni` will not pass until https://github.com/NVIDIA/spark-rapids-jni/pull/3568 is merged. But merging this should not be blocked by it. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Bradley Dice (https://github.com/bdice) - Peixin (https://github.com/pxLi) URL: https://github.com/rapidsai/cudf/pull/19500 --- .github/workflows/spark-rapids-jni.yaml | 2 +- java/ci/Dockerfile.rocky | 4 ++-- java/ci/README.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 832c749874c..f317c7cf531 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -19,4 +19,4 @@ jobs: - name: "Build spark-rapids-jni" run: | mkdir target - CMAKE_CUDA_ARCHITECTURES=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-11 build/buildcpp.sh + CMAKE_CUDA_ARCHITECTURES=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-14 build/buildcpp.sh diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index 28f9361ce8a..e7cf319663d 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -26,9 +26,9 @@ ARG TARGETPLATFORM=linux/amd64 # multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host # check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE -ARG TOOLSET_VERSION=11 +ARG TOOLSET_VERSION=14 ### Install basic requirements -RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-11 gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids diff --git a/java/ci/README.md b/java/ci/README.md index 5597cb22109..22a2dd618f6 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -42,7 +42,7 @@ git clone --recursive https://github.com/rapidsai/cudf.git -b branch-25.10 ```bash cd cudf export WORKSPACE=`pwd` -scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" +scl enable gcc-toolset-14 "java/ci/build-in-docker.sh" ``` ### The output From 3e9e6297f082e3a9cadc9ec83e5f224fb719bee4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Aug 2025 17:23:34 -1000 Subject: [PATCH 067/366] Add streams to all modules with three or fewer functions (#19600) Contributes to https://github.com/rapidsai/cudf/issues/15163 Authors: - 
Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19600 --- cpp/include/cudf/partitioning.hpp | 2 +- python/pylibcudf/pylibcudf/binaryop.pxd | 6 ++- python/pylibcudf/pylibcudf/binaryop.pyi | 3 ++ python/pylibcudf/pylibcudf/binaryop.pyx | 20 ++++++--- .../pylibcudf/pylibcudf/libcudf/binaryop.pxd | 16 ++++--- .../pylibcudf/libcudf/partitioning.pxd | 22 ++++++--- .../pylibcudf/pylibcudf/libcudf/quantiles.pxd | 5 ++- python/pylibcudf/pylibcudf/libcudf/reduce.pxd | 15 ++++--- python/pylibcudf/pylibcudf/libcudf/search.pxd | 6 ++- python/pylibcudf/pylibcudf/partitioning.pxd | 17 +++++-- python/pylibcudf/pylibcudf/partitioning.pyi | 17 +++++-- python/pylibcudf/pylibcudf/partitioning.pyx | 45 ++++++++++++++----- python/pylibcudf/pylibcudf/quantiles.pxd | 7 ++- python/pylibcudf/pylibcudf/quantiles.pyi | 4 ++ python/pylibcudf/pylibcudf/quantiles.pyx | 22 +++++++-- python/pylibcudf/pylibcudf/reduce.pxd | 9 ++-- python/pylibcudf/pylibcudf/reduce.pyi | 20 +++++++-- python/pylibcudf/pylibcudf/reduce.pyx | 44 ++++++++++++++---- python/pylibcudf/pylibcudf/search.pxd | 8 +++- python/pylibcudf/pylibcudf/search.pyi | 8 +++- python/pylibcudf/pylibcudf/search.pyx | 30 +++++++++++-- 21 files changed, 255 insertions(+), 71 deletions(-) diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index b6bb4914f4c..e6defdb37be 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -37,7 +37,7 @@ namespace CUDF_EXPORT cudf { /** * @brief Identifies the hash function to be used in hash partitioning */ -enum class hash_id { +enum class hash_id : int32_t { HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed HASH_MURMUR3 ///< Murmur3 hash function }; diff --git a/python/pylibcudf/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd index 06625e9e2db..69398c555a8 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/binaryop.pxd @@ -1,7 +1,8 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from pylibcudf.libcudf.binaryop cimport binary_operator +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -21,7 +22,8 @@ cpdef Column binary_operation( LeftBinaryOperand lhs, RightBinaryOperand rhs, binary_operator op, - DataType output_type + DataType output_type, + Stream stream=* ) cpdef bool is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi index f745e6c6854..08b0d58b7c8 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyi +++ b/python/pylibcudf/pylibcudf/binaryop.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType @@ -48,6 +50,7 @@ def binary_operation( rhs: Column | Scalar, op: BinaryOperator, output_type: DataType, + stream: Stream | None = None, ) -> Column: ... 
def is_supported_operation( out: DataType, lhs: DataType, rhs: DataType, op: BinaryOperator diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index c6827431646..1bbafbfe227 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -12,9 +12,12 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.binaryop import \ binary_operator as BinaryOperator # no-cython-lint +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column from .scalar cimport Scalar from .types cimport DataType +from .utils cimport _get_stream __all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"] @@ -22,7 +25,8 @@ cpdef Column binary_operation( LeftBinaryOperand lhs, RightBinaryOperand rhs, binary_operator op, - DataType output_type + DataType output_type, + Stream stream=None ): """Perform a binary operation between a column and another column or scalar. @@ -43,6 +47,8 @@ cpdef Column binary_operation( The operation to perform. output_type : DataType The data type to use for the output. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -50,6 +56,7 @@ cpdef Column binary_operation( The result of the binary operation """ cdef unique_ptr[column] result + stream = _get_stream(stream) if LeftBinaryOperand is Column and RightBinaryOperand is Column: with nogil: @@ -57,7 +64,8 @@ cpdef Column binary_operation( lhs.view(), rhs.view(), op, - output_type.c_obj + output_type.c_obj, + stream.view() ) elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar: with nogil: @@ -65,7 +73,8 @@ cpdef Column binary_operation( lhs.view(), dereference(rhs.c_obj), op, - output_type.c_obj + output_type.c_obj, + stream.view() ) elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column: with nogil: @@ -73,12 +82,13 @@ cpdef Column binary_operation( dereference(lhs.c_obj), rhs.view(), op, - output_type.c_obj + output_type.c_obj, + stream.view() ) else: raise ValueError(f"Invalid arguments {lhs} and {rhs}") - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef bool is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd index 607f7c2fa60..d27daad00bd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
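With the plumbing above, `binary_operation` takes an optional RMM stream and falls back to a default via `_get_stream` when none is given. A usage sketch; `Column.from_arrow` is assumed here as the pyarrow interop constructor and is not part of this patch:

```python
import pyarrow as pa
import pylibcudf as plc
from rmm.pylibrmm.stream import Stream

stream = Stream()  # a fresh, non-default CUDA stream
lhs = plc.Column.from_arrow(pa.array([1, 2, 3]))    # assumed interop helper
rhs = plc.Column.from_arrow(pa.array([10, 20, 30]))
out = plc.binaryop.binary_operation(
    lhs,
    rhs,
    plc.binaryop.BinaryOperator.ADD,
    plc.DataType(plc.TypeId.INT64),
    stream=stream,
)
```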
from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -9,6 +9,8 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: cpdef enum class binary_operator(int32_t): @@ -52,28 +54,32 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const scalar& lhs, const column_view& rhs, binary_operator op, - data_type output_type + data_type output_type, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const scalar& rhs, binary_operator op, - data_type output_type + data_type output_type, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, binary_operator op, - data_type output_type + data_type output_type, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] binary_operation ( const column_view& lhs, const column_view& rhs, const string& op, - data_type output_type + data_type output_type, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern from "cudf/binaryop.hpp" namespace "cudf::binops" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd index 04566b6e40a..2ca060fd7df 100644 --- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd @@ -1,14 +1,21 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
cimport pylibcudf.libcudf.types as libcudf_types -from libc.stdint cimport uint32_t +from libc.stdint cimport int32_t, uint32_t from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.hash cimport DEFAULT_HASH_SEED from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + +cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: + cpdef enum class hash_id(int32_t): + HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY" + HASH_MURMUR3 "cudf::hash_id::HASH_MURMUR3" cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: @@ -16,19 +23,24 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: hash_partition "cudf::hash_partition" ( const table_view& input, const vector[libcudf_types.size_type]& columns_to_hash, - int num_partitions + int num_partitions, + hash_id hash_function, + uint32_t seed, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \ partition "cudf::partition" ( const table_view& t, const column_view& partition_map, - int num_partitions + int num_partitions, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \ round_robin_partition "cudf::round_robin_partition" ( const table_view& input, int num_partitions, - int start_partition + int start_partition, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd index 8f60302e776..58e70d37492 100644 --- a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -14,6 +14,7 @@ from pylibcudf.libcudf.types cimport ( order_info, sorted, ) +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: @@ -24,6 +25,7 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: interpolation interp, column_view ordered_indices, bool exact, + cuda_stream_view stream, ) except +libcudf_exception_handler cdef unique_ptr[table] quantiles ( @@ -33,4 +35,5 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: sorted is_input_sorted, vector[order] column_order, vector[null_order] null_precedence, + cuda_stream_view stream, ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd index ad79187b733..f740e7a7acf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.utility cimport pair @@ -7,14 +7,16 @@ from pylibcudf.libcudf.aggregation cimport reduce_aggregation, scan_aggregation from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from pylibcudf.libcudf.types cimport data_type +from pylibcudf.libcudf.types cimport data_type, null_policy +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] cpp_reduce "cudf::reduce" ( column_view col, const reduce_aggregation& agg, - data_type type + data_type type, + cuda_stream_view stream ) except +libcudf_exception_handler cpdef enum class scan_type(bool): @@ -24,10 +26,13 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: cdef unique_ptr[column] cpp_scan "cudf::scan" ( column_view col, const scan_aggregation& agg, - scan_type inclusive + scan_type inclusive, + null_policy null_handling, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[scalar], unique_ptr[scalar]] cpp_minmax "cudf::minmax" ( - column_view col + column_view col, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd index 5ec06858baa..f7264985b9e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -6,6 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/search.hpp" namespace "cudf" nogil: @@ -15,6 +16,7 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, + cuda_stream_view stream, ) except +libcudf_exception_handler cdef unique_ptr[column] upper_bound( @@ -22,9 +24,11 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, + cuda_stream_view stream, ) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view haystack, column_view needles, + cuda_stream_view stream, ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd index aad60149fc4..42e40fca776 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/partitioning.pxd @@ -1,4 +1,6 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
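Note the extra `null_policy` argument now declared for `cpp_scan`: the Python `scan` wrapper further below passes `null_policy.EXCLUDE`, so nulls are skipped rather than poisoning the running aggregate, while the null position itself stays null. A small illustration at the cudf level, assuming default `skipna` semantics:

```python
import cudf

s = cudf.Series([1, None, 2])
print(s.cumsum().to_arrow())  # [1, null, 3] -- the null is excluded, not propagated
```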
+ +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -7,13 +9,20 @@ from .table cimport Table cpdef tuple[Table, list] hash_partition( Table input, list columns_to_hash, - int num_partitions + int num_partitions, + Stream stream = * ) -cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions) +cpdef tuple[Table, list] partition( + Table t, + Column partition_map, + int num_partitions, + Stream stream = *, +) cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, - int start_partition=* + int start_partition=*, + Stream stream = * ) diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi index 48a2ade23f1..b37ae246b41 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyi +++ b/python/pylibcudf/pylibcudf/partitioning.pyi @@ -1,14 +1,25 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table def hash_partition( - input: Table, columns_to_hash: list[int], num_partitions: int + input: Table, + columns_to_hash: list[int], + num_partitions: int, + stream: Stream | None = None, ) -> tuple[Table, list[int]]: ... def partition( - t: Table, partition_map: Column, num_partitions: int + t: Table, + partition_map: Column, + num_partitions: int, + stream: Stream | None = None, ) -> tuple[Table, list[int]]: ... def round_robin_partition( - input: Table, num_partitions: int, start_partition: int = 0 + input: Table, + num_partitions: int, + start_partition: int = 0, + stream: Stream | None = None, ) -> tuple[Table, list[int]]: ... diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx index 1dacabceb06..c3fb7c1b579 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyx +++ b/python/pylibcudf/pylibcudf/partitioning.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. cimport pylibcudf.libcudf.types as libcudf_types from libcpp.memory cimport unique_ptr @@ -7,9 +7,11 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from pylibcudf.libcudf cimport partitioning as cpp_partitioning from pylibcudf.libcudf.table.table cimport table +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = [ "hash_partition", @@ -20,7 +22,8 @@ __all__ = [ cpdef tuple[Table, list] hash_partition( Table input, list columns_to_hash, - int num_partitions + int num_partitions, + Stream stream=None ): """ Partitions rows from the input table into multiple output tables. @@ -35,6 +38,8 @@ cpdef tuple[Table, list] hash_partition( Indices of input columns to hash num_partitions : int The number of partitions to use + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -45,16 +50,26 @@ cpdef tuple[Table, list] hash_partition( cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash cdef int c_num_partitions = num_partitions + stream = _get_stream(stream) + with nogil: c_result = cpp_partitioning.hash_partition( input.view(), c_columns_to_hash, - c_num_partitions + c_num_partitions, + cpp_partitioning.hash_id.HASH_MURMUR3, + cpp_partitioning.DEFAULT_HASH_SEED, + stream.view() ) - return Table.from_libcudf(move(c_result.first)), list(c_result.second) + return Table.from_libcudf(move(c_result.first), stream), list(c_result.second) -cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions): +cpdef tuple[Table, list] partition( + Table t, + Column partition_map, + int num_partitions, + Stream stream=None, +): """ Partitions rows of `t` according to the mapping specified by `partition_map`. @@ -69,6 +84,8 @@ cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partit in `t` to it's partition. num_partitions : int The total number of partitions + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -78,20 +95,24 @@ cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partit cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result cdef int c_num_partitions = num_partitions + stream = _get_stream(stream) + with nogil: c_result = cpp_partitioning.partition( t.view(), partition_map.view(), - c_num_partitions + c_num_partitions, + stream.view() ) - return Table.from_libcudf(move(c_result.first)), list(c_result.second) + return Table.from_libcudf(move(c_result.first), stream), list(c_result.second) cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, - int start_partition=0 + int start_partition=0, + Stream stream=None ): """ Round-robin partition. @@ -106,6 +127,8 @@ cpdef tuple[Table, list] round_robin_partition( Number of partitions for the table start_partition : int, default 0 Index of the 1st partition + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -117,9 +140,11 @@ cpdef tuple[Table, list] round_robin_partition( cdef int c_num_partitions = num_partitions cdef int c_start_partition = start_partition + stream = _get_stream(stream) + with nogil: c_result = cpp_partitioning.round_robin_partition( - input.view(), c_num_partitions, c_start_partition + input.view(), c_num_partitions, c_start_partition, stream.view() ) - return Table.from_libcudf(move(c_result.first)), list(c_result.second) + return Table.from_libcudf(move(c_result.first), stream), list(c_result.second) diff --git a/python/pylibcudf/pylibcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/quantiles.pxd index fbc1dfb30a6..6794c874933 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/quantiles.pxd @@ -1,6 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
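All three partitioners above now build their result tables on the caller-supplied stream. A usage sketch for `hash_partition`; as before, `Column.from_arrow` is an assumed interop constructor, not part of this patch:

```python
import pyarrow as pa
import pylibcudf as plc
from rmm.pylibrmm.stream import Stream

stream = Stream()
keys = plc.Column.from_arrow(pa.array([1, 2, 3, 4]))  # assumed interop helper
parts, offsets = plc.partitioning.hash_partition(
    plc.Table([keys]), [0], 2, stream=stream
)
# `parts` holds every row, grouped by partition; `offsets` lists each
# partition's starting row, so len(offsets) == 2 here.
```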
from libcpp.vector cimport vector from pylibcudf.libcudf.types cimport interpolation, sorted +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -11,7 +12,8 @@ cpdef Column quantile( vector[double] q, interpolation interp = *, Column ordered_indices = *, - bint exact = * + bint exact = *, + Stream stream = * ) cpdef Table quantiles( @@ -21,4 +23,5 @@ cpdef Table quantiles( sorted is_input_sorted = *, list column_order = *, list null_precedence = *, + Stream stream = * ) diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi index dca6eed013a..d5fb0e5b8b1 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyi +++ b/python/pylibcudf/pylibcudf/quantiles.pyi @@ -2,6 +2,8 @@ from collections.abc import Sequence +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import Interpolation, NullOrder, Order, Sorted @@ -12,6 +14,7 @@ def quantile( interp: Interpolation = Interpolation.LINEAR, ordered_indices: Column | None = None, exact: bool = True, + stream: Stream | None = None, ) -> Column: ... def quantiles( input: Table, @@ -20,4 +23,5 @@ def quantiles( is_input_sorted: Sorted = Sorted.NO, column_order: list[Order] | None = None, null_precedence: list[NullOrder] | None = None, + stream: Stream | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx index 634218586ac..8eb78750d33 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -12,10 +12,12 @@ from pylibcudf.libcudf.quantiles cimport ( ) from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_order, order, sorted +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table from .types cimport interpolation +from .utils cimport _get_stream __all__ = ["quantile", "quantiles"] @@ -24,7 +26,8 @@ cpdef Column quantile( vector[double] q, interpolation interp = interpolation.LINEAR, Column ordered_indices = None, - bool exact=True + bool exact=True, + Stream stream=None ): """Computes quantiles with interpolation. @@ -49,6 +52,8 @@ cpdef Column quantile( Values not indexed by this column will be ignored. exact: bool, default True Returns doubles if True. Otherwise, returns same type as input + stream : Stream | None + CUDA stream on which to perform the operation. For details, see :cpp:func:`quantile`. @@ -66,6 +71,8 @@ cpdef Column quantile( else: ordered_indices_view = ordered_indices.view() + stream = _get_stream(stream) + with nogil: c_result = cpp_quantile( input.view(), @@ -73,9 +80,10 @@ cpdef Column quantile( interp, ordered_indices_view, exact, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Table quantiles( @@ -85,6 +93,7 @@ cpdef Table quantiles( sorted is_input_sorted = sorted.NO, list column_order = None, list null_precedence = None, + Stream stream=None ): """Computes row quantiles with interpolation. @@ -121,6 +130,8 @@ cpdef Table quantiles( all other elements. Ignored if `is_input_sorted` is `Sorted.YES` + stream : Stream | None + CUDA stream on which to perform the operation. For details, see :cpp:func:`quantiles`. 
@@ -139,6 +150,8 @@ cpdef Table quantiles( if null_precedence is not None: null_precedence_vec = null_precedence + stream = _get_stream(stream) + with nogil: c_result = cpp_quantiles( input.view(), @@ -147,6 +160,7 @@ cpdef Table quantiles( is_input_sorted, column_order_vec, null_precedence_vec, + stream.view(), ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/reduce.pxd b/python/pylibcudf/pylibcudf/reduce.pxd index 047f08297e4..30176a6602e 100644 --- a/python/pylibcudf/pylibcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/reduce.pxd @@ -1,6 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.reduce cimport scan_type +from rmm.pylibrmm.stream cimport Stream from .aggregation cimport Aggregation from .column cimport Column @@ -8,8 +9,8 @@ from .scalar cimport Scalar from .types cimport DataType -cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type) +cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type, Stream stream = *) -cpdef Column scan(Column col, Aggregation agg, scan_type inclusive) +cpdef Column scan(Column col, Aggregation agg, scan_type inclusive, Stream stream = *) -cpdef tuple minmax(Column col) +cpdef tuple minmax(Column col, Stream stream = *) diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi index a09949b7b30..dd7d103b210 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyi +++ b/python/pylibcudf/pylibcudf/reduce.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -11,6 +13,18 @@ class ScanType(IntEnum): INCLUSIVE = ... EXCLUSIVE = ... -def reduce(col: Column, agg: Aggregation, data_type: DataType) -> Scalar: ... -def scan(col: Column, agg: Aggregation, inclusive: ScanType) -> Column: ... -def minmax(col: Column) -> tuple[Scalar, Scalar]: ... +def reduce( + col: Column, + agg: Aggregation, + data_type: DataType, + stream: Stream | None = None, +) -> Scalar: ... +def scan( + col: Column, + agg: Aggregation, + inclusive: ScanType, + stream: Stream | None = None, +) -> Column: ... +def minmax( + col: Column, stream: Stream | None = None +) -> tuple[Scalar, Scalar]: ... diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx index 1fa10dcd376..da9b0a20134 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -8,17 +8,25 @@ from pylibcudf.libcudf.aggregation cimport reduce_aggregation, scan_aggregation from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.reduce cimport scan_type from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf.libcudf.types cimport null_policy +from rmm.pylibrmm.stream cimport Stream from .aggregation cimport Aggregation from .column cimport Column from .scalar cimport Scalar from .types cimport DataType +from .utils cimport _get_stream from pylibcudf.libcudf.reduce import scan_type as ScanType # no-cython-lint __all__ = ["ScanType", "minmax", "reduce", "scan"] -cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type): +cpdef Scalar reduce( + Column col, + Aggregation agg, + DataType data_type, + Stream stream=None +): """Perform a reduction on a column For details, see ``cudf::reduce`` documentation. 
@@ -31,6 +39,8 @@ cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type): The aggregation to perform. data_type : DataType The data type of the result. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -39,16 +49,20 @@ cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type): """ cdef unique_ptr[scalar] result cdef const reduce_aggregation *c_agg = agg.view_underlying_as_reduce() + + stream = _get_stream(stream) + with nogil: result = cpp_reduce.cpp_reduce( col.view(), dereference(c_agg), - data_type.c_obj + data_type.c_obj, + stream.view() ) - return Scalar.from_libcudf(move(result)) + return Scalar.from_libcudf(move(result), stream) -cpdef Column scan(Column col, Aggregation agg, scan_type inclusive): +cpdef Column scan(Column col, Aggregation agg, scan_type inclusive, Stream stream=None): """Perform a scan on a column For details, see ``cudf::scan`` documentation. @@ -61,6 +75,8 @@ cpdef Column scan(Column col, Aggregation agg, scan_type inclusive): The aggregation to perform. inclusive : scan_type The type of scan to perform. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -69,16 +85,21 @@ cpdef Column scan(Column col, Aggregation agg, scan_type inclusive): """ cdef unique_ptr[column] result cdef const scan_aggregation *c_agg = agg.view_underlying_as_scan() + + stream = _get_stream(stream) + with nogil: result = cpp_reduce.cpp_scan( col.view(), dereference(c_agg), inclusive, + null_policy.EXCLUDE, + stream.view(), ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef tuple minmax(Column col): +cpdef tuple minmax(Column col, Stream stream=None): """Compute the minimum and maximum of a column For details, see ``cudf::minmax`` documentation. @@ -87,6 +108,8 @@ cpdef tuple minmax(Column col): ---------- col : Column The column to compute the minimum and maximum of. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -95,12 +118,15 @@ cpdef tuple minmax(Column col): being the maximum. """ cdef pair[unique_ptr[scalar], unique_ptr[scalar]] result + + stream = _get_stream(stream) + with nogil: - result = cpp_reduce.cpp_minmax(col.view()) + result = cpp_reduce.cpp_minmax(col.view(), stream.view()) return ( - Scalar.from_libcudf(move(result.first)), - Scalar.from_libcudf(move(result.second)), + Scalar.from_libcudf(move(result.first), stream), + Scalar.from_libcudf(move(result.second), stream), ) ScanType.__str__ = ScanType.__repr__ diff --git a/python/pylibcudf/pylibcudf/search.pxd b/python/pylibcudf/pylibcudf/search.pxd index 0faf18b108f..4499a7fe131 100644 --- a/python/pylibcudf/pylibcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/search.pxd @@ -1,4 +1,6 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+ +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -9,6 +11,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, + Stream stream = * ) cpdef Column upper_bound( @@ -16,6 +19,7 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, + Stream stream = * ) -cpdef Column contains(Column haystack, Column needles) +cpdef Column contains(Column haystack, Column needles, Stream stream = *) diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi index 7f292b129b2..3eec007a40c 100644 --- a/python/pylibcudf/pylibcudf/search.pyi +++ b/python/pylibcudf/pylibcudf/search.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order @@ -9,11 +11,15 @@ def lower_bound( needles: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Column: ... def upper_bound( haystack: Table, needles: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, +) -> Column: ... +def contains( + haystack: Column, needles: Column, stream: Stream | None = None ) -> Column: ... -def contains(haystack: Column, needles: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx index 32a133213d8..e3263348048 100644 --- a/python/pylibcudf/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -6,9 +6,11 @@ from libcpp.vector cimport vector from pylibcudf.libcudf cimport search as cpp_search from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.types cimport null_order, order +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = ["contains", "lower_bound", "upper_bound"] @@ -17,6 +19,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, + Stream stream=None ): """Find smallest indices in haystack where needles may be inserted to retain order. @@ -32,6 +35,8 @@ cpdef Column lower_bound( Whether each column should be sorted in ascending or descending order. null_precedence : List[NullOrder] Whether nulls should come before or after non-nulls. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -41,14 +46,18 @@ cpdef Column lower_bound( cdef unique_ptr[column] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_search.lower_bound( haystack.view(), needles.view(), c_orders, c_null_precedence, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column upper_bound( @@ -56,6 +65,7 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, + Stream stream=None ): """Find largest indices in haystack where needles may be inserted to retain order. @@ -71,6 +81,8 @@ cpdef Column upper_bound( Whether each column should be sorted in ascending or descending order. null_precedence : List[NullOrder] Whether nulls should come before or after non-nulls. + stream : Stream | None + CUDA stream on which to perform the operation. 
 Returns
 -------
@@ -80,17 +92,21 @@ cpdef Column upper_bound(
     cdef unique_ptr[column] c_result
     cdef vector[order] c_orders = column_order
     cdef vector[null_order] c_null_precedence = null_precedence
+
+    stream = _get_stream(stream)
+
     with nogil:
         c_result = cpp_search.upper_bound(
             haystack.view(),
             needles.view(),
             c_orders,
             c_null_precedence,
+            stream.view(),
         )
-    return Column.from_libcudf(move(c_result))
+    return Column.from_libcudf(move(c_result), stream)


-cpdef Column contains(Column haystack, Column needles):
+cpdef Column contains(Column haystack, Column needles, Stream stream=None):
     """Check whether needles are present in haystack.

     For details, see :cpp:func:`contains`.
@@ -101,6 +117,8 @@ cpdef Column contains(Column haystack, Column needles):
         The search space.
     needles : Column
         The values for which to search.
+    stream : Stream | None
+        CUDA stream on which to perform the operation.

     Returns
     -------
@@ -108,9 +126,13 @@ cpdef Column contains(Column haystack, Column needles):
         Boolean indicator for each needle.
     """
     cdef unique_ptr[column] c_result
+
+    stream = _get_stream(stream)
+
     with nogil:
         c_result = cpp_search.contains(
             haystack.view(),
             needles.view(),
+            stream.view(),
         )
-    return Column.from_libcudf(move(c_result))
+    return Column.from_libcudf(move(c_result), stream)

From 7492fc345e8866090609aef1d37bd193379bb6bf Mon Sep 17 00:00:00 2001
From: Peixin
Date: Wed, 6 Aug 2025 12:45:52 +0800
Subject: [PATCH 068/366] Update spark-rapids-jni action to use the PR's base.ref and fix the ccache version issue in the Dockerfile (#19603)

Follow-up of https://github.com/rapidsai/cudf/pull/19500

1. Provide env.sh as a common place for build environment variables
2. Update the ccache version so that compilation passes with gcc 14
3. Update the spark-rapids-jni workflow to check out the cudf PR's base ref instead of the repo's default branch

```
++ export 'sclCMD=scl enable gcc-toolset-14'
++ sclCMD='scl enable gcc-toolset-14'
+ CMAKE_CUDA_ARCHITECTURES=90
+ LIBCUDF_DEPENDENCY_MODE=latest
+ USE_GDS=on
+ scl enable gcc-toolset-14 build/buildcpp.sh
```

Authors:
  - Peixin (https://github.com/pxLi)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19603
---
 .github/workflows/spark-rapids-jni.yaml |  3 ++-
 java/ci/Dockerfile.rocky                |  4 ++--
 java/ci/README.md                       |  3 ++-
 java/ci/env.sh                          | 19 +++++++++++++++++++
 4 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 java/ci/env.sh

diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml
index f317c7cf531..f4c168aff1a 100644
--- a/.github/workflows/spark-rapids-jni.yaml
+++ b/.github/workflows/spark-rapids-jni.yaml
@@ -13,10 +13,11 @@ jobs:
         with:
           repository: NVIDIA/spark-rapids-jni
           submodules: recursive
+          ref: ${{ github.event.pull_request.base.ref }}
       - uses: actions/checkout@v4
         with:
           path: thirdparty/cudf
       - name: "Build spark-rapids-jni"
         run: |
           mkdir target
-          CMAKE_CUDA_ARCHITECTURES=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-14 build/buildcpp.sh
+          source build/env.sh && CMAKE_CUDA_ARCHITECTURES=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on ${sclCMD} build/buildcpp.sh
diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky
index e7cf319663d..d6eff8125fa 100644
--- a/java/ci/Dockerfile.rocky
+++ b/java/ci/Dockerfile.rocky
@@ -27,13 +27,14 @@ ARG TARGETPLATFORM=linux/amd64
 # check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH)
 FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE
 ARG TOOLSET_VERSION=14
+ARG CMAKE_VERSION=3.30.7
+ARG CCACHE_VERSION=4.11.2
 ### Install basic requirements
 RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-11 gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel
 ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
 RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids
 # 3.22.3+: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
-ARG CMAKE_VERSION=3.30.7
 # default x86_64 from x86 build, aarch64 cmake for arm build
 ARG CMAKE_ARCH=x86_64
 RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
@@ -42,7 +43,6 @@ RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/down
 ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH
 # ccache for interactive builds
-ARG CCACHE_VERSION=4.6
 RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \
     tar zxf ccache-${CCACHE_VERSION}.tar.gz && \
     rm ccache-${CCACHE_VERSION}.tar.gz && \
diff --git a/java/ci/README.md b/java/ci/README.md
index 22a2dd618f6..90112d838f3 100644
--- a/java/ci/README.md
+++ b/java/ci/README.md
@@ -42,7 +42,8 @@ git clone --recursive https://github.com/rapidsai/cudf.git -b branch-25.10
 ```bash
 cd cudf
 export WORKSPACE=`pwd`
-scl enable gcc-toolset-14 "java/ci/build-in-docker.sh"
+source java/ci/env.sh
+${sclCMD} "java/ci/build-in-docker.sh"
 ```

 ### The output
diff --git a/java/ci/env.sh b/java/ci/env.sh
new file mode 100644
index 00000000000..1772437205d
--- /dev/null
+++ b/java/ci/env.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -ex
+export sclCMD=${sclCMD:-"scl enable gcc-toolset-14"}

From 8f26dc4b707a598ae24a64c061ad011c6ecea799 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 6 Aug 2025 10:47:53 -0400
Subject: [PATCH 069/366] Pin Narwhals to 1.47 (#19358)

Use Narwhals version 1.47 in CI

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19358
---
 ci/test_narwhals.sh                           | 25 ++++++++++++++++---
 dependencies.yaml                             |  2 +-
 .../cudf/cudf/testing/narwhals_test_plugin.py |  2 ++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/ci/test_narwhals.sh b/ci/test_narwhals.sh
index 6af3be24b3b..a6ade73e5f3 100755
--- a/ci/test_narwhals.sh
+++ b/ci/test_narwhals.sh
@@ -32,6 +32,9 @@ python -c "import narwhals; print(narwhals.show_versions())"
 # test_rolling_std_expr_lazy_grouped: xpassing in Narwhals
 # test_rolling_sum_expr_lazy_grouped: xpassing in Narwhals
 # test_rolling_var_expr_lazy_grouped: xpassing in Narwhals
+# test_offset_by_tz: xpassing in Narwhals
+# test_double_same_aggregation: xpassing in Narwhals
+# test_all_kind_of_aggs: xpassing in Narwhals
 TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF="not test_rolling_mean_expr_lazy_grouped[cudf-expected_a4-3-1-True] \
 and not test_rolling_mean_expr_lazy_grouped[cudf-expected_a5-4-1-True] \
 and not test_rolling_mean_expr_lazy_grouped[cudf-expected_a6-5-1-True] \
 and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a4-3-1-True] \
 and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a5-4-1-True] \
 and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a6-5-1-True] \
 and not test_rolling_var_expr_lazy_grouped[cudf-expected_a4-3-1-True-1] \
 and not test_rolling_var_expr_lazy_grouped[cudf-expected_a5-4-1-True-1] \
 and not test_rolling_var_expr_lazy_grouped[cudf-expected_a6-5-1-True-0] \
-and not test_horizontal_slice_with_series"
+and not test_horizontal_slice_with_series \
+and not test_offset_by_tz \
+and not test_double_same_aggregation \
+and not test_all_kind_of_aggs"

 rapids-logger "Run narwhals tests for cuDF"
 PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 python -m pytest \
@@ -63,9 +69,11 @@ PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 python -m pytest \
 # columns aren't object anymore. The test expects object, causing a mismatch.
 # test_nan: Narwhals expects this test to fail, but as of polars 1.30 we raise a RuntimeError,
 # not polars ComputeError. So the test is looking for the wrong error and fails.
+# test_floordiv_int_by_zero: This bug is fixed as of 25.08; Narwhals should remove the xfail
 TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF_POLARS=" \
 test_dtypes or \
-test_nan \
+test_nan or \
+test_floordiv_int_by_zero \
 "

 rapids-logger "Run narwhals tests for cuDF Polars"
@@ -92,12 +100,23 @@ rapids-logger "Run narwhals tests for cuDF Pandas"
 # test_maybe_convert_dtypes_pandas: https://github.com/rapidsai/cudf/issues/14149
 # test_log_dtype_pandas: cudf is promoting the type to float64
 # test_len_over_2369: It fails during fallback. The error is 'DataFrame' object has no attribute 'to_frame'
+# test_all_ignore_nulls, test_allh_kleene, and test_anyh_kleene: https://github.com/rapidsai/cudf/issues/19417
+# test_offset_by_date_pandas: https://github.com/rapidsai/cudf/issues/19418
+# test_select_boolean_cols and test_select_boolean_cols_multi_group_by: https://github.com/rapidsai/cudf/issues/19421
+# test_to_datetime_pd_preserves_pyarrow_backend_dtype: https://github.com/rapidsai/cudf/issues/19422
 TESTS_THAT_NEED_CUDF_FIX=" \
 test_is_finite_expr or \
 test_is_finite_series or \
 test_maybe_convert_dtypes_pandas or \
 test_log_dtype_pandas or \
-test_len_over_2369 \
+test_len_over_2369 or \
+test_all_ignore_nulls or \
+test_allh_kleene or \
+test_anyh_kleene or \
+test_offset_by_date_pandas or \
+test_select_boolean_cols or \
+test_select_boolean_cols_multi_group_by or \
+test_to_datetime_pd_preserves_pyarrow_backend_dtype \
 "

 # test_array_dunder_with_copy: https://github.com/rapidsai/cudf/issues/18248#issuecomment-2719234741
diff --git a/dependencies.yaml b/dependencies.yaml
index 7e53ff8a959..f214bde574e 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -1120,4 +1120,4 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - narwhals==1.41
+          - narwhals==1.47
diff --git a/python/cudf/cudf/testing/narwhals_test_plugin.py b/python/cudf/cudf/testing/narwhals_test_plugin.py
index 41f38999f6e..4355397dd78 100644
--- a/python/cudf/cudf/testing/narwhals_test_plugin.py
+++ b/python/cudf/cudf/testing/narwhals_test_plugin.py
@@ -13,6 +13,8 @@
 EXPECTED_FAILURES: Mapping[str, str] = {
     "tests/frame/select_test.py::test_select_duplicates[cudf]": "cuDF doesn't support having multiple columns with same names",
     "tests/expr_and_series/lit_test.py::test_date_lit[cudf]": "cuDF does not support pa.date32()",
+    "tests/frame/group_by_test.py::test_group_by_no_preserve_dtype[cudf-time]": "multiple dtype in the same column",
+    "tests/frame/group_by_test.py::test_group_by_no_preserve_dtype[cudf-bytes]": "cuDF doesn't support arrow TIME32",
 }

From 8bbed010a038dcce8d41fdb0c30eb3c33016d152 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 6 Aug 2025 12:32:50 -0500
Subject: [PATCH 070/366] Compatibility with rapidsmpf 25.10.0 (#19591)

https://github.com/rapidsai/rapidsmpf/pull/404 contained changes to the
insert / extract interface that we use in cudf-polars. This updates our
usage to be compatible with the new interface, which requires providing
a `BufferResource` (obtained from the worker context) rather than a
`MemoryResource`.
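
For reference, the new insert-side calling convention looks roughly like the
sketch below, which mirrors the diff in this patch. `df` (a cudf-polars
`DataFrame`), `shuffler`, and the `columns_to_hash`/`num_partitions` values
are assumed or illustrative, not part of the change itself:

```
# Sketch: the BufferResource now comes from the worker context (context.br)
# rather than from rmm.mr.get_current_device_resource().
from rapidsmpf.integrations.cudf.partition import partition_and_pack
from rapidsmpf.integrations.dask.core import get_worker_context
from rmm.pylibrmm.stream import DEFAULT_STREAM

context = get_worker_context()
packed_inputs = partition_and_pack(
    df.table,              # pylibcudf table backing the cudf-polars DataFrame
    columns_to_hash=(0,),  # illustrative
    num_partitions=4,      # illustrative
    br=context.br,         # BufferResource from the worker context
    stream=DEFAULT_STREAM,
)
shuffler.insert_chunks(packed_inputs)
```

The extract side follows the same pattern, threading `context.br` through
`unspill_partitions` and `unpack_and_concat` as shown in the diff.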

Authors:
  - Tom Augspurger (https://github.com/TomAugspurger)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)
  - Mads R. B. Kristensen (https://github.com/madsbk)

URL: https://github.com/rapidsai/cudf/pull/19591
---
 .../cudf_polars/experimental/shuffle.py       | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py
index c135261ed32..b6ecfc35fa8 100644
--- a/python/cudf_polars/cudf_polars/experimental/shuffle.py
+++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py
@@ -8,7 +8,6 @@
 from typing import TYPE_CHECKING, Any, TypedDict

 import pylibcudf as plc
-import rmm.mr
 from rmm.pylibrmm.stream import DEFAULT_STREAM

 from cudf_polars.containers import DataFrame
@@ -58,6 +57,14 @@ def insert_partition(
 ) -> None:
     """Add cudf-polars DataFrame chunks to an RMP shuffler."""
     from rapidsmpf.integrations.cudf.partition import partition_and_pack
+    from rapidsmpf.integrations.dask.core import get_worker_context
+
+    context = get_worker_context()
+
+    if context.br is None:  # pragma: no cover
+        raise ValueError(
+            "rapidsmpf insert_partition called on an uninitialized worker."
+        )

     on = options["on"]
     assert not other, f"Unexpected arguments: {other}"
@@ -66,8 +73,8 @@ def insert_partition(
         df.table,
         columns_to_hash=columns_to_hash,
         num_partitions=partition_count,
+        br=context.br,
         stream=DEFAULT_STREAM,
-        device_mr=rmm.mr.get_current_device_resource(),
     )
     shuffler.insert_chunks(packed_inputs)

@@ -79,16 +86,32 @@ def extract_partition(
     options: ShuffleOptions,
 ) -> DataFrame:
     """Extract a finished partition from the RMP shuffler."""
-    from rapidsmpf.integrations.cudf.partition import unpack_and_concat
+    from rapidsmpf.integrations.cudf.partition import (
+        unpack_and_concat,
+        unspill_partitions,
+    )
+    from rapidsmpf.integrations.dask.core import get_worker_context
+
+    context = get_worker_context()
+    if context.br is None:  # pragma: no cover
+        raise ValueError(
+            "rapidsmpf extract_partition called on an uninitialized worker."
+        )

     shuffler.wait_on(partition_id)
     column_names = options["column_names"]
     dtypes = options["dtypes"]
     return DataFrame.from_table(
         unpack_and_concat(
-            shuffler.extract(partition_id),
+            unspill_partitions(
+                shuffler.extract(partition_id),
+                br=context.br,
+                stream=DEFAULT_STREAM,
+                allow_overbooking=True,
+                statistics=context.statistics,
+            ),
+            br=context.br,
             stream=DEFAULT_STREAM,
-            device_mr=rmm.mr.get_current_device_resource(),
         ),
         column_names,
         dtypes,

From aaaa0aa053baf95c7cdce8e6c2190c28752d1186 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 6 Aug 2025 11:01:20 -0700
Subject: [PATCH 071/366] Move some test_multiindex.py tests to the new cudf classic test directory structure (#19496)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Other tests in this file will be moved once the `dataframe`/`series`
directory structures have been moved in
https://github.com/rapidsai/cudf/pull/19485 and
https://github.com/rapidsai/cudf/pull/19490

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19496
---
 .../tests/dataframe/methods/test_transpose.py |   16 +
 .../indexes/multiindex/methods/__init__.py    |    0
 .../multiindex/methods/test_argsort.py        |   53 +
 .../indexes/multiindex/methods/test_copy.py   |  128 ++
 .../multiindex/methods/test_droplevel.py      |  129 ++
 .../indexes/multiindex/methods/test_fillna.py |   54 +
 .../indexes/multiindex/methods/test_numpy.py  |   16 +
 .../multiindex/methods/test_nunique.py        |   18 +
 .../indexes/multiindex/methods/test_rename.py |   51 +
 .../multiindex/methods/test_set_methods.py    |  140 ++
 .../multiindex/methods/test_set_names.py      |  102 ++
 .../multiindex/methods/test_sort_values.py    |   52 +
 .../multiindex/methods/test_swaplevel.py      |   29 +
 .../indexes/multiindex/methods/test_take.py   |   41 +
 .../multiindex/methods/test_to_frame.py       |  103 ++
 .../multiindex/methods/test_to_pandas.py      |   41 +
 .../multiindex/methods/test_to_series.py      |   11 +
 .../indexes/multiindex/methods/test_unique.py |   16 +
 .../indexes/multiindex/test_attributes.py     |  214 +++
 .../tests/indexes/multiindex/test_binaryop.py |   14 +
 .../indexes/multiindex/test_constructing.py   |    1 -
 .../indexes/multiindex/test_constructors.py   |  183 +++
 .../tests/indexes/multiindex/test_getitem.py  |   13 +
 .../indexes/multiindex/test_properties.py     |    1 -
 .../indexes/multiindex/test_selecting.py      |    1 -
 python/cudf/cudf/tests/test_multiindex.py     | 1331 +----------------
 26 files changed, 1425 insertions(+), 1333 deletions(-)
 create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_transpose.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/__init__.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_argsort.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_droplevel.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_fillna.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_numpy.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_nunique.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_rename.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_set_methods.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_set_names.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_sort_values.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_swaplevel.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_take.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_to_frame.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_to_pandas.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_to_series.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_unique.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_attributes.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_binaryop.py
 delete mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_constructing.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_constructors.py
 create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_getitem.py
 delete mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_properties.py
 delete mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_selecting.py

diff --git a/python/cudf/cudf/tests/dataframe/methods/test_transpose.py b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py
new file mode 100644
index 00000000000..bb64e6d4896
--- /dev/null
+++ b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import pandas as pd
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_multiindex_transpose():
+    pdf = pd.DataFrame(
+        {"a": [1, 2, 3], "b": [4, 5, 6]},
+        index=pd.MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6)]),
+    )
+    gdf = cudf.from_pandas(pdf)
+    assert_eq(pdf.transpose(), gdf.transpose())
diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/__init__.py b/python/cudf/cudf/tests/indexes/multiindex/methods/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_argsort.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_argsort.py
new file mode 100644
index 00000000000..675c7cb69e9
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_argsort.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "pdi", + [ + pd.MultiIndex( + levels=[[1, 3.0, 4, 5], [1, 2.3, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + pd.MultiIndex( + levels=[[1, 3, 4, -10], [1, 11, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + pd.MultiIndex( + levels=[["a", "b", "c", "100"], ["1", "100", "5"]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + pytest.param( + pd.MultiIndex( + levels=[[None, "b", "c", "a"], ["1", None, "5"]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + marks=[ + pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/35584" + ) + ], + ), + ], +) +@pytest.mark.parametrize("ascending", [True, False]) +def test_multiindex_argsort(pdi, ascending): + gdi = cudf.from_pandas(pdi) + + if not ascending: + expected = pdi.argsort()[::-1] + else: + expected = pdi.argsort() + + actual = gdi.argsort(ascending=ascending) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py new file mode 100644 index 00000000000..1bae3b8292c --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import operator +from functools import reduce + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_copy_sem(): + gmi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019]], + ) + pmi = cudf.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019]], + ) + names = ["x", "y"] + gmi_copy = gmi.copy(names=names) + pmi_copy = pmi.copy(names=names) + assert_eq(gmi_copy, pmi_copy) + + +@pytest.mark.parametrize( + "data", + [ + { + "Date": [ + "2020-08-27", + "2020-08-28", + "2020-08-31", + "2020-08-27", + "2020-08-28", + "2020-08-31", + "2020-08-27", + "2020-08-28", + "2020-08-31", + ], + "Close": [ + 3400.00, + 3401.80, + 3450.96, + 226.58, + 228.91, + 225.53, + 505.13, + 525.91, + 534.98, + ], + "Symbol": [ + "AMZN", + "AMZN", + "AMZN", + "MSFT", + "MSFT", + "MSFT", + "NVDA", + "NVDA", + "NVDA", + ], + }, + pd.MultiIndex( + levels=[[1001, 1002], [2001, 2002]], + codes=[[1, 1, 0, 0], [0, 1, 0, 1]], + names=["col1", "col2"], + ), + ], +) +@pytest.mark.parametrize("copy_on_write", [True, False]) +@pytest.mark.parametrize("deep", [True, False]) +def test_multiindex_copy_deep(data, copy_on_write, deep): + """Test memory identity for deep copy + Case1: Constructed from GroupBy, StringColumns + Case2: Constructed from MultiIndex, NumericColumns + """ + with cudf.option_context("copy_on_write", copy_on_write): + if isinstance(data, dict): + gdf = cudf.DataFrame(data) + mi1 = gdf.groupby(["Date", "Symbol"]).mean().index + mi2 = mi1.copy(deep=deep) + + lchildren = [col.children for col in mi1._columns] + rchildren = [col.children for col in mi2._columns] + + # Flatten + lchildren = reduce(operator.add, lchildren) + rchildren = reduce(operator.add, rchildren) + + lptrs = [ + child.base_data.get_ptr(mode="read") for child in lchildren + ] + rptrs = [ + child.base_data.get_ptr(mode="read") for child in rchildren + ] + + assert all((x == y) for x, y in zip(lptrs, rptrs)) + + elif isinstance(data, pd.MultiIndex): + data = cudf.MultiIndex.from_pandas(data) + same_ref = (not deep) or ( + cudf.get_option("copy_on_write") and not deep 
+ ) + mi1 = data + mi2 = mi1.copy(deep=deep) + + # Assert ._levels identity + lptrs = [ + lv._column.base_data.get_ptr(mode="read") for lv in mi1._levels + ] + rptrs = [ + lv._column.base_data.get_ptr(mode="read") for lv in mi2._levels + ] + + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + + # Assert ._codes identity + lptrs = [c.base_data.get_ptr(mode="read") for c in mi1._codes] + rptrs = [c.base_data.get_ptr(mode="read") for c in mi2._codes] + + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + + # Assert ._data identity + lptrs = [d.base_data.get_ptr(mode="read") for d in mi1._columns] + rptrs = [d.base_data.get_ptr(mode="read") for d in mi2._columns] + + assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_droplevel.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_droplevel.py new file mode 100644 index 00000000000..37048f0633e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_droplevel.py @@ -0,0 +1,129 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import itertools + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "level", + [ + [], + "alpha", + "location", + "weather", + 0, + 1, + [0, 1], + -1, + [-1, -2], + [-1, "weather"], + ], +) +def test_multiindex_droplevel_simple(level): + pdfIndex = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + [ + np.datetime64("2001-01-01", "ns"), + np.datetime64("2002-01-01", "ns"), + np.datetime64("2003-01-01", "ns"), + ], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + [1, 0, 1, 2, 0, 0, 1], + ], + ) + pdfIndex.names = ["alpha", "location", "weather", "sign", "timestamp"] + gdfIndex = cudf.from_pandas(pdfIndex) + assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) + + +@pytest.mark.parametrize( + "level", + itertools.chain( + *( + itertools.combinations( + ("alpha", "location", "weather", "sign", "timestamp"), r + ) + for r in range(5) + ) + ), +) +def test_multiindex_droplevel_name(level): + pdfIndex = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + [ + np.datetime64("2001-01-01", "ns"), + np.datetime64("2002-01-01", "ns"), + np.datetime64("2003-01-01", "ns"), + ], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + [1, 0, 1, 2, 0, 0, 1], + ], + ) + pdfIndex.names = ["alpha", "location", "weather", "sign", "timestamp"] + level = list(level) + gdfIndex = cudf.from_pandas(pdfIndex) + assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) + + +@pytest.mark.parametrize( + "level", + itertools.chain(*(itertools.combinations(range(5), r) for r in range(5))), +) +def test_multiindex_droplevel_index(level): + pdfIndex = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + [ + np.datetime64("2001-01-01", "ns"), + np.datetime64("2002-01-01", "ns"), + np.datetime64("2003-01-01", "ns"), + ], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + [1, 0, 1, 2, 0, 0, 1], + ], + ) + pdfIndex.names = ["alpha", "location", "weather", "sign", "timestamp"] + level = list(level) + gdfIndex 
= cudf.from_pandas(pdfIndex) + assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) + + +def test_multiindex_droplevel_single_level_none_names(): + data = [(1, 2), (3, 4)] + pidx = pd.MultiIndex.from_tuples(data, names=[None, None]) + gidx = cudf.MultiIndex.from_tuples(data, names=[None, None]) + result = gidx.droplevel(0) + expected = pidx.droplevel(0) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_fillna.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_fillna.py new file mode 100644 index 00000000000..c3d23e68008 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_fillna.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "gdi, fill_value, expected", + [ + ( + lambda: cudf.MultiIndex( + levels=[[1, 3, 4, None], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + 5, + lambda: cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + ), + ( + lambda: cudf.MultiIndex( + levels=[[1, 3, 4, None], [1, None, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + 100, + lambda: cudf.MultiIndex( + levels=[[1, 3, 4, 100], [1, 100, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + ), + ( + lambda: cudf.MultiIndex( + levels=[["a", "b", "c", None], ["1", None, "5"]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + "100", + lambda: cudf.MultiIndex( + levels=[["a", "b", "c", "100"], ["1", "100", "5"]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + ), + ], +) +def test_multiindex_fillna(gdi, fill_value, expected): + assert_eq(expected(), gdi().fillna(fill_value)) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_numpy.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_numpy.py new file mode 100644 index 00000000000..e3b19b8e7d8 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_numpy.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_to_numpy(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + pmidx = midx.to_pandas() + + assert_eq(midx.to_numpy(), pmidx.to_numpy()) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_nunique.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_nunique.py new file mode 100644 index 00000000000..266e799267f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_nunique.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import pandas as pd +import pytest + +import cudf + + +@pytest.mark.parametrize("array", [[1, 2], [1, None], [None, None]]) +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique(array, dropna): + arrays = [array, [3, 4]] + gidx = cudf.MultiIndex.from_arrays(arrays) + pidx = pd.MultiIndex.from_arrays(arrays) + result = gidx.nunique(dropna=dropna) + expected = pidx.nunique(dropna=dropna) + assert result == expected diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_rename.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_rename.py new file mode 100644 index 00000000000..c0e4f9252f0 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_rename.py @@ -0,0 +1,51 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("name", [None, "old name"]) +@pytest.mark.parametrize( + "names", + [ + [None, None], + ["a", None], + ["new name", "another name"], + [1, None], + [2, 3], + [42, "name"], + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_multiindex_rename(name, names, inplace): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019]], names=[name, None] + ) + gi = cudf.from_pandas(pi) + + expected = pi.rename(names=names, inplace=inplace) + actual = gi.rename(names=names, inplace=inplace) + + if inplace: + expected, actual = pi, gi + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "names", ["plain string", 123, ["str"], ["l1", "l2", "l3"]] +) +def test_multiindex_rename_error(names): + pi = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) + gi = cudf.from_pandas(pi) + + assert_exceptions_equal( + lfunc=pi.rename, + rfunc=gi.rename, + lfunc_args_and_kwargs=([], {"names": names}), + rfunc_args_and_kwargs=([], {"names": names}), + ) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_methods.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_methods.py new file mode 100644 index 00000000000..51731e425b4 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_methods.py @@ -0,0 +1,140 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def test_multiindex_union_error(): + midx = cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) + pidx = midx.to_pandas() + + assert_exceptions_equal( + midx.union, + pidx.union, + lfunc_args_and_kwargs=(["a"],), + rfunc_args_and_kwargs=(["b"],), + ) + + +@pytest.mark.parametrize( + "idx1, idx2", + [ + ( + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ), + pd.MultiIndex.from_arrays( + [[1, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], ["Red", "Blue", "Red", "Blue"]], + names=["a", "b"], + ), + pd.MultiIndex.from_arrays( + [[3, 3, 2, 4], ["Red", "Green", "Red", "Green"]], + names=["x", "y"], + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + pd.MultiIndex.from_arrays( + [[3, 3, 2, 4], [0.2, 0.4, 1.4, 10], [3, 3, 2, 4]] + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + ), + ), + ], +) +@pytest.mark.parametrize("sort", [None, False]) +def test_intersection_mulitIndex(idx1, idx2, sort): + expected = idx1.intersection(idx2, sort=sort) + + idx1 = cudf.from_pandas(idx1) + idx2 = cudf.from_pandas(idx2) + + actual = idx1.intersection(idx2, sort=sort) + assert_eq(expected, actual, exact=False) + + +def test_difference(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + midx2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3, 3], [0, 2, 1, 1, 0, 2]], + names=["x", "y"], + ) + + expected = midx2.to_pandas().difference(midx.to_pandas()) + actual = midx2.difference(midx) + assert isinstance(actual, cudf.MultiIndex) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "idx1, idx2", + [ + ( + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ), + pd.MultiIndex.from_arrays( + [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], ["Red", "Blue", "Red", "Blue"]], + names=["a", "b"], + ), + pd.MultiIndex.from_arrays( + [[3, 3, 2, 4], ["Red", "Green", "Red", "Green"]], + names=["x", "y"], + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + pd.MultiIndex.from_arrays( + [[3, 3, 2, 4], [0.2, 0.4, 1.4, 10], [3, 3, 2, 4]] + ), + ), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + [(2, 6, 12)], + ), + ], +) +@pytest.mark.parametrize("sort", [None, False]) +def test_union_mulitIndex(idx1, idx2, sort): + expected = idx1.union(idx2, sort=sort) + + idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.MultiIndex) else idx1 + idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.MultiIndex) else idx2 + + actual = idx1.union(idx2, sort=sort) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_names.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_names.py new file mode 100644 index 00000000000..3b99adc346a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_set_names.py @@ -0,0 +1,102 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "names", [[None, None], ["a", None], ["new name", "another name"]] +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_multiindex_set_names(names, inplace): + pi = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) + gi = cudf.from_pandas(pi) + + expected = pi.set_names(names=names, inplace=inplace) + actual = gi.set_names(names=names, inplace=inplace) + + if inplace: + expected, actual = pi, gi + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("idx_names", [[None, None, None], [1, 0, 2]]) +@pytest.mark.parametrize( + "level, names", + [ + (0, "abc"), + (1, "xyz"), + ([2, 1], ["a", "b"]), + ([0, 1], ["aa", "bb"]), + (None, ["a", "b", "c"]), + (None, ["a", None, "c"]), + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_multiindex_set_names_default_and_int_names( + idx_names, level, names, inplace +): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], names=idx_names + ) + gi = cudf.from_pandas(pi) + + expected = pi.set_names(names=names, level=level, inplace=inplace) + actual = gi.set_names(names=names, level=level, inplace=inplace) + + if inplace: + expected, actual = pi, gi + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "level, names", + [ + ([None], "abc"), + (["three", "one"], ["a", "b"]), + (["three", 1], ["a", "b"]), + ([0, "three", 1], ["a", "b", "z"]), + (["one", 1, "three"], ["a", "b", "z"]), + (["one", None, "three"], ["a", "b", "z"]), + ([2, 1], ["a", "b"]), + (1, "xyz"), + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_multiindex_set_names_string_names(level, names, inplace): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], + names=["one", None, "three"], + ) + gi = cudf.from_pandas(pi) + + expected = pi.set_names(names=names, level=level, inplace=inplace) + actual = gi.set_names(names=names, level=level, inplace=inplace) + + if inplace: + expected, actual = pi, gi + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "level, names", [(1, ["a"]), (None, "a"), ([1, 2], ["a"]), (None, ["a"])] +) +def test_multiindex_set_names_error(level, names): + pi = pd.MultiIndex.from_product( + [["python", "cobra"], [2018, 2019], ["aab", "bcd"]] + ) + gi = cudf.from_pandas(pi) + + assert_exceptions_equal( + lfunc=pi.set_names, + rfunc=gi.set_names, + lfunc_args_and_kwargs=([], {"names": names, "level": level}), + rfunc_args_and_kwargs=([], {"names": names, "level": level}), + ) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_sort_values.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_sort_values.py new file mode 100644 index 00000000000..32e19c04a2a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_sort_values.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("return_indexer", [True, False]) +@pytest.mark.parametrize( + "pmidx", + [ + pd.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ), + pd.MultiIndex.from_product( + [["bar", "baz", "foo", "qux"], ["one", "two"]], + names=["first", "second"], + ), + pd.MultiIndex( + levels=[[], [], []], + codes=[[], [], []], + names=["one", "two", "three"], + ), + pd.MultiIndex.from_tuples([(1, 2), (3, 4)]), + ], +) +def test_multiindex_sort_values(pmidx, ascending, return_indexer): + pmidx = pmidx + midx = cudf.from_pandas(pmidx) + + expected = pmidx.sort_values( + ascending=ascending, return_indexer=return_indexer + ) + actual = midx.sort_values( + ascending=ascending, return_indexer=return_indexer + ) + + if return_indexer: + expected_indexer = expected[1] + actual_indexer = actual[1] + + assert_eq(expected_indexer, actual_indexer) + + expected = expected[0] + actual = actual[0] + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_swaplevel.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_swaplevel.py new file mode 100644 index 00000000000..4ef52a24628 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_swaplevel.py @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_swaplevel(): + midx = cudf.MultiIndex( + levels=[ + ["lama", "cow", "falcon"], + ["speed", "weight", "length"], + ["first", "second"], + ], + codes=[ + [0, 0, 0, 1, 1, 1, 2, 2, 2], + [0, 1, 2, 0, 1, 2, 0, 1, 2], + [0, 0, 0, 0, 0, 0, 1, 1, 1], + ], + names=["Col1", "Col2", "Col3"], + ) + pd_midx = midx.to_pandas() + + assert_eq(pd_midx.swaplevel(-1, -2), midx.swaplevel(-1, -2)) + assert_eq(pd_midx.swaplevel(2, 1), midx.swaplevel(2, 1)) + assert_eq(midx.swaplevel(2, 1), midx.swaplevel(1, 2)) + assert_eq(pd_midx.swaplevel(0, 2), midx.swaplevel(0, 2)) + assert_eq(pd_midx.swaplevel(2, 0), midx.swaplevel(2, 0)) + assert_eq(midx.swaplevel(1, 1), midx.swaplevel(1, 1)) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_take.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_take.py new file mode 100644 index 00000000000..e8e06d84b70 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_take.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_take(): + pdfIndex = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + [ + np.datetime64("2001-01-01", "ns"), + np.datetime64("2002-01-01", "ns"), + np.datetime64("2003-01-01", "ns"), + ], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + [1, 0, 1, 2, 0, 0, 1], + ], + ) + pdfIndex.names = ["alpha", "location", "weather", "sign", "timestamp"] + gdfIndex = cudf.from_pandas(pdfIndex) + assert_eq(pdfIndex.take([0]), gdfIndex.take([0])) + assert_eq(pdfIndex.take(np.array([0])), gdfIndex.take(np.array([0]))) + assert_eq(pdfIndex.take(pd.Series([0])), gdfIndex.take(cudf.Series([0]))) + assert_eq(pdfIndex.take([0, 1]), gdfIndex.take([0, 1])) + assert_eq(pdfIndex.take(np.array([0, 1])), gdfIndex.take(np.array([0, 1]))) + assert_eq( + pdfIndex.take(pd.Series([0, 1])), gdfIndex.take(cudf.Series([0, 1])) + ) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_frame.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_frame.py new file mode 100644 index 00000000000..716962bb242 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_frame.py @@ -0,0 +1,103 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd +import pytest + +import cudf +from cudf.api.extensions import no_default +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("codes", [[0, 1, 2], [-1, 0, 1]]) +def test_multiindex_to_frame(codes): + pdfIndex = pd.MultiIndex( + [ + ["a", "b", "c"], + ], + [ + codes, + ], + ) + gdfIndex = cudf.from_pandas(pdfIndex) + assert_eq(pdfIndex.to_frame(), gdfIndex.to_frame()) + + +@pytest.mark.parametrize( + "pidx", + [ + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "a", "a"], + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + ), + ], +) +@pytest.mark.parametrize( + "name", [None, no_default, ["x", "y", "z"], ["rapids", "rapids", "rapids"]] +) +@pytest.mark.parametrize("allow_duplicates", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +def test_multiindex_to_frame_allow_duplicates( + pidx, name, allow_duplicates, index +): + gidx = cudf.from_pandas(pidx) + + if name is None or ( + ( + len(pidx.names) != len(set(pidx.names)) + and not all(x is None for x in pidx.names) + ) + and not allow_duplicates + and name is no_default + ): + assert_exceptions_equal( + pidx.to_frame, + gidx.to_frame, + lfunc_args_and_kwargs=( + [], + { + "index": index, + "name": name, + "allow_duplicates": allow_duplicates, + }, + ), + rfunc_args_and_kwargs=( + [], + { + "index": index, + "name": name, + "allow_duplicates": allow_duplicates, + }, + ), + ) + else: + if ( + len(pidx.names) != len(set(pidx.names)) + and not all(x is None for x in pidx.names) + and not isinstance(name, list) + ) or (isinstance(name, list) and len(name) != len(set(name))): + # cudf doesn't have the ability to construct dataframes + # with duplicate column names + with pytest.raises(ValueError): + gidx.to_frame( + index=index, + name=name, + allow_duplicates=allow_duplicates, + ) + else: + expected = pidx.to_frame( + index=index, name=name, 
allow_duplicates=allow_duplicates + ) + actual = gidx.to_frame( + index=index, name=name, allow_duplicates=allow_duplicates + ) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_pandas.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_pandas.py new file mode 100644 index 00000000000..0c91c8e46ba --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_pandas.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import datetime + +import pandas as pd +import pyarrow as pa +import pytest + +import cudf + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + pd.Interval(1, 2), + ], +) +def test_index_to_pandas_arrow_type_nullable_raises(scalar): + pa_array = [scalar, None] + midx = cudf.MultiIndex(levels=[pa_array], codes=[[0]]) + with pytest.raises(ValueError): + midx.to_pandas(nullable=True, arrow_type=True) + + +@pytest.mark.parametrize( + "scalar", + [1, 1.0, "a", datetime.datetime(2020, 1, 1), datetime.timedelta(1)], +) +def test_index_to_pandas_arrow_type(scalar): + pa_array = pa.array([scalar, None]) + midx = cudf.MultiIndex(levels=[pa_array], codes=[[0]]) + result = midx.to_pandas(arrow_type=True) + expected = pd.MultiIndex( + levels=[pd.arrays.ArrowExtensionArray(pa_array)], codes=[[0]] + ) + pd.testing.assert_index_equal(result, expected) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_series.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_series.py new file mode 100644 index 00000000000..c017d73ef83 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_series.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf + + +def test_multiindex_to_series_error(): + midx = cudf.MultiIndex.from_tuples([("a", "b")]) + with pytest.raises(NotImplementedError): + midx.to_series() diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_unique.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_unique.py new file mode 100644 index 00000000000..184d175c3b4 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_unique.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_unique_level(): + pd_mi = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 3, 2]]) + cudf_mi = cudf.MultiIndex.from_pandas(pd_mi) + + result = pd_mi.unique(level=1) + expected = cudf_mi.unique(level=1) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py new file mode 100644 index 00000000000..d26e7216a73 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py @@ -0,0 +1,214 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import re + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.fixture( + params=[ + "from_product", + "from_tuples", + "from_arrays", + "init", + ] +) +def midx(request): + if request.param == "from_product": + return cudf.MultiIndex.from_product([[0, 1], [1, 0]]) + elif request.param == "from_tuples": + return cudf.MultiIndex.from_tuples([(0, 1), (0, 0), (1, 1), (1, 0)]) + elif request.param == "from_arrays": + return cudf.MultiIndex.from_arrays([[0, 0, 1, 1], [1, 0, 1, 0]]) + elif request.param == "init": + return cudf.MultiIndex( + levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [1, 0, 1, 0]] + ) + else: + raise NotImplementedError(f"{request.param} not implemented") + + +def test_multindex_constructor_levels_always_indexes(midx): + assert_eq(midx.levels[0], cudf.Index([0, 1])) + assert_eq(midx.levels[1], cudf.Index([0, 1])) + + +def test_bool_raises(): + assert_exceptions_equal( + lfunc=bool, + rfunc=bool, + lfunc_args_and_kwargs=[[cudf.MultiIndex.from_arrays([range(1)])]], + rfunc_args_and_kwargs=[[pd.MultiIndex.from_arrays([range(1)])]], + ) + + +def test_multi_index_contains_hashable(): + gidx = cudf.MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + pidx = gidx.to_pandas() + + assert_exceptions_equal( + lambda: [] in gidx, + lambda: [] in pidx, + lfunc_args_and_kwargs=((),), + rfunc_args_and_kwargs=((),), + ) + + +def test_multiindex_codes(): + midx = cudf.MultiIndex.from_tuples( + [("a", "b"), ("a", "c"), ("b", "c")], names=["A", "Z"] + ) + + for p_array, g_array in zip(midx.to_pandas().codes, midx.codes): + assert_eq(p_array, g_array) + + +def test_multiindex_values_pandas_compatible(): + midx = cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + midx.values + + +@pytest.mark.parametrize("bad", ["foo", ["foo"]]) +def test_multiindex_set_names_validation(bad): + mi = cudf.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0), (1, 1)]) + with pytest.raises(ValueError): + mi.names = bad + + +def test_multiindex_levels(): + gidx = cudf.MultiIndex.from_product( + [range(3), ["one", "two"]], names=["first", "second"] + ) + pidx = gidx.to_pandas() + + assert_eq(gidx.levels[0], pidx.levels[0]) + assert_eq(gidx.levels[1], pidx.levels[1]) + + +@pytest.mark.parametrize( + "pidx", + [ + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], + names=["a", "b", "c"], + ), + pd.MultiIndex.from_arrays( + [[1.0, 2, 3, 4], [5, 6, 7.8, 10], [11, 12, 12, 13]], + ), + ], +) +@pytest.mark.parametrize( + "func", + [ + "is_numeric", + "is_boolean", + "is_integer", + "is_floating", + "is_object", + "is_categorical", + "is_interval", + ], +) +def test_multiindex_type_methods(pidx, func): + gidx = cudf.from_pandas(pidx) + + with pytest.warns(FutureWarning): + expected = getattr(pidx, func)() + + with pytest.warns(FutureWarning): + actual = getattr(gidx, func)() + + if func == "is_object": + assert_eq(False, actual) + else: + assert_eq(expected, actual) + + +def test_multiindex_iter_error(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + + with pytest.raises( + TypeError, + match=re.escape( + f"{midx.__class__.__name__} object is not iterable. 
" + f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " + f"if you wish to iterate over the values." + ), + ): + iter(midx) + + +def test_multiindex_values(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + + result = midx.values + + assert isinstance(result, cp.ndarray) + np.testing.assert_array_equal( + result.get(), np.array([[1, 1], [1, 5], [3, 2], [4, 2], [5, 1]]) + ) + + +def test_multiindex_values_host(): + midx = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + pmidx = midx.to_pandas() + + assert_eq(midx.values_host, pmidx.values) + + +@pytest.mark.parametrize( + "pdi", + [ + pd.MultiIndex( + levels=[[], [], []], + codes=[[], [], []], + names=["one", "two", "three"], + ), + pd.MultiIndex.from_tuples([(1, 2), (3, 4)]), + ], +) +def test_multiindex_empty(pdi): + gdi = cudf.from_pandas(pdi) + + assert_eq(pdi.empty, gdi.empty) + + +@pytest.mark.parametrize( + "pdi", + [ + pd.MultiIndex( + levels=[[], [], []], + codes=[[], [], []], + names=["one", "two", "three"], + ), + pd.MultiIndex.from_tuples([(1, 2), (3, 4)]), + ], +) +def test_multiindex_size(pdi): + gdi = cudf.from_pandas(pdi) + + assert_eq(pdi.size, gdi.size) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_binaryop.py b/python/cudf/cudf/tests/indexes/multiindex/test_binaryop.py new file mode 100644 index 00000000000..973e0feff71 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/test_binaryop.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_eq_other_multiindex(): + idx = cudf.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0), (1, 1)]) + result = idx == idx + expected = np.array([True, True]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_constructing.py b/python/cudf/cudf/tests/indexes/multiindex/test_constructing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/multiindex/test_constructing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py new file mode 100644 index 00000000000..3db599f49e5 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py @@ -0,0 +1,183 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def test_multiindex_levels_codes_validation(): + levels = [["a", "b"], ["c", "d"]] + + # Codes not a sequence of sequences + assert_exceptions_equal( + lfunc=pd.MultiIndex, + rfunc=cudf.MultiIndex, + lfunc_args_and_kwargs=([levels, [0, 1]],), + rfunc_args_and_kwargs=([levels, [0, 1]],), + ) + + # Codes don't match levels + assert_exceptions_equal( + lfunc=pd.MultiIndex, + rfunc=cudf.MultiIndex, + lfunc_args_and_kwargs=([levels, [[0], [1], [1]]],), + rfunc_args_and_kwargs=([levels, [[0], [1], [1]]],), + ) + + # Largest code greater than number of levels + assert_exceptions_equal( + lfunc=pd.MultiIndex, + rfunc=cudf.MultiIndex, + lfunc_args_and_kwargs=([levels, [[0, 1], [0, 2]]],), + rfunc_args_and_kwargs=([levels, [[0, 1], [0, 2]]],), + ) + + # Unequal code lengths + assert_exceptions_equal( + lfunc=pd.MultiIndex, + rfunc=cudf.MultiIndex, + lfunc_args_and_kwargs=([levels, [[0, 1], [0]]],), + rfunc_args_and_kwargs=([levels, [[0, 1], [0]]],), + ) + # Didn't pass levels and codes + assert_exceptions_equal(lfunc=pd.MultiIndex, rfunc=cudf.MultiIndex) + + # Didn't pass non zero levels and codes + assert_exceptions_equal( + lfunc=pd.MultiIndex, + rfunc=cudf.MultiIndex, + lfunc_args_and_kwargs=([[], []],), + rfunc_args_and_kwargs=([[], []],), + ) + + +def test_multiindex_construction(): + levels = [["a", "b"], ["c", "d"]] + codes = [[0, 1], [1, 0]] + pmi = pd.MultiIndex(levels, codes) + mi = cudf.MultiIndex(levels, codes) + assert_eq(pmi, mi) + pmi = pd.MultiIndex(levels, codes) + mi = cudf.MultiIndex(levels=levels, codes=codes) + assert_eq(pmi, mi) + + +def test_multiindex_types(): + codes = [[0, 1], [1, 0]] + levels = [[0, 1], [2, 3]] + pmi = pd.MultiIndex(levels, codes) + mi = cudf.MultiIndex(levels, codes) + assert_eq(pmi, mi) + levels = [[1.2, 2.1], [1.3, 3.1]] + pmi = pd.MultiIndex(levels, codes) + mi = cudf.MultiIndex(levels, codes) + assert_eq(pmi, mi) + levels = [["a", "b"], ["c", "d"]] + pmi = pd.MultiIndex(levels, codes) + mi = cudf.MultiIndex(levels, codes) + assert_eq(pmi, mi) + + +def test_multiindex_from_tuples(): + arrays = [["a", "a", "b", "b"], ["house", "store", "house", "store"]] + tuples = list(zip(*arrays)) + pmi = pd.MultiIndex.from_tuples(tuples) + gmi = cudf.MultiIndex.from_tuples(tuples) + assert_eq(pmi, gmi) + + +def test_multiindex_from_dataframe(): + pdf = pd.DataFrame( + [["a", "house"], ["a", "store"], ["b", "house"], ["b", "store"]] + ) + gdf = cudf.from_pandas(pdf) + pmi = pd.MultiIndex.from_frame(pdf, names=["alpha", "location"]) + gmi = cudf.MultiIndex.from_frame(gdf, names=["alpha", "location"]) + assert_eq(pmi, gmi) + + +@pytest.mark.parametrize( + "arrays", + [ + [["a", "a", "b", "b"], ["house", "store", "house", "store"]], + [["a", "n", "n"] * 10, ["house", "store", "house", "store"]], + [ + ["a", "n", "n"], + ["house", "store", "house", "store", "store"] * 10, + ], + [ + ["a", "a", "n"] * 50, + ["house", "store", "house", "store", "store"] * 10, + ], + ], +) +def test_multiindex_from_product(arrays): + pmi = pd.MultiIndex.from_product(arrays, names=["alpha", "location"]) + gmi = cudf.MultiIndex.from_product(arrays, names=["alpha", "location"]) + assert_eq(pmi, gmi) + + +@pytest.mark.parametrize( + "array", + [ + list, + tuple, + np.array, + cp.array, + pd.Index, + cudf.Index, + pd.Series, + cudf.Series, + ], +) +def test_multiindex_from_arrays(array): + pd_data = [[0, 0, 
1, 1], [1, 0, 1, 0]] + cudf_data = [array(lst) for lst in pd_data] + result = pd.MultiIndex.from_arrays(pd_data) + expected = cudf.MultiIndex.from_arrays(cudf_data) + assert_eq(result, expected) + + +@pytest.mark.parametrize("arg", ["foo", ["foo"]]) +def test_multiindex_from_arrays_wrong_arg(arg): + with pytest.raises(TypeError): + cudf.MultiIndex.from_arrays(arg) + + +@pytest.mark.parametrize( + "idx", [pd.Index, pd.CategoricalIndex, pd.DatetimeIndex, pd.TimedeltaIndex] +) +def test_from_arrays_infer_names(idx): + arrays = [idx([1], name="foo"), idx([2], name="bar")] + expected = pd.MultiIndex.from_arrays(arrays) + result = cudf.MultiIndex.from_arrays(arrays) + assert_eq(result, expected) + + +def test_multiindex_dtype_error(): + midx = cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) + with pytest.raises(TypeError): + cudf.Index(midx, dtype="int64") + with pytest.raises(TypeError): + cudf.Index(midx.to_pandas(), dtype="int64") + + +def test_multiindex_duplicate_names(): + gi = cudf.MultiIndex( + levels=[["a", "b"], ["b", "a"]], + codes=[[0, 0], [0, 1]], + names=["a", "a"], + ) + pi = pd.MultiIndex( + levels=[["a", "b"], ["b", "a"]], + codes=[[0, 0], [0, 1]], + names=["a", "a"], + ) + + assert_eq(gi, pi) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_getitem.py b/python/cudf/cudf/tests/indexes/multiindex/test_getitem.py new file mode 100644 index 00000000000..f04dcfe9cec --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/test_getitem.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_empty_slice_pandas_compatibility(): + expected = pd.MultiIndex.from_tuples([("a", "b")])[:0] + with cudf.option_context("mode.pandas_compatible", True): + actual = cudf.from_pandas(expected) + assert_eq(expected, actual, exact=False) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_properties.py b/python/cudf/cudf/tests/indexes/multiindex/test_properties.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/multiindex/test_properties.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_selecting.py b/python/cudf/cudf/tests/indexes/multiindex/test_selecting.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/multiindex/test_selecting.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index d274166d0d2..c8c4923a4e3 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1,26 +1,17 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. 
-""" -Test related to MultiIndex -""" - -import datetime import itertools import operator import pickle -import re from contextlib import contextmanager -from functools import reduce from io import BytesIO import cupy as cp import numpy as np import pandas as pd -import pyarrow as pa import pytest import cudf -from cudf.api.extensions import no_default from cudf.core.column import as_column from cudf.testing import assert_eq, assert_neq from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @@ -36,79 +27,6 @@ def expect_pandas_performance_warning(idx): yield -def test_multiindex_levels_codes_validation(): - levels = [["a", "b"], ["c", "d"]] - - # Codes not a sequence of sequences - assert_exceptions_equal( - lfunc=pd.MultiIndex, - rfunc=cudf.MultiIndex, - lfunc_args_and_kwargs=([levels, [0, 1]],), - rfunc_args_and_kwargs=([levels, [0, 1]],), - ) - - # Codes don't match levels - assert_exceptions_equal( - lfunc=pd.MultiIndex, - rfunc=cudf.MultiIndex, - lfunc_args_and_kwargs=([levels, [[0], [1], [1]]],), - rfunc_args_and_kwargs=([levels, [[0], [1], [1]]],), - ) - - # Largest code greater than number of levels - assert_exceptions_equal( - lfunc=pd.MultiIndex, - rfunc=cudf.MultiIndex, - lfunc_args_and_kwargs=([levels, [[0, 1], [0, 2]]],), - rfunc_args_and_kwargs=([levels, [[0, 1], [0, 2]]],), - ) - - # Unequal code lengths - assert_exceptions_equal( - lfunc=pd.MultiIndex, - rfunc=cudf.MultiIndex, - lfunc_args_and_kwargs=([levels, [[0, 1], [0]]],), - rfunc_args_and_kwargs=([levels, [[0, 1], [0]]],), - ) - # Didn't pass levels and codes - assert_exceptions_equal(lfunc=pd.MultiIndex, rfunc=cudf.MultiIndex) - - # Didn't pass non zero levels and codes - assert_exceptions_equal( - lfunc=pd.MultiIndex, - rfunc=cudf.MultiIndex, - lfunc_args_and_kwargs=([[], []],), - rfunc_args_and_kwargs=([[], []],), - ) - - -def test_multiindex_construction(): - levels = [["a", "b"], ["c", "d"]] - codes = [[0, 1], [1, 0]] - pmi = pd.MultiIndex(levels, codes) - mi = cudf.MultiIndex(levels, codes) - assert_eq(pmi, mi) - pmi = pd.MultiIndex(levels, codes) - mi = cudf.MultiIndex(levels=levels, codes=codes) - assert_eq(pmi, mi) - - -def test_multiindex_types(): - codes = [[0, 1], [1, 0]] - levels = [[0, 1], [2, 3]] - pmi = pd.MultiIndex(levels, codes) - mi = cudf.MultiIndex(levels, codes) - assert_eq(pmi, mi) - levels = [[1.2, 2.1], [1.3, 3.1]] - pmi = pd.MultiIndex(levels, codes) - mi = cudf.MultiIndex(levels, codes) - assert_eq(pmi, mi) - levels = [["a", "b"], ["c", "d"]] - pmi = pd.MultiIndex(levels, codes) - mi = cudf.MultiIndex(levels, codes) - assert_eq(pmi, mi) - - def test_multiindex_df_assignment(): pdf = pd.DataFrame({"x": [1, 2, 3]}) gdf = cudf.from_pandas(pdf) @@ -129,30 +47,6 @@ def test_multiindex_series_assignment(): assert_eq(ps, gs) -def test_multiindex_swaplevel(): - midx = cudf.MultiIndex( - levels=[ - ["lama", "cow", "falcon"], - ["speed", "weight", "length"], - ["first", "second"], - ], - codes=[ - [0, 0, 0, 1, 1, 1, 2, 2, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 0, 0, 0, 0, 0, 1, 1, 1], - ], - names=["Col1", "Col2", "Col3"], - ) - pd_midx = midx.to_pandas() - - assert_eq(pd_midx.swaplevel(-1, -2), midx.swaplevel(-1, -2)) - assert_eq(pd_midx.swaplevel(2, 1), midx.swaplevel(2, 1)) - assert_eq(midx.swaplevel(2, 1), midx.swaplevel(1, 2)) - assert_eq(pd_midx.swaplevel(0, 2), midx.swaplevel(0, 2)) - assert_eq(pd_midx.swaplevel(2, 0), midx.swaplevel(2, 0)) - assert_eq(midx.swaplevel(1, 1), midx.swaplevel(1, 1)) - - def test_string_index(): rng = np.random.default_rng(seed=0) pdf = 
pd.DataFrame(rng.random(size=(5, 5))) @@ -256,13 +150,6 @@ def test_from_pandas(pdf, pdfIndex): assert_eq(pdf, gdf) -def test_multiindex_transpose(pdf, pdfIndex): - pdf = pdf.copy(deep=False) - pdf.index = pdfIndex - gdf = cudf.from_pandas(pdf) - assert_eq(pdf.transpose(), gdf.transpose()) - - def test_from_pandas_series(): pdf = pd.DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} @@ -285,26 +172,6 @@ def test_series_multiindex(pdfIndex): assert_eq(ps, gs) -def test_multiindex_take(pdf, gdf, pdfIndex): - gdfIndex = cudf.from_pandas(pdfIndex) - pdf = pdf.copy(deep=False) - gdf = gdf.copy(deep=False) - pdf.index = pdfIndex - gdf.index = gdfIndex - assert_eq(pdf.index.take([0]), gdf.index.take([0])) - assert_eq(pdf.index.take(np.array([0])), gdf.index.take(np.array([0]))) - from cudf import Series - - assert_eq(pdf.index.take(pd.Series([0])), gdf.index.take(Series([0]))) - assert_eq(pdf.index.take([0, 1]), gdf.index.take([0, 1])) - assert_eq( - pdf.index.take(np.array([0, 1])), gdf.index.take(np.array([0, 1])) - ) - assert_eq( - pdf.index.take(pd.Series([0, 1])), gdf.index.take(Series([0, 1])) - ) - - def test_multiindex_getitem(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) pdf = pdf.copy(deep=False) @@ -495,45 +362,6 @@ def test_multiindex_columns(pdf, pdfIndex, query): assert_eq(expected, got) -def test_multiindex_from_tuples(): - arrays = [["a", "a", "b", "b"], ["house", "store", "house", "store"]] - tuples = list(zip(*arrays)) - pmi = pd.MultiIndex.from_tuples(tuples) - gmi = cudf.MultiIndex.from_tuples(tuples) - assert_eq(pmi, gmi) - - -def test_multiindex_from_dataframe(): - pdf = pd.DataFrame( - [["a", "house"], ["a", "store"], ["b", "house"], ["b", "store"]] - ) - gdf = cudf.from_pandas(pdf) - pmi = pd.MultiIndex.from_frame(pdf, names=["alpha", "location"]) - gmi = cudf.MultiIndex.from_frame(gdf, names=["alpha", "location"]) - assert_eq(pmi, gmi) - - -@pytest.mark.parametrize( - "arrays", - [ - [["a", "a", "b", "b"], ["house", "store", "house", "store"]], - [["a", "n", "n"] * 1000, ["house", "store", "house", "store"]], - [ - ["a", "n", "n"], - ["house", "store", "house", "store", "store"] * 1000, - ], - [ - ["a", "a", "n"] * 50, - ["house", "store", "house", "store", "store"] * 100, - ], - ], -) -def test_multiindex_from_product(arrays): - pmi = pd.MultiIndex.from_product(arrays, names=["alpha", "location"]) - gmi = cudf.MultiIndex.from_product(arrays, names=["alpha", "location"]) - assert_eq(pmi, gmi) - - def test_multiindex_index_and_columns(): rng = np.random.default_rng(seed=0) gdf = cudf.DataFrame( @@ -702,173 +530,6 @@ def test_multiindex_equals(): assert_eq(mi1.equals(mi2), False) -def test_multiindex_copy_sem(): - """Test semantic equality for MultiIndex.copy""" - names = ["X", "Y"] - data = { - "Date": [ - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - ], - "Close": [ - 3400.00, - 3401.80, - 3450.96, - 226.58, - 228.91, - 225.53, - 505.13, - 525.91, - 534.98, - ], - "Symbol": [ - "AMZN", - "AMZN", - "AMZN", - "MSFT", - "MSFT", - "MSFT", - "NVDA", - "NVDA", - "NVDA", - ], - } - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - gdf = gdf.groupby(["Date", "Symbol"], sort=True).mean() - pdf = pdf.groupby(["Date", "Symbol"], sort=True).mean() - - gmi = gdf.index - gmi_copy = gmi.copy(names=names) - - pmi = pdf.index - pmi_copy = pmi.copy(names=names) - - for glv, plv in zip(gmi_copy.levels, pmi_copy.levels): - assert all(glv.values_host == plv.values) - 
for gval, pval in zip(gmi.codes, pmi.codes): - assert_eq(gval, pval) - assert_eq(gmi_copy.names, pmi_copy.names) - - # Test same behavior when used on DataFrame - gdf.index = gmi_copy - pdf.index = pmi_copy - assert repr(gdf) == repr(pdf) - - -@pytest.mark.parametrize( - "data", - [ - { - "Date": [ - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - "2020-08-27", - "2020-08-28", - "2020-08-31", - ], - "Close": [ - 3400.00, - 3401.80, - 3450.96, - 226.58, - 228.91, - 225.53, - 505.13, - 525.91, - 534.98, - ], - "Symbol": [ - "AMZN", - "AMZN", - "AMZN", - "MSFT", - "MSFT", - "MSFT", - "NVDA", - "NVDA", - "NVDA", - ], - }, - pd.MultiIndex( - levels=[[1001, 1002], [2001, 2002]], - codes=[[1, 1, 0, 0], [0, 1, 0, 1]], - names=["col1", "col2"], - ), - ], -) -@pytest.mark.parametrize("copy_on_write", [True, False]) -@pytest.mark.parametrize("deep", [True, False]) -def test_multiindex_copy_deep(data, copy_on_write, deep): - """Test memory identity for deep copy - Case1: Constructed from GroupBy, StringColumns - Case2: Constructed from MultiIndex, NumericColumns - """ - with cudf.option_context("copy_on_write", copy_on_write): - if isinstance(data, dict): - gdf = cudf.DataFrame(data) - mi1 = gdf.groupby(["Date", "Symbol"]).mean().index - mi2 = mi1.copy(deep=deep) - - lchildren = [col.children for col in mi1._columns] - rchildren = [col.children for col in mi2._columns] - - # Flatten - lchildren = reduce(operator.add, lchildren) - rchildren = reduce(operator.add, rchildren) - - lptrs = [ - child.base_data.get_ptr(mode="read") for child in lchildren - ] - rptrs = [ - child.base_data.get_ptr(mode="read") for child in rchildren - ] - - assert all((x == y) for x, y in zip(lptrs, rptrs)) - - elif isinstance(data, pd.MultiIndex): - data = cudf.MultiIndex.from_pandas(data) - same_ref = (not deep) or ( - cudf.get_option("copy_on_write") and not deep - ) - mi1 = data - mi2 = mi1.copy(deep=deep) - - # Assert ._levels identity - lptrs = [ - lv._column.base_data.get_ptr(mode="read") for lv in mi1._levels - ] - rptrs = [ - lv._column.base_data.get_ptr(mode="read") for lv in mi2._levels - ] - - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - - # Assert ._codes identity - lptrs = [c.base_data.get_ptr(mode="read") for c in mi1._codes] - rptrs = [c.base_data.get_ptr(mode="read") for c in mi2._codes] - - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - - # Assert ._data identity - lptrs = [d.base_data.get_ptr(mode="read") for d in mi1._columns] - rptrs = [d.base_data.get_ptr(mode="read") for d in mi2._columns] - - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) - - @pytest.mark.parametrize( "iloc_rows", [ @@ -988,27 +649,6 @@ def test_multicolumn_item(): assert_eq(gdgT[(0, 0)], pdgT[(0, 0)]) -def test_multiindex_to_frame(pdfIndex, pdfIndexNulls): - gdfIndex = cudf.from_pandas(pdfIndex) - assert_eq(pdfIndex.to_frame(), gdfIndex.to_frame()) - - gdfIndex = cudf.from_pandas(pdfIndexNulls) - assert_eq( - pdfIndexNulls.to_frame().fillna("nan"), - gdfIndex.to_frame().fillna("nan"), - ) - - -def test_multiindex_groupby_to_frame(): - gdf = cudf.DataFrame( - {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} - ) - pdf = gdf.to_pandas() - gdg = gdf.groupby(["x", "y"], sort=True).count() - pdg = pdf.groupby(["x", "y"], sort=True).count() - assert_eq(pdg.index.to_frame(), gdg.index.to_frame()) - - def test_multiindex_reset_index(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) pdf = pdf.copy(deep=False) @@ 
-1140,499 +780,6 @@ def test_multicolumn_set_item(pdf, pdfIndex): assert_eq(pdf, gdf) -def test_multiindex_iter_error(): - midx = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - - with pytest.raises( - TypeError, - match=re.escape( - f"{midx.__class__.__name__} object is not iterable. " - f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " - f"if you wish to iterate over the values." - ), - ): - iter(midx) - - -def test_multiindex_values(): - midx = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - - result = midx.values - - assert isinstance(result, cp.ndarray) - np.testing.assert_array_equal( - result.get(), np.array([[1, 1], [1, 5], [3, 2], [4, 2], [5, 1]]) - ) - - -def test_multiindex_values_host(): - midx = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - pmidx = midx.to_pandas() - - assert_eq(midx.values_host, pmidx.values) - - -def test_multiindex_to_numpy(): - midx = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - pmidx = midx.to_pandas() - - assert_eq(midx.to_numpy(), pmidx.to_numpy()) - - -@pytest.mark.parametrize( - "gdi, fill_value, expected", - [ - ( - lambda: cudf.MultiIndex( - levels=[[1, 3, 4, None], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - 5, - lambda: cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - ), - ( - lambda: cudf.MultiIndex( - levels=[[1, 3, 4, None], [1, None, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - 100, - lambda: cudf.MultiIndex( - levels=[[1, 3, 4, 100], [1, 100, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - ), - ( - lambda: cudf.MultiIndex( - levels=[["a", "b", "c", None], ["1", None, "5"]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - "100", - lambda: cudf.MultiIndex( - levels=[["a", "b", "c", "100"], ["1", "100", "5"]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - ), - ], -) -def test_multiindex_fillna(gdi, fill_value, expected): - assert_eq(expected(), gdi().fillna(fill_value)) - - -@pytest.mark.parametrize( - "pdi", - [ - pd.MultiIndex( - levels=[[], [], []], - codes=[[], [], []], - names=["one", "two", "three"], - ), - pd.MultiIndex.from_tuples( - list( - zip( - *[ - [ - "bar", - "bar", - "baz", - "baz", - "foo", - "foo", - "qux", - "qux", - ], - [ - "one", - "two", - "one", - "two", - "one", - "two", - "one", - "two", - ], - ] - ) - ) - ), - ], -) -def test_multiindex_empty(pdi): - gdi = cudf.from_pandas(pdi) - - assert_eq(pdi.empty, gdi.empty) - - -@pytest.mark.parametrize( - "pdi", - [ - pd.MultiIndex( - levels=[[], [], []], - codes=[[], [], []], - names=["one", "two", "three"], - ), - pd.MultiIndex.from_tuples( - list( - zip( - *[ - [ - "bar", - "bar", - "baz", - "baz", - "foo", - "foo", - "qux", - "qux", - ], - [ - "one", - "two", - "one", - "two", - "one", - "two", - "one", - "two", - ], - ] - ) - ) - ), - ], -) -def test_multiindex_size(pdi): - gdi = cudf.from_pandas(pdi) - - assert_eq(pdi.size, gdi.size) - - -@pytest.mark.parametrize( - "level", - [ - [], - "alpha", - "location", - "weather", - 0, - 1, - [0, 1], - -1, - [-1, -2], - [-1, "weather"], - ], -) -def test_multiindex_droplevel_simple(pdfIndex, level): 
- gdfIndex = cudf.from_pandas(pdfIndex) - assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) - - -@pytest.mark.parametrize( - "level", - itertools.chain( - *( - itertools.combinations( - ("alpha", "location", "weather", "sign", "timestamp"), r - ) - for r in range(5) - ) - ), -) -def test_multiindex_droplevel_name(pdfIndex, level): - level = list(level) - gdfIndex = cudf.from_pandas(pdfIndex) - assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) - - -@pytest.mark.parametrize( - "level", - itertools.chain(*(itertools.combinations(range(5), r) for r in range(5))), -) -def test_multiindex_droplevel_index(pdfIndex, level): - level = list(level) - gdfIndex = cudf.from_pandas(pdfIndex) - assert_eq(pdfIndex.droplevel(level), gdfIndex.droplevel(level)) - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("return_indexer", [True, False]) -@pytest.mark.parametrize( - "pmidx", - [ - pd.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - pd.MultiIndex.from_product( - [["bar", "baz", "foo", "qux"], ["one", "two"]], - names=["first", "second"], - ), - pd.MultiIndex( - levels=[[], [], []], - codes=[[], [], []], - names=["one", "two", "three"], - ), - pd.MultiIndex.from_tuples( - list( - zip( - *[ - [ - "bar", - "bar", - "baz", - "baz", - "foo", - "foo", - "qux", - "qux", - ], - [ - "one", - "two", - "one", - "two", - "one", - "two", - "one", - "two", - ], - ] - ) - ) - ), - ], -) -def test_multiindex_sort_values(pmidx, ascending, return_indexer): - pmidx = pmidx - midx = cudf.from_pandas(pmidx) - - expected = pmidx.sort_values( - ascending=ascending, return_indexer=return_indexer - ) - actual = midx.sort_values( - ascending=ascending, return_indexer=return_indexer - ) - - if return_indexer: - expected_indexer = expected[1] - actual_indexer = actual[1] - - assert_eq(expected_indexer, actual_indexer) - - expected = expected[0] - actual = actual[0] - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "pdi", - [ - pd.MultiIndex( - levels=[[1, 3.0, 4, 5], [1, 2.3, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - pd.MultiIndex( - levels=[[1, 3, 4, -10], [1, 11, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - pd.MultiIndex( - levels=[["a", "b", "c", "100"], ["1", "100", "5"]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - pytest.param( - pd.MultiIndex( - levels=[[None, "b", "c", "a"], ["1", None, "5"]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ), - marks=[ - pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/35584" - ) - ], - ), - ], -) -@pytest.mark.parametrize("ascending", [True, False]) -def test_multiindex_argsort(pdi, ascending): - gdi = cudf.from_pandas(pdi) - - if not ascending: - expected = pdi.argsort()[::-1] - else: - expected = pdi.argsort() - - actual = gdi.argsort(ascending=ascending) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "names", [[None, None], ["a", None], ["new name", "another name"]] -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_set_names(names, inplace): - pi = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) - gi = cudf.from_pandas(pi) - - expected = pi.set_names(names=names, inplace=inplace) - actual = gi.set_names(names=names, inplace=inplace) - - if inplace: - expected, actual = pi, gi - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("idx_names", 
[[None, None, None], [1, 0, 2]]) -@pytest.mark.parametrize( - "level, names", - [ - (0, "abc"), - (1, "xyz"), - ([2, 1], ["a", "b"]), - ([0, 1], ["aa", "bb"]), - (None, ["a", "b", "c"]), - (None, ["a", None, "c"]), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_set_names_default_and_int_names( - idx_names, level, names, inplace -): - pi = pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], names=idx_names - ) - gi = cudf.from_pandas(pi) - - expected = pi.set_names(names=names, level=level, inplace=inplace) - actual = gi.set_names(names=names, level=level, inplace=inplace) - - if inplace: - expected, actual = pi, gi - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "level, names", - [ - ([None], "abc"), - (["three", "one"], ["a", "b"]), - (["three", 1], ["a", "b"]), - ([0, "three", 1], ["a", "b", "z"]), - (["one", 1, "three"], ["a", "b", "z"]), - (["one", None, "three"], ["a", "b", "z"]), - ([2, 1], ["a", "b"]), - (1, "xyz"), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_set_names_string_names(level, names, inplace): - pi = pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]], - names=["one", None, "three"], - ) - gi = cudf.from_pandas(pi) - - expected = pi.set_names(names=names, level=level, inplace=inplace) - actual = gi.set_names(names=names, level=level, inplace=inplace) - - if inplace: - expected, actual = pi, gi - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "level, names", [(1, ["a"]), (None, "a"), ([1, 2], ["a"]), (None, ["a"])] -) -def test_multiindex_set_names_error(level, names): - pi = pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019], ["aab", "bcd"]] - ) - gi = cudf.from_pandas(pi) - - assert_exceptions_equal( - lfunc=pi.set_names, - rfunc=gi.set_names, - lfunc_args_and_kwargs=([], {"names": names, "level": level}), - rfunc_args_and_kwargs=([], {"names": names, "level": level}), - ) - - -@pytest.mark.parametrize("name", [None, "old name"]) -@pytest.mark.parametrize( - "names", - [ - [None, None], - ["a", None], - ["new name", "another name"], - [1, None], - [2, 3], - [42, "name"], - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_multiindex_rename(name, names, inplace): - pi = pd.MultiIndex.from_product( - [["python", "cobra"], [2018, 2019]], names=[name, None] - ) - gi = cudf.from_pandas(pi) - - expected = pi.rename(names=names, inplace=inplace) - actual = gi.rename(names=names, inplace=inplace) - - if inplace: - expected, actual = pi, gi - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "names", ["plain string", 123, ["str"], ["l1", "l2", "l3"]] -) -def test_multiindex_rename_error(names): - pi = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) - gi = cudf.from_pandas(pi) - - assert_exceptions_equal( - lfunc=pi.rename, - rfunc=gi.rename, - lfunc_args_and_kwargs=([], {"names": names}), - rfunc_args_and_kwargs=([], {"names": names}), - ) - - @pytest.mark.parametrize( "key", [0, 1, [], [0, 1], slice(None), slice(0, 0), slice(0, 1), slice(0, 2)], @@ -1646,141 +793,6 @@ def test_multiindex_indexing(key): assert_eq(gi[key], pi[key], exact=False) -def test_multiindex_duplicate_names(): - gi = cudf.MultiIndex( - levels=[["a", "b"], ["b", "a"]], - codes=[[0, 0], [0, 1]], - names=["a", "a"], - ) - pi = pd.MultiIndex( - levels=[["a", "b"], ["b", "a"]], - codes=[[0, 0], [0, 1]], - names=["a", "a"], - ) - - assert_eq(gi, pi) - - -def test_difference(): - midx = 
cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - midx2 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3, 3], [0, 2, 1, 1, 0, 2]], - names=["x", "y"], - ) - - expected = midx2.to_pandas().difference(midx.to_pandas()) - actual = midx2.difference(midx) - assert isinstance(actual, cudf.MultiIndex) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "idx1, idx2", - [ - ( - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] - ), - pd.MultiIndex.from_arrays( - [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]] - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], ["Red", "Blue", "Red", "Blue"]], - names=["a", "b"], - ), - pd.MultiIndex.from_arrays( - [[3, 3, 2, 4], ["Red", "Green", "Red", "Green"]], - names=["x", "y"], - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - pd.MultiIndex.from_arrays( - [[3, 3, 2, 4], [0.2, 0.4, 1.4, 10], [3, 3, 2, 4]] - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - [(2, 6, 12)], - ), - ], -) -@pytest.mark.parametrize("sort", [None, False]) -def test_union_mulitIndex(idx1, idx2, sort): - expected = idx1.union(idx2, sort=sort) - - idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.MultiIndex) else idx1 - idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.MultiIndex) else idx2 - - actual = idx1.union(idx2, sort=sort) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "idx1, idx2", - [ - ( - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] - ), - pd.MultiIndex.from_arrays( - [[1, 3, 2, 2], ["Red", "Green", "Red", "Green"]] - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], ["Red", "Blue", "Red", "Blue"]], - names=["a", "b"], - ), - pd.MultiIndex.from_arrays( - [[3, 3, 2, 4], ["Red", "Green", "Red", "Green"]], - names=["x", "y"], - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - pd.MultiIndex.from_arrays( - [[3, 3, 2, 4], [0.2, 0.4, 1.4, 10], [3, 3, 2, 4]] - ), - ), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - ), - ), - ], -) -@pytest.mark.parametrize("sort", [None, False]) -def test_intersection_mulitIndex(idx1, idx2, sort): - expected = idx1.intersection(idx2, sort=sort) - - idx1 = cudf.from_pandas(idx1) - idx2 = cudf.from_pandas(idx2) - - actual = idx1.intersection(idx2, sort=sort) - assert_eq(expected, actual, exact=False) - - @pytest.mark.parametrize( "names", [ @@ -1810,48 +822,6 @@ def test_pickle_roundtrip_multiindex(names): assert_eq(expected_df, actual_df) -@pytest.mark.parametrize( - "pidx", - [ - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]] - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - pd.MultiIndex.from_arrays( - [[1.0, 2, 3, 4], [5, 6, 7.8, 10], [11, 12, 12, 13]], - ), - ], -) -@pytest.mark.parametrize( - "func", - [ - "is_numeric", - "is_boolean", - "is_integer", - "is_floating", - "is_object", - "is_categorical", - "is_interval", - ], -) -def test_multiindex_type_methods(pidx, func): - gidx = cudf.from_pandas(pidx) - - with pytest.warns(FutureWarning): - expected = getattr(pidx, func)() - - 
with pytest.warns(FutureWarning): - actual = getattr(gidx, func)() - - if func == "is_object": - assert_eq(False, actual) - else: - assert_eq(expected, actual) - - def test_multiindex_index_single_row(): arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]] tuples = list(zip(*arrays)) @@ -1864,23 +834,6 @@ def test_multiindex_index_single_row(): assert_eq(pdf.loc[("b", 3)], gdf.loc[("b", 3)]) -def test_multiindex_levels(): - gidx = cudf.MultiIndex.from_product( - [range(3), ["one", "two"]], names=["first", "second"] - ) - pidx = gidx.to_pandas() - - assert_eq(gidx.levels[0], pidx.levels[0]) - assert_eq(gidx.levels[1], pidx.levels[1]) - - -def test_multiindex_empty_slice_pandas_compatibility(): - expected = pd.MultiIndex.from_tuples([("a", "b")])[:0] - with cudf.option_context("mode.pandas_compatible", True): - actual = cudf.from_pandas(expected) - assert_eq(expected, actual, exact=False) - - @pytest.mark.parametrize( "levels", itertools.chain.from_iterable( @@ -1904,134 +857,6 @@ def test_multiindex_sort_index_partial(levels): assert_eq(expect, got) -def test_multiindex_to_series_error(): - midx = cudf.MultiIndex.from_tuples([("a", "b")]) - with pytest.raises(NotImplementedError): - midx.to_series() - - -@pytest.mark.parametrize( - "pidx", - [ - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "b", "c"], - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - names=["a", "a", "a"], - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], [5, 6, 7, 10], [11, 12, 12, 13]], - ), - ], -) -@pytest.mark.parametrize( - "name", [None, no_default, ["x", "y", "z"], ["rapids", "rapids", "rapids"]] -) -@pytest.mark.parametrize("allow_duplicates", [True, False]) -@pytest.mark.parametrize("index", [True, False]) -def test_multiindex_to_frame_allow_duplicates( - pidx, name, allow_duplicates, index -): - gidx = cudf.from_pandas(pidx) - - if name is None or ( - ( - len(pidx.names) != len(set(pidx.names)) - and not all(x is None for x in pidx.names) - ) - and not allow_duplicates - and name is no_default - ): - assert_exceptions_equal( - pidx.to_frame, - gidx.to_frame, - lfunc_args_and_kwargs=( - [], - { - "index": index, - "name": name, - "allow_duplicates": allow_duplicates, - }, - ), - rfunc_args_and_kwargs=( - [], - { - "index": index, - "name": name, - "allow_duplicates": allow_duplicates, - }, - ), - ) - else: - if ( - len(pidx.names) != len(set(pidx.names)) - and not all(x is None for x in pidx.names) - and not isinstance(name, list) - ) or (isinstance(name, list) and len(name) != len(set(name))): - # cudf doesn't have the ability to construct dataframes - # with duplicate column names - with pytest.raises(ValueError): - gidx.to_frame( - index=index, - name=name, - allow_duplicates=allow_duplicates, - ) - else: - expected = pidx.to_frame( - index=index, name=name, allow_duplicates=allow_duplicates - ) - actual = gidx.to_frame( - index=index, name=name, allow_duplicates=allow_duplicates - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("bad", ["foo", ["foo"]]) -def test_multiindex_set_names_validation(bad): - mi = cudf.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0), (1, 1)]) - with pytest.raises(ValueError): - mi.names = bad - - -def test_multiindex_values_pandas_compatible(): - midx = cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - midx.values - - -def test_multiindex_dtype_error(): - midx = 
cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) - with pytest.raises(TypeError): - cudf.Index(midx, dtype="int64") - with pytest.raises(TypeError): - cudf.Index(midx.to_pandas(), dtype="int64") - - -def test_multiindex_codes(): - midx = cudf.MultiIndex.from_tuples( - [("a", "b"), ("a", "c"), ("b", "c")], names=["A", "Z"] - ) - - for p_array, g_array in zip(midx.to_pandas().codes, midx.codes): - assert_eq(p_array, g_array) - - -def test_multiindex_union_error(): - midx = cudf.MultiIndex.from_tuples([(10, 12), (8, 9), (3, 4)]) - pidx = midx.to_pandas() - - assert_exceptions_equal( - midx.union, - pidx.union, - lfunc_args_and_kwargs=(["a"],), - rfunc_args_and_kwargs=(["b"],), - ) - - @pytest.mark.parametrize("idx_get", [(0, 0), (0, 1), (1, 0), (1, 1)]) @pytest.mark.parametrize("cols_get", [0, 1, [0, 1], [1, 0], [1], [0]]) def test_multiindex_loc_scalar(idx_get, cols_get): @@ -2043,157 +868,3 @@ def test_multiindex_loc_scalar(idx_get, cols_get): expected = pdf.loc[idx_get, cols_get] assert_eq(actual, expected) - - -def test_multiindex_eq_other_multiindex(): - idx = cudf.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0), (1, 1)]) - result = idx == idx - expected = np.array([True, True]) - assert_eq(result, expected) - - -@pytest.fixture( - params=[ - "from_product", - "from_tuples", - "from_arrays", - "init", - ] -) -def midx(request): - if request.param == "from_product": - return cudf.MultiIndex.from_product([[0, 1], [1, 0]]) - elif request.param == "from_tuples": - return cudf.MultiIndex.from_tuples([(0, 1), (0, 0), (1, 1), (1, 0)]) - elif request.param == "from_arrays": - return cudf.MultiIndex.from_arrays([[0, 0, 1, 1], [1, 0, 1, 0]]) - elif request.param == "init": - return cudf.MultiIndex( - levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [1, 0, 1, 0]] - ) - else: - raise NotImplementedError(f"{request.param} not implemented") - - -def test_multindex_constructor_levels_always_indexes(midx): - assert_eq(midx.levels[0], cudf.Index([0, 1])) - assert_eq(midx.levels[1], cudf.Index([0, 1])) - - -@pytest.mark.parametrize( - "array", - [ - list, - tuple, - np.array, - cp.array, - pd.Index, - cudf.Index, - pd.Series, - cudf.Series, - ], -) -def test_multiindex_from_arrays(array): - pd_data = [[0, 0, 1, 1], [1, 0, 1, 0]] - cudf_data = [array(lst) for lst in pd_data] - result = pd.MultiIndex.from_arrays(pd_data) - expected = cudf.MultiIndex.from_arrays(cudf_data) - assert_eq(result, expected) - - -@pytest.mark.parametrize("arg", ["foo", ["foo"]]) -def test_multiindex_from_arrays_wrong_arg(arg): - with pytest.raises(TypeError): - cudf.MultiIndex.from_arrays(arg) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - pd.Interval(1, 2), - ], -) -def test_index_to_pandas_arrow_type_nullable_raises(scalar): - pa_array = [scalar, None] - midx = cudf.MultiIndex(levels=[pa_array], codes=[[0]]) - with pytest.raises(ValueError): - midx.to_pandas(nullable=True, arrow_type=True) - - -@pytest.mark.parametrize( - "scalar", - [1, 1.0, "a", datetime.datetime(2020, 1, 1), datetime.timedelta(1)], -) -def test_index_to_pandas_arrow_type(scalar): - pa_array = pa.array([scalar, None]) - midx = cudf.MultiIndex(levels=[pa_array], codes=[[0]]) - result = midx.to_pandas(arrow_type=True) - expected = pd.MultiIndex( - levels=[pd.arrays.ArrowExtensionArray(pa_array)], codes=[[0]] - ) - pd.testing.assert_index_equal(result, expected) - - -def test_multi_index_contains_hashable(): - gidx = cudf.MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) 
- pidx = gidx.to_pandas() - - assert_exceptions_equal( - lambda: [] in gidx, - lambda: [] in pidx, - lfunc_args_and_kwargs=((),), - rfunc_args_and_kwargs=((),), - ) - - -@pytest.mark.parametrize("array", [[1, 2], [1, None], [None, None]]) -@pytest.mark.parametrize("dropna", [True, False]) -def test_nunique(array, dropna): - arrays = [array, [3, 4]] - gidx = cudf.MultiIndex.from_arrays(arrays) - pidx = pd.MultiIndex.from_arrays(arrays) - result = gidx.nunique(dropna=dropna) - expected = pidx.nunique(dropna=dropna) - assert result == expected - - -def test_bool_raises(): - assert_exceptions_equal( - lfunc=bool, - rfunc=bool, - lfunc_args_and_kwargs=[[cudf.MultiIndex.from_arrays([range(1)])]], - rfunc_args_and_kwargs=[[pd.MultiIndex.from_arrays([range(1)])]], - ) - - -def test_unique_level(): - pd_mi = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 3, 2]]) - cudf_mi = cudf.MultiIndex.from_pandas(pd_mi) - - result = pd_mi.unique(level=1) - expected = cudf_mi.unique(level=1) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "idx", [pd.Index, pd.CategoricalIndex, pd.DatetimeIndex, pd.TimedeltaIndex] -) -def test_from_arrays_infer_names(idx): - arrays = [idx([1], name="foo"), idx([2], name="bar")] - expected = pd.MultiIndex.from_arrays(arrays) - result = cudf.MultiIndex.from_arrays(arrays) - assert_eq(result, expected) - - -def test_multiindex_droplevel_single_level_none_names(): - data = [(1, 2), (3, 4)] - pidx = pd.MultiIndex.from_tuples(data, names=[None, None]) - gidx = cudf.MultiIndex.from_tuples(data, names=[None, None]) - result = gidx.droplevel(0) - expected = pidx.droplevel(0) - assert_eq(result, expected) From 1fab75fe7c3a75a06d30973c5862c10e1f5dc652 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Aug 2025 12:59:37 -0700 Subject: [PATCH 072/366] Use more pytest fixtures and clean data files cuDF classic tests subdirectories (#19474) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Remove & move data files where appropriate * Eliminate/reduce parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19474 --- .pre-commit-config.yaml | 2 +- python/cudf/cudf/tests/data/ipums.pkl | Bin 99199 -> 0 bytes .../cudf/cudf/tests/data/{ => text}/vocab.txt | 0 .../groupby/test_ordering_pandas_compat.py | 14 +++++--------- .../cudf/tests/input_output/test_parquet.py | 4 ++-- .../cudf/cudf/tests/series/test_datetimelike.py | 9 ++++----- .../cudf/tests/text/test_subword_tokenizer.py | 5 ++--- 7 files changed, 14 insertions(+), 20 deletions(-) delete mode 100644 python/cudf/cudf/tests/data/ipums.pkl rename python/cudf/cudf/tests/data/{ => text}/vocab.txt (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1483a268ba3..9850bce2c95 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,7 +63,7 @@ repos: ^cpp/src/io/parquet/ipc/Schema_generated.h| ^cpp/src/io/parquet/ipc/Message_generated.h| ^cpp/include/cudf_test/cxxopts.hpp| - ^python/cudf/cudf/tests/data/vocab.txt| + ^python/cudf/cudf/tests/data/text/vocab.txt| ^python/cudf/cudf/tests/text/test_text_methods.py ) - repo: local diff --git a/python/cudf/cudf/tests/data/ipums.pkl b/python/cudf/cudf/tests/data/ipums.pkl deleted file mode 100644 index 5c8e896487d7c2c116612f49ca44c3711ba6617d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 99199 
za;e}~1TPc3T<{9PuL@o%c$MJQg4YOMEBH0R>jbYCyg~3r!J7oXE_k!xErQ<=yjAcv z!EXxQE_jFFw*H|rK;F|fBE2XJbai;_#6!(z&o`RDFrwC3J+)Hq8!F>ex6`UrxpJ1KfbisPT2Ej(b8G=Nu2ObO-%dj$^>JXG*7!NUa?2rd&`E_jUK zO2Ll_t`b}=I3&16@Z*9H3O*$Gu;8PDj|u)j@NvN>1b-;_q~MPPe=PVD!Ji5~CHS=9 zGlD-8{JG$>g3k&5LhzS@zY_el;BN$fEBHIX7X)7vd`0ljg0Bj`CioY@zY6|M@b7~E z5d1H}*9HG6_=ez{g8vfyAHlZ-^?|IA2XlO8o^q@af0Iow-DS?a4W&B z1-B90R&YDP4+!okxRc-n!4C?4Nbtjg9}(PHa2LUeg1ZV%65LI2cfmab_Y|BgI7M)( z;9i1z3+^Mhui!Mn{RHa-rwi5#HV8Hf&Jdg_*d#bhaJJw_1zQCZf^C9bf+@j_;9SA1 zU`{YE*e5to@DRbn1P>QHLU4iLk%Ik#3k4So4hSw0JW6o6;0nQG1Xl`vOmMZ}kl-4@ zj|(0vxK{8u!Q%xt+^%-e?P|Do;hLq(hL)~aT^O!fy=>{Cfx>x(&T*aNga3>7nt?T| ovCj=xE*Ls)Wnp;S;{KujC9C?E4?te9d|;@b3cMk&Svc+g0Fni84gdfE diff --git a/python/cudf/cudf/tests/data/vocab.txt b/python/cudf/cudf/tests/data/text/vocab.txt similarity index 100% rename from python/cudf/cudf/tests/data/vocab.txt rename to python/cudf/cudf/tests/data/text/vocab.txt diff --git a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py index a8c5ae3b6a3..6fb32ae36b9 100644 --- a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py +++ b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. import numpy as np import pytest @@ -6,14 +6,10 @@ from cudf.testing import assert_eq -@pytest.fixture(params=[False, True], ids=["without_nulls", "with_nulls"]) -def with_nulls(request): - return request.param - - -@pytest.mark.parametrize("nrows", [30, 300, 300_000]) -@pytest.mark.parametrize("nkeys", [1, 2, 4]) -def test_groupby_maintain_order_random(nrows, nkeys, with_nulls): +@pytest.mark.parametrize("with_nulls", [False, True]) +def test_groupby_maintain_order_random(with_nulls): + nrows = 20 + nkeys = 3 rng = np.random.default_rng(seed=0) key_names = [f"key{key}" for key in range(nkeys)] key_values = [rng.integers(100, size=nrows) for _ in key_names] diff --git a/python/cudf/cudf/tests/input_output/test_parquet.py b/python/cudf/cudf/tests/input_output/test_parquet.py index a377bcac285..13acf6825b4 100644 --- a/python/cudf/cudf/tests/input_output/test_parquet.py +++ b/python/cudf/cudf/tests/input_output/test_parquet.py @@ -11,7 +11,7 @@ from cudf.testing import assert_eq -def test_parquet_long_list(tmpdir): +def test_parquet_long_list(tmp_path): # This test generates int and string list columns, where each has a row that is very large. # When generated by the cudf writer these long rows are contained on a single page, # but when generated by pyarrow they span several pages. 
@@ -44,7 +44,7 @@ def test_parquet_long_list(tmpdir): ) # Write the table to a parquet file using pyarrow - file_name = tmpdir.join("long_row_list_test.pq") + file_name = tmp_path / "long_row_list_test.pq" # https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html pq.write_table( generated_table, diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py index 72662ff70f5..400777e46e1 100644 --- a/python/cudf/cudf/tests/series/test_datetimelike.py +++ b/python/cudf/cudf/tests/series/test_datetimelike.py @@ -25,9 +25,11 @@ def _get_all_zones(): return zones -# NOTE: ALL_TIME_ZONES is a very large list; we likely do NOT want to +# NOTE: _get_all_zones is a very large list; we likely do NOT want to # use it for more than a handful of tests -ALL_TIME_ZONES = _get_all_zones() +@pytest.fixture(params=_get_all_zones()) +def zone_name(request): + return request.param @pytest.fixture(params=["ns", "us", "ms", "s"]) @@ -42,7 +44,6 @@ def tz(request): return request.param -@pytest.mark.parametrize("zone_name", ALL_TIME_ZONES) def test_tz_localize(unit, zone_name): s = cudf.Series(date_range("2001-01-01", "2001-01-02", freq="1s")) s = s.astype(f"<M8[{unit}]")
From: Vyas Ramasubramani Date: Wed, 6 Aug 2025 11:24:21 -1000 Subject: [PATCH 073/366] Always use strict zipping (#19584) Closes https://github.com/rapidsai/cudf/issues/15835. Tightens up a few checks and fixes some tests in the process. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Gil Forsyth (https://github.com/gforsyth) - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19584 --- docs/cudf/source/user_guide/10min.ipynb | 2 +- pyproject.toml | 9 +++ python/cudf/cudf/core/abc.py | 8 ++- python/cudf/cudf/core/accessors/struct.py | 2 +- python/cudf/cudf/core/column/column.py | 7 +- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/column/struct.py | 10 ++- python/cudf/cudf/core/column_accessor.py | 16 +++-- python/cudf/cudf/core/dataframe.py | 71 ++++++++++++------- python/cudf/cudf/core/dtypes.py | 2 +- python/cudf/cudf/core/frame.py | 35 ++++++--- python/cudf/cudf/core/groupby/groupby.py | 38 +++++++--- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 21 ++++-- python/cudf/cudf/core/join/join.py | 14 ++-- python/cudf/cudf/core/multiindex.py | 36 +++++++--- python/cudf/cudf/core/reshape.py | 15 ++-- python/cudf/cudf/core/series.py | 2 + python/cudf/cudf/core/udf/groupby_typing.py | 7 +- python/cudf/cudf/io/json.py | 2 +- python/cudf/cudf/io/orc.py | 6 +- python/cudf/cudf/io/parquet.py | 23 ++++-- python/cudf/cudf/options.py | 2 +- .../cudf/pandas/scripts/conftest-patch.py | 1 - python/cudf/cudf/testing/_utils.py | 4 +- .../dataframe/methods/test_convert_dtypes.py | 4 +- .../groupby/test_ordering_pandas_compat.py | 4 +- .../indexes/multiindex/methods/test_copy.py | 14 ++-- .../indexes/multiindex/test_attributes.py | 8 ++- .../indexes/multiindex/test_constructors.py | 2 +- python/cudf/cudf/tests/test_array_ufunc.py | 10 +-- python/cudf/cudf/tests/test_categorical.py | 12 +--- .../cudf/cudf/tests/test_column_accessor.py | 6 +- python/cudf/cudf/tests/test_csv.py | 2 +- python/cudf/cudf/tests/test_cuda_apply.py | 20 +++--- python/cudf/cudf/tests/test_dataframe.py | 14 ++-- python/cudf/cudf/tests/test_duplicates.py | 4 +- python/cudf/cudf/tests/test_groupby.py | 9 +--
python/cudf/cudf/tests/test_index.py | 6 +- python/cudf/cudf/tests/test_join_order.py | 28 +++++--- python/cudf/cudf/tests/test_json.py | 9 ++- python/cudf/cudf/tests/test_list.py | 2 +- python/cudf/cudf/tests/test_monotonic.py | 2 +- python/cudf/cudf/tests/test_multiindex.py | 4 +- python/cudf/cudf/tests/test_onehot.py | 6 +- python/cudf/cudf/tests/test_parquet.py | 2 +- python/cudf/cudf/tests/test_spilling.py | 2 +- .../tests/testing/test_assert_frame_equal.py | 2 +- python/cudf/cudf/utils/hash_vocab_utils.py | 4 +- python/cudf/cudf/utils/ioutils.py | 13 ++-- .../cudf_pandas_tests/test_cudf_pandas.py | 8 +-- .../cudf/cudf_pandas_tests/test_profiler.py | 2 +- .../tests/test_catboost.py | 2 +- .../tests/test_cuml.py | 2 +- .../tests/test_matplotlib.py | 2 +- .../tests/test_seaborn.py | 2 +- .../tests/test_xgboost.py | 4 +- python/cudf/pyproject.toml | 4 +- python/dask_cudf/dask_cudf/backends.py | 1 + python/dask_cudf/dask_cudf/io/orc.py | 4 +- .../dask_cudf/tests/test_accessor.py | 10 +-- python/dask_cudf/dask_cudf/tests/test_core.py | 2 +- python/pylibcudf/pylibcudf/io/types.pyx | 7 +- python/pylibcudf/tests/common/utils.py | 14 ++-- python/pylibcudf/tests/conftest.py | 5 +- python/pylibcudf/tests/io/test_avro.py | 3 +- python/pylibcudf/tests/test_binaryops.py | 2 +- python/pylibcudf/tests/test_copying.py | 34 ++++++--- python/pylibcudf/tests/test_lists.py | 4 +- .../tests/test_nvtext_generate_ngrams.py | 2 +- python/pylibcudf/tests/test_nvtext_jaccard.py | 2 +- python/pylibcudf/tests/test_nvtext_minhash.py | 10 ++- python/pylibcudf/tests/test_quantiles.py | 2 +- python/pylibcudf/tests/test_reshape.py | 2 +- python/pylibcudf/tests/test_string_find.py | 6 +- python/pylibcudf/tests/test_string_replace.py | 2 +- .../pylibcudf/tests/test_string_replace_re.py | 2 +- python/pylibcudf/tests/test_string_slice.py | 1 + 78 files changed, 423 insertions(+), 244 deletions(-) diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index f9762a5ff0f..87782cd7fb5 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -1982,7 +1982,7 @@ ], "source": [ "arrays = [[\"a\", \"a\", \"b\", \"b\"], [1, 2, 3, 4]]\n", - "tuples = list(zip(*arrays))\n", + "tuples = list(zip(*arrays, strict=True))\n", "idx = cudf.MultiIndex.from_tuples(tuples)\n", "idx" ] diff --git a/pyproject.toml b/pyproject.toml index 59246db6fb0..291fd063aeb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ select = [ "W", # isort "I", + # zip-without-explicit-strict + "B905", # no-blank-line-before-function "D201", # one-blank-line-after-class @@ -134,6 +136,10 @@ fixable = ["ALL"] exclude = [ "cpp/scripts/gdb-pretty-printers.py", ] +extend-unsafe-fixes = [ + # zip-without-explicit-strict's autofix sets strict to False, but we want to enforce True + "B905", +] [tool.ruff.lint.flake8-tidy-imports.banned-api] "numpy.can_cast".msg = "Use find_common_dtype from cudf.utils.dtypes instead" @@ -144,6 +150,9 @@ exclude = [ [tool.ruff.lint.per-file-ignores] # We use "== None" to demonstrate null handling in this notebook "docs/cudf/source/user_guide/missing-data.ipynb" = ["E711"] +# We demonstrate UDFs where numba doesn't support `zip(..., strict=True)` (or +# any keywords) and we don't want to litter demo notebooks with noqas +"docs/cudf/source/user_guide/guide-to-udfs.ipynb" = ["B905"] # Lots of pytest implicitly injected attributes in conftest-patch.py "python/cudf/cudf/pandas/scripts/conftest-patch.py" = ["F821"] 
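
As context for the B905 rule enabled in the pyproject.toml hunk above, here is a minimal sketch of the behavior this patch standardizes on (illustrative values, not taken from the patch): plain zip silently truncates to the shortest input, while strict=True raises.

    # zip silently drops the unmatched trailing element:
    pairs = list(zip([1, 2, 3], ["a", "b"]))  # [(1, 'a'), (2, 'b')]
    # strict=True turns the length mismatch into an error:
    try:
        list(zip([1, 2, 3], ["a", "b"], strict=True))
    except ValueError as exc:
        print(exc)  # zip() argument 2 is shorter than argument 1

This also explains why the config marks the B905 autofix as unsafe: ruff's autofix inserts strict=False, which preserves the old truncating behavior rather than the checked behavior the patch wants to enforce.
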
"python/cudf/cudf/pandas/scripts/*" = ["D"] diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py index c8ea03b04fe..2a7250709a4 100644 --- a/python/cudf/cudf/core/abc.py +++ b/python/cudf/cudf/core/abc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. """Common abstract base classes for cudf.""" import numpy @@ -157,7 +157,7 @@ def host_serialize(self): header["writeable"] = len(frames) * (None,) frames = [ f.memoryview() if c else memoryview(f) - for c, f in zip(header["is-cuda"], frames) + for c, f in zip(header["is-cuda"], frames, strict=True) ] return header, frames @@ -183,7 +183,9 @@ def host_deserialize(cls, header, frames): """ frames = [ cudf.core.buffer.as_buffer(f) if c else f - for c, f in zip(header["is-cuda"], map(memoryview, frames)) + for c, f in zip( + header["is-cuda"], map(memoryview, frames), strict=True + ) ] obj = cls.device_deserialize(header, frames) return obj diff --git a/python/cudf/cudf/core/accessors/struct.py b/python/cudf/cudf/core/accessors/struct.py index aad74a4ffe4..3f81ca3ca2b 100644 --- a/python/cudf/cudf/core/accessors/struct.py +++ b/python/cudf/cudf/core/accessors/struct.py @@ -109,7 +109,7 @@ def explode(self) -> DataFrame: data = { name: col.copy(deep=True) for name, col in zip( - self._column.dtype.fields, self._column.children + self._column.dtype.fields, self._column.children, strict=True ) } rangeindex = len(data) == 0 diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f3d8a7798be..a2c21071bbe 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -459,7 +459,7 @@ def children(self) -> tuple[ColumnBase, ...]: ) self._children = tuple( # type: ignore[assignment] child._with_type_metadata(dtype) - for child, dtype in zip(children, dtypes) + for child, dtype in zip(children, dtypes, strict=True) ) return self._children # type: ignore[return-value] @@ -2062,7 +2062,8 @@ def serialize(self) -> tuple[dict, list]: frames.extend(mask_frames) if self.children: child_headers, child_frames = zip( - *(c.device_serialize() for c in self.children) + *(c.device_serialize() for c in self.children), + strict=True, ) header["subheaders"] = list(child_headers) frames.extend(chain(*child_frames)) @@ -3386,7 +3387,7 @@ def serialize_columns(columns: list[ColumnBase]) -> tuple[list[dict], list]: header_columns: list[tuple[dict, list]] = [ c.device_serialize() for c in columns ] - headers, column_frames = zip(*header_columns) + headers, column_frames = zip(*header_columns, strict=True) for f in column_frames: frames.extend(f) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 3fcebe118d5..08a0c20506e 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -440,7 +440,7 @@ def isocalendar(self) -> dict[str, ColumnBase]: return { field: self.strftime(format=directive).astype(np.dtype(np.uint32)) for field, directive in zip( - ["year", "week", "day"], ["%G", "%V", "%u"] + ["year", "week", "day"], ["%G", "%V", "%u"], strict=True ) } diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 79fd5e51f4f..d0a9f0389f6 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -111,7 +111,10 @@ def to_arrow(self) -> pa.Array: else self.dtype ) pa_type = pa.struct( - {field: child.type for field, child in zip(dtype.fields, 
children)} + { + field: child.type + for field, child in zip(dtype.fields, children, strict=True) + } ) if self.mask is not None: @@ -190,7 +193,10 @@ def _rename_fields(self, names) -> Self: but with the field names equal to `names`. """ dtype = StructDtype( - {name: col.dtype for name, col in zip(names, self.children)} + { + name: col.dtype + for name, col in zip(names, self.children, strict=True) + } ) return StructColumn( # type: ignore[return-value] data=None, diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 08c501d4747..c776dbc6b20 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -253,7 +253,9 @@ def _grouped_data(self) -> MutableMapping: return the underlying mapping as a nested mapping. """ if self.multiindex: - return _NestedGetItemDict.from_zip(zip(self.names, self.columns)) + return _NestedGetItemDict.from_zip( + zip(self.names, self.columns, strict=True) + ) else: return self._data @@ -383,7 +385,7 @@ def insert(self, name: Hashable, value: ColumnBase, loc: int = -1) -> None: else: new_keys = self.names[:loc] + (name,) + self.names[loc:] new_values = self.columns[:loc] + (value,) + self.columns[loc:] - self._data = dict(zip(new_keys, new_values)) + self._data = dict(zip(new_keys, new_values, strict=True)) self._clear_cache(old_ncols, old_ncols + 1) # The type(name) may no longer match the prior label_dtype @@ -458,7 +460,9 @@ def get_labels_by_index( "Cannot use Series object for mask iloc indexing" ) # TODO: Doesn't handle on-device columns - return tuple(n for n, keep in zip(self.names, index) if keep) + return tuple( + n for n, keep in zip(self.names, index, strict=True) if keep + ) else: if len(set(index)) != len(index): # type: ignore[arg-type] raise NotImplementedError( @@ -569,7 +573,9 @@ def _select_by_label_list_like(self, key: tuple) -> Self: ) data = dict( item - for item, keep in zip(self._grouped_data.items(), key) + for item, keep in zip( + self._grouped_data.items(), key, strict=True + ) if keep ) else: @@ -743,7 +749,7 @@ def rename_column(x): if not all(isinstance(label, old_type) for label in new_col_names): label_dtype = None - data = dict(zip(new_col_names, self.values())) + data = dict(zip(new_col_names, self.values(), strict=True)) return type(self)( data=data, level_names=self.level_names, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index fd709d4616f..ff0fe0e0564 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -55,6 +55,7 @@ from cudf.core.column import ( CategoricalColumn, ColumnBase, + StringColumn, StructColumn, as_column, column_empty, @@ -145,12 +146,17 @@ def _recursively_update_struct_names( ) -> ColumnBase: """Update a Column with struct names from pylibcudf.io.TableWithMetadata.child_names""" if col.children: - children = list(col.children) - for i, (child, names) in enumerate( - zip(children, child_names.values()) - ): - children[i] = _recursively_update_struct_names(child, names) - col.set_base_children(tuple(children)) + if not child_names: + assert isinstance(col, StringColumn), ( + "Only string columns can have unnamed children" + ) + else: + children = list(col.children) + for i, (child, names) in enumerate( + zip(children, child_names.values(), strict=True) + ): + children[i] = _recursively_update_struct_names(child, names) + col.set_base_children(tuple(children)) if isinstance(col.dtype, StructDtype): col = col._rename_fields(child_names.keys()) # 
type: ignore[attr-defined] @@ -717,7 +723,9 @@ def _array_to_column_accessor( return ColumnAccessor( { column_label: as_column(data[:, i], nan_as_null=nan_as_null) - for column_label, i in zip(columns_labels, range(data.shape[1])) + for column_label, i in zip( + columns_labels, range(data.shape[1]), strict=True + ) }, verify=False, rangeindex=isinstance(columns_labels, pd.RangeIndex), @@ -764,7 +772,7 @@ def _mapping_to_column_accessor( ) data = data.copy() for key, aligned_series in zip( - values_as_series.keys(), aligned_input_series + values_as_series.keys(), aligned_input_series, strict=True ): if index is not None: aligned_series = aligned_series.reindex(index=index) @@ -1902,6 +1910,7 @@ def _concat( zip( indices[:first_data_column_position], cols[:first_data_column_position], + strict=True, ) ) ) @@ -1911,6 +1920,7 @@ def _concat( zip( indices[first_data_column_position:], cols[first_data_column_position:], + strict=True, ) ), index=table_index, @@ -1984,11 +1994,14 @@ def _concat( # Reassign the categories for any categorical index cols if not isinstance(out.index, cudf.RangeIndex): - _reassign_categories( - categories, - out.index._data, - indices[:first_data_column_position], - ) + # If the index column was constructed and not generated via concatenation, + # then reassigning categories is neither needed nor a valid operation. + if first_data_column_position > 0: + _reassign_categories( + categories, + out.index._data, + indices[:first_data_column_position], + ) if not isinstance(out.index, MultiIndex) and isinstance( out.index.dtype, CategoricalDtype ): @@ -2225,7 +2238,7 @@ def _fill_same_ca_attributes( "whose columns & index are same respectively, " "please reindex." ) - rhs = dict(zip(other_pd_index, other.values_host)) + rhs = dict(zip(other_pd_index, other.values_host, strict=True)) # For keys in right but not left, perform binops between NaN (not # NULL!) and the right value (result is NaN). 
left_default = as_column(np.nan, length=len(self)) @@ -2853,7 +2866,7 @@ def columns(self, columns): ) self._data = ColumnAccessor( - data=dict(zip(pd_columns, self._columns)), + data=dict(zip(pd_columns, self._columns, strict=True)), multiindex=multiindex, level_names=level_names, label_dtype=label_dtype, @@ -2875,7 +2888,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None: f"got {len(self)} elements" ) self._data = ColumnAccessor( - data=dict(zip(other.names, self._columns)), + data=dict(zip(other.names, self._columns, strict=True)), multiindex=other.multiindex, rangeindex=other.rangeindex, level_names=other.level_names, @@ -3284,7 +3297,7 @@ def where( out = [] for (name, col), other_col in zip( - self._column_labels_and_values, other_cols + self._column_labels_and_values, other_cols, strict=True ): if cond_col := cond._data.get(name): out.append(col.where(cond_col, other_col, inplace)) @@ -5895,7 +5908,7 @@ def to_arrow(self, preserve_index: bool | None = None) -> pa.Table: ) data = data.copy(deep=False) for gen_name, col_name in zip( - index_descr, index._column_names + index_descr, index._column_names, strict=True ): data._insert( data.shape[1], @@ -7632,6 +7645,7 @@ def unnamed_group_generator(): else [ stacked[i] for i in unnamed_level_values.argsort() ], + strict=True, ) ), isinstance(unnamed_level_values, pd.MultiIndex), @@ -8272,7 +8286,7 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): exprs.append(e.strip()) ret = self if inplace else self.copy(deep=False) - for name, expr in zip(targets, exprs): + for name, expr in zip(targets, exprs, strict=True): ret._data[name] = self._compute_column(expr) if not inplace: return ret @@ -8450,10 +8464,11 @@ def from_pylibcudf( ColumnBase.from_pylibcudf(plc_col, data_ptr_exposed=True) for plc_col in plc_columns ) + # We only have child names if the source is a pylibcudf.io.TableWithMetadata. if child_names is not None: cudf_cols = ( - _recursively_update_struct_names(col, child_names) - for col, child_names in zip( + _recursively_update_struct_names(col, cn) + for col, cn in zip( cudf_cols, child_names.values(), strict=True ) ) @@ -8792,7 +8807,7 @@ def _setitem_with_dataframe( ): replace_df = replace_df.reindex(input_df.index) - for col_1, col_2 in zip(input_cols, replace_df._column_names): + for col_1, col_2 in zip(input_cols, replace_df._column_names, strict=True): if col_1 in input_df._column_names: if mask is not None: input_df._data[col_1][mask] = as_column(replace_df[col_2]) @@ -8848,11 +8863,11 @@ def _index_from_listlike_of_series( # Create a dictionary of the common, non-null columns def _get_non_null_cols_and_dtypes(col_idxs, list_of_columns): # A mapping of {idx: np.dtype} - dtypes = dict() + dtypes = {} # A mapping of {idx: [...columns]}, where `[...columns]` # is a list of columns with at least one valid value for each # column name across all input frames - non_null_columns = dict() + non_null_columns = {} for idx in col_idxs: for cols in list_of_columns: # Skip columns not in this frame @@ -8877,8 +8892,10 @@ def _find_common_dtypes_and_categories( ) -> dict[Any, ColumnBase]: # A mapping of {idx: categories}, where `categories` is a # column of all the unique categorical values from each - # categorical column across all input frames - categories = dict() + # categorical column across all input frames. This function + # also modifies the input dtypes dictionary in place to capture + # the common dtype across columns being concatenated. 
+ categories = {} for idx, cols in non_null_columns.items(): # default to the first non-null dtype dtypes[idx] = cols[0].dtype @@ -8927,7 +8944,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): def _reassign_categories(categories, cols, col_idxs): - for name, idx in zip(cols, col_idxs): + for name, idx in zip(cols, col_idxs, strict=True): if idx in categories: codes = as_unsigned_codes(len(categories[idx]), cols[name]) cols[name] = CategoricalColumn( diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 60cc419e3b6..4b67544ce5a 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -718,7 +718,7 @@ def _recursively_replace_fields(self, result: dict) -> dict: """ new_result = {} for (new_field, field_dtype), result_value in zip( - self.fields.items(), result.values() + self.fields.items(), result.values(), strict=True ): if isinstance(field_dtype, StructDtype) and isinstance( result_value, dict diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index c458dec5a67..0c627f3ea84 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -93,7 +93,7 @@ def _columns(self) -> tuple[ColumnBase, ...]: def _column_labels_and_values( self, ) -> Iterable[tuple[Hashable, ColumnBase]]: - return zip(self._column_names, self._columns) + return zip(self._column_names, self._columns, strict=True) @property def _dtypes(self) -> Generator[tuple[Hashable, Dtype], None, None]: @@ -129,7 +129,8 @@ def serialize(self): if isinstance(cname, np.generic) else (cname, "") for cname in self._column_names - ] + ], + strict=True, ) if self._column_names else ((), ()) @@ -174,11 +175,13 @@ def deserialize(cls, header, frames): column_names = [ getattr(np, cntype)(cname) if cntype != "" else cname for cname, cntype in zip( - header["column_names"], header["column_names_numpy_type"] + header["column_names"], + header["column_names_numpy_type"], + strict=True, ) ] col_accessor = ColumnAccessor( - data=dict(zip(column_names, columns)), **kwargs + data=dict(zip(column_names, columns, strict=True)), **kwargs ) return cls._from_data(col_accessor) @@ -219,7 +222,7 @@ def _from_columns_like_self( """ if column_names is None: column_names = self._column_names - data = dict(zip(column_names, columns)) + data = dict(zip(column_names, columns, strict=True)) frame = self.__class__._from_data(data) return frame._copy_type_metadata(self) @@ -1077,7 +1080,9 @@ def from_arrow(cls, data: pa.Table) -> Self: } for name, plc_codes in zip( - dict_indices_table.column_names, plc_indices.columns() + dict_indices_table.column_names, + plc_indices.columns(), + strict=True, ): codes = ColumnBase.from_pylibcudf(plc_codes) categories = cudf_dictionaries_columns[name] @@ -1097,7 +1102,9 @@ def from_arrow(cls, data: pa.Table) -> Self: cudf_non_category_frame = { name: ColumnBase.from_pylibcudf(plc_col) for name, plc_col in zip( - data.column_names, plc.Table.from_arrow(data).columns() + data.column_names, + plc.Table.from_arrow(data).columns(), + strict=True, ) } @@ -1199,7 +1206,9 @@ def _copy_type_metadata(self: Self, other: Self) -> Self: See `ColumnBase._with_type_metadata` for more information. 
""" for (name, self_col), (_, other_col) in zip( - self._column_labels_and_values, other._column_labels_and_values + self._column_labels_and_values, + other._column_labels_and_values, + strict=True, ): self._data.set_by_label( name, @@ -1471,19 +1480,23 @@ def searchsorted( # https://github.com/pandas-dev/pandas/issues/54668 common_dtype_list = [ find_common_type([col.dtype, val.dtype]) - for col, val in zip(self._columns, values) + for col, val in zip(self._columns, values, strict=True) ] sources = [ col if is_dtype_equal(col.dtype, common_dtype) else col.astype(common_dtype) - for col, common_dtype in zip(self._columns, common_dtype_list) + for col, common_dtype in zip( + self._columns, common_dtype_list, strict=True + ) ] values = [ val if is_dtype_equal(val.dtype, common_dtype) else val.astype(common_dtype) - for val, common_dtype in zip(values, common_dtype_list) + for val, common_dtype in zip( + values, common_dtype_list, strict=True + ) ] outcol = ColumnBase.from_pylibcudf( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 36809a35066..db79e26782f 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -574,7 +574,11 @@ def groups(self): ) return dict( - zip(group_names.to_pandas(), grouped_index._split(offsets[1:-1])) + zip( + group_names.to_pandas(), + grouped_index._split(offsets[1:-1]), + strict=True, + ) ) @cached_property @@ -608,7 +612,11 @@ def indices(self) -> dict[ScalarLike, cupy.ndarray]: else: index = Index._from_column(group_keys[0]) return dict( - zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1])) + zip( + index.to_pandas(), + cp.split(indices.values, offsets[1:-1]), + strict=True, + ) ) @_performance_tracking @@ -807,7 +815,9 @@ def _aggregate( requests = [] result_columns: list[list[ColumnBase]] = [] - for i, (col, aggs) in enumerate(zip(values, aggregations)): + for i, (col, aggs) in enumerate( + zip(values, aggregations, strict=True) + ): valid_aggregations = get_valid_aggregation(col.dtype) included_aggregations_i = [] col_aggregations = [] @@ -845,7 +855,7 @@ def _aggregate( else self._groupby.plc_groupby.aggregate(requests) ) - for i, result, val in zip(column_included, results, values): + for i, result in zip(column_included, results, strict=True): result_columns[i] = [ ColumnBase.from_pylibcudf(col) for col in result.columns() ] @@ -866,7 +876,7 @@ def _shift( pa_scalar_to_plc_scalar( pa.scalar(val, type=cudf_dtype_to_pa_type(col.dtype)) ) - for val, col in zip(fill_values, values) + for val, col in zip(fill_values, values, strict=True) ], ) return (ColumnBase.from_pylibcudf(col) for col in shifts.columns()) @@ -1012,8 +1022,9 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): included_aggregations, result_columns, orig_dtypes, + strict=True, ): - for agg_tuple, col in zip(aggs, cols): + for agg_tuple, col in zip(aggs, cols, strict=True): agg, agg_kind = agg_tuple agg_name = agg.__name__ if callable(agg) else agg if multilevel: @@ -1078,11 +1089,11 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): right_cols = result_index._columns join_keys = [ _match_join_keys(lcol, rcol, "inner") - for lcol, rcol in zip(left_cols, right_cols) + for lcol, rcol in zip(left_cols, right_cols, strict=True) ] # TODO: In future, see if we can centralize # logic else where that has similar patterns. 
- join_keys = map(list, zip(*join_keys)) + join_keys = map(list, zip(*join_keys, strict=True)) # By construction, left and right keys are related by # a permutation, so we can use an inner join. with acquire_spill_lock(): @@ -1563,7 +1574,9 @@ def sample( # Empirically shuffling with cupy is faster at this scale rs = cp.random.get_random_state() rs.seed(seed=random_state) - for off, size in zip(group_offsets, size_per_group): + for off, size in zip( + group_offsets[:-1], size_per_group, strict=True + ): rs.shuffle(indices[off : off + size]) else: keys = cp.random.default_rng(seed=random_state).random( @@ -1707,7 +1720,8 @@ def _raise_invalid_type(x): if isinstance(x, tuple) else _raise_invalid_type(x) for x in kwargs.values() - ) + ), + strict=True, ) else: raise TypeError("Must provide at least one aggregation function.") @@ -2570,7 +2584,7 @@ def interleave_columns(source_columns): res = DataFrame._from_data( { x: interleave_columns([gb_cov_corr._data[y] for y in ys]) - for ys, x in zip(cols_split, column_names) + for ys, x in zip(cols_split, column_names, strict=True) } ) @@ -2737,6 +2751,7 @@ def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries: zip( values._column_names, self._replace_nulls(values._columns, method), + strict=True, ) ) ) @@ -2906,6 +2921,7 @@ def shift( zip( values._column_names, self._shift(values._columns, periods, fill_value), + strict=True, ) ) ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 8d46e435604..cc795fad2b1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2645,7 +2645,7 @@ def _data(self) -> ColumnAccessor: def _column_labels_and_values( self, ) -> Iterable[tuple[Hashable, ColumnBase]]: - return zip(self._column_names, self._columns) + return zip(self._column_names, self._columns, strict=True) @_performance_tracking def _as_int_index(self) -> Index: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index d5200ed8b14..f7ef0f0db12 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -372,7 +372,7 @@ def _from_columns_like_self( else: index.name = index_names[0] - data = dict(zip(column_names, data_columns)) + data = dict(zip(column_names, data_columns, strict=True)) frame = type(self)._from_data(data, index) return frame._copy_type_metadata(self) @@ -976,7 +976,7 @@ def clip(self, lower=None, upper=None, axis=1, inplace=False): data = ( col.clip(low, high) - for col, low, high in zip(self._columns, lower, upper) + for col, low, high in zip(self._columns, lower, upper, strict=True) ) output = self._from_data_like_self( self._data._from_columns_like_self(data) @@ -2697,7 +2697,7 @@ def sort_index( ) else: ca = ColumnAccessor( - dict(zip(labels, result_columns)), + dict(zip(labels, result_columns, strict=True)), rangeindex=self._data.rangeindex, multiindex=self._data.multiindex, level_names=self._data.level_names, @@ -3743,6 +3743,7 @@ def _reindex( for left_dtype, right_dtype in zip( (dtype for _, dtype in df.index._dtypes), (dtype for _, dtype in index._dtypes), + strict=True, ) ) @@ -5320,12 +5321,18 @@ def _explode(self, explode_column: Any, ignore_index: bool): if i == column_index else new_column._with_type_metadata(old_column.dtype) for i, (new_column, old_column) in enumerate( - zip(exploded, itertools.chain(idx_cols, self._columns)) + zip( + exploded, + itertools.chain(idx_cols, self._columns), + strict=True, + ) ) ] data = type(self._data)( - dict(zip(self._column_names, 
exploded[len(idx_cols) :])), + dict( + zip(self._column_names, exploded[len(idx_cols) :], strict=True) + ), multiindex=self._data.multiindex, level_names=self._data.level_names, rangeindex=self._data.rangeindex, @@ -6411,7 +6418,9 @@ def rank( if dropped_cols: result = type(source)._from_data( ColumnAccessor( - dict(zip(source._column_names, result_columns)), + dict( + zip(source._column_names, result_columns, strict=True) + ), multiindex=source._data.multiindex, level_names=source._data.level_names, label_dtype=source._data.label_dtype, diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 43fbe73170e..43c73171c0c 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -233,7 +233,9 @@ def __init__( if on else { lkey.name - for lkey, rkey in zip(self._left_keys, self._right_keys) + for lkey, rkey in zip( + self._left_keys, self._right_keys, strict=True + ) if lkey.name == rkey.name and not ( isinstance(lkey, _IndexIndexer) @@ -276,7 +278,7 @@ def _gather_maps(self, left_cols, right_cols): as_column(range(n), dtype=SIZE_TYPE_DTYPE).take( map_, nullify=null, check_bounds=False ) - for map_, n, null in zip(maps, lengths, nullify) + for map_, n, null in zip(maps, lengths, nullify, strict=True) ] if self.how == "right": # If how is right, right map is primary sort key. @@ -296,7 +298,9 @@ def perform_merge(self) -> DataFrame: left_join_cols = [] right_join_cols = [] - for left_key, right_key in zip(self._left_keys, self._right_keys): + for left_key, right_key in zip( + self._left_keys, self._right_keys, strict=True + ): lcol = left_key.get(self.lhs) rcol = right_key.get(self.rhs) lcol_casted, rcol_casted = _match_join_keys(lcol, rcol, self.how) @@ -405,7 +409,9 @@ def _merge_results(self, left_result: DataFrame, right_result: DataFrame): # combined by filling nulls in the left key column with corresponding # values from the right key column: if self.how == "outer": - for lkey, rkey in zip(self._left_keys, self._right_keys): + for lkey, rkey in zip( + self._left_keys, self._right_keys, strict=True + ): if lkey.name == rkey.name: # fill nulls in lhs from values in the rhs lkey.set( diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index f3bdfce4321..e811ecea30c 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -200,7 +200,9 @@ def __init__( new_codes.append(new_code) source_data: dict[Hashable, column.ColumnBase] = {} - for i, (code, level) in enumerate(zip(new_codes, new_levels)): + for i, (code, level) in enumerate( + zip(new_codes, new_levels, strict=True) + ): if len(code): lo, hi = code.minmax() if lo < -1 or hi > len(level) - 1: @@ -263,7 +265,7 @@ def names(self, value): # definitely buggy, but we can't disallow non-unique # names either... 
self._data = type(self._data)( - dict(zip(value, self._columns)), + dict(zip(value, self._columns, strict=True)), level_names=self._data.level_names, verify=False, ) @@ -549,7 +551,7 @@ def __repr__(self) -> str: preprocess = self arrays = [] - for name, col in zip(self.names, preprocess._columns): + for name, col in zip(self.names, preprocess._columns, strict=True): try: pd_idx = col.to_pandas(nullable=True) except NotImplementedError: @@ -637,7 +639,7 @@ def levels(self) -> list[cudf.Index]: self._maybe_materialize_codes_and_levels() return [ idx.rename(name) # type: ignore[misc] - for idx, name in zip(self._levels, self.names) # type: ignore[arg-type] + for idx, name in zip(self._levels, self.names, strict=True) # type: ignore[arg-type] ] @property # type: ignore @@ -966,7 +968,7 @@ def __eq__(self, other): [ self_col.equals(other_col) for self_col, other_col in zip( - self._columns, other._columns + self._columns, other._columns, strict=True ) ] ) @@ -1116,7 +1118,13 @@ def to_frame( if len(column_names) != len(set(column_names)): raise ValueError("Duplicate column names are not allowed") ca = ColumnAccessor( - dict(zip(column_names, (col.copy() for col in self._columns))), + dict( + zip( + column_names, + (col.copy() for col in self._columns), + strict=True, + ) + ), verify=False, ) return cudf.DataFrame._from_data( @@ -1972,9 +1980,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): join_keys = [ _match_join_keys(lcol, rcol, "inner") - for lcol, rcol in zip(target._columns, self._columns) + for lcol, rcol in zip(target._columns, self._columns, strict=True) ] - join_keys = map(list, zip(*join_keys)) + join_keys = map(list, zip(*join_keys, strict=True)) with acquire_spill_lock(): plc_tables = [ plc.Table([col.to_pylibcudf(mode="read") for col in cols]) @@ -2081,7 +2089,9 @@ def _maybe_match_names(self, other) -> list[Hashable]: return [None] * self.nlevels return [ self_name if _is_same_name(self_name, other_name) else None - for self_name, other_name in zip(self.names, other.names) + for self_name, other_name in zip( + self.names, other.names, strict=True + ) ] @_performance_tracking @@ -2177,7 +2187,9 @@ def _split_columns_by_levels( lv if isinstance(lv, int) else level_names.index(lv) for lv in levels } - for i, (name, col) in enumerate(zip(self.names, self._columns)): + for i, (name, col) in enumerate( + zip(self.names, self._columns, strict=True) + ): if in_levels and i in level_indices: name = f"level_{i}" if name is None else name yield name, col @@ -2218,7 +2230,9 @@ def _columns_for_reset_index( ) -> Generator[tuple[Any, column.ColumnBase], None, None]: """Return the columns and column names for .reset_index""" if levels is None: - for i, (col, name) in enumerate(zip(self._columns, self.names)): + for i, (col, name) in enumerate( + zip(self._columns, self.names, strict=True) + ): yield f"level_{i}" if name is None else name, col else: yield from self._split_columns_by_levels(levels, in_levels=True) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 02fac2c0abc..74337fd5284 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -463,7 +463,7 @@ def concat( "label types in cuDF at this time. You must convert " "the labels to the same type." 
) - for k, o in zip(keys_objs, objs): + for k, o in zip(keys_objs, objs, strict=True): for name, col in o._column_labels_and_values: # if only series, then only keep keys_objs as column labels # if the existing column is multiindex, prepend it @@ -834,14 +834,14 @@ def get_dummies( elif isinstance(prefix, dict): prefix_map = prefix else: - prefix_map = dict(zip(columns, prefix)) + prefix_map = dict(zip(columns, prefix, strict=True)) if isinstance(prefix_sep, str): prefix_sep_map = {} elif isinstance(prefix_sep, dict): prefix_sep_map = prefix_sep else: - prefix_sep_map = dict(zip(columns, prefix_sep)) + prefix_sep_map = dict(zip(columns, prefix_sep, strict=True)) # If we have no columns to encode, we need to drop # fallback columns(if any) @@ -1030,6 +1030,7 @@ def as_tuple(x): target_col.split_by_offsets( list(range(nrows, new_size, nrows)) ), + strict=True, ) ) ) @@ -1335,7 +1336,9 @@ def _one_hot_encode_column( x if x is not None else "" for x in categories.to_arrow().to_pylist() ) - data = dict(zip(result_labels, column.one_hot_encode(categories))) + data = dict( + zip(result_labels, column.one_hot_encode(categories), strict=True) + ) if drop_first and len(data): data.pop(next(iter(data))) @@ -1463,8 +1466,8 @@ def crosstab( raise ValueError("colnames must be unique") data = { - **dict(zip(rownames, map(as_column, index))), - **dict(zip(colnames, map(as_column, columns))), + **dict(zip(rownames, map(as_column, index), strict=True)), + **dict(zip(colnames, map(as_column, columns), strict=True)), } df = cudf.DataFrame._from_data(data) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ff4fe8866bd..01cf0905723 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -99,6 +99,7 @@ def _describe_numeric(obj, percentiles): zip( _format_percentile_names(percentiles), obj.quantile(percentiles).to_numpy(na_value=np.nan).tolist(), + strict=True, ) ), "max": obj.max(), @@ -120,6 +121,7 @@ def _describe_timetype(obj, percentiles, typ): .astype(CUDF_STRING_DTYPE) .to_numpy(na_value=np.nan) .tolist(), + strict=True, ) ), "max": str(typ(obj.max())), diff --git a/python/cudf/cudf/core/udf/groupby_typing.py b/python/cudf/cudf/core/udf/groupby_typing.py index dffd7db2f71..eaa1d4c76b0 100644 --- a/python/cudf/cudf/core/udf/groupby_typing.py +++ b/python/cudf/cudf/core/udf/groupby_typing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
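
The get_dummies and crosstab hunks in reshape.py above all build dicts by zipping label lists against value lists; a short hedged sketch (hypothetical inputs, not cudf code) of what strict=True buys there:

    columns = ["a", "b", "c"]
    prefix = ["col_a", "col_b"]  # one prefix short of the columns
    # dict(zip(columns, prefix)) would quietly yield a two-entry map and the
    # mistake would only surface much later; strict=True fails at the source:
    try:
        prefix_map = dict(zip(columns, prefix, strict=True))
    except ValueError:
        print("prefix must supply one entry per encoded column")
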
from __future__ import annotations from typing import Any @@ -204,7 +204,8 @@ def generic(self, args, kws): if funcs := call_cuda_functions.get(self.key.__name__): for sig in funcs.keys(): if all( - arg.group_scalar_type == ty for arg, ty in zip(args, sig) + arg.group_scalar_type == ty + for arg, ty in zip(args, sig, strict=True) ): return nb_signature(sig[0], *args) raise UDFError(self.make_error_string(args)) @@ -242,7 +243,7 @@ def generic(self, args, kws): retty, selfty, *argtys = sig if self.this.group_scalar_type == selfty and all( arg.group_scalar_type == ty - for arg, ty in zip(args, argtys) + for arg, ty in zip(args, argtys, strict=True) ): return nb_signature(retty, *args, recvr=self.this) raise UDFError(self.make_error_string(args)) diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 28a6bc53ba3..fc22b603dd3 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -313,7 +313,7 @@ def _dtype_to_names_list(col: ColumnBase) -> list[tuple[Hashable, Any]]: if isinstance(col.dtype, StructDtype): return [ (name, _dtype_to_names_list(child)) - for name, child in zip(col.dtype.fields, col.children) + for name, child in zip(col.dtype.fields, col.children, strict=True) ] elif isinstance(col.dtype, ListDtype): return [("", _dtype_to_names_list(child)) for child in col.children] diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 488e76304e1..cbab80a9ee7 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -75,7 +75,7 @@ def read_orc_statistics( file_statistics = { column_name: column_stats for column_name, column_stats in zip( - column_names, parsed_file_statistics + column_names, parsed_file_statistics, strict=True ) if columns is None or column_name in columns } @@ -86,7 +86,7 @@ def read_orc_statistics( stripe_statistics = { column_name: column_stats for column_name, column_stats in zip( - column_names, parsed_stripe_statistics + column_names, parsed_stripe_statistics, strict=True ) if columns is None or column_name in columns } @@ -688,7 +688,7 @@ def _set_col_children_metadata( ) -> None: if isinstance(col.dtype, StructDtype): for i, (child_col, name) in enumerate( - zip(col.children, list(col.dtype.fields)) + zip(col.children, list(col.dtype.fields), strict=True) ): col_meta.child(i).set_name(name) _set_col_children_metadata( diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 5e14065c08f..7bcb80ffbdc 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -709,7 +709,9 @@ def _process_dataset( raise ValueError( "Cannot specify a row_group selection for a directory path." ) - row_groups_map = {path: rgs for path, rgs in zip(paths, row_groups)} + row_groups_map = { + path: rgs for path, rgs in zip(paths, row_groups, strict=True) + } # Apply filters and discover partition columns partition_keys = [] @@ -1091,7 +1093,9 @@ def _parquet_to_frame( # unique set of partition keys. 
Therefore, we start by # aggregating all paths with matching keys using a dict plan = {} - for i, (keys, path) in enumerate(zip(partition_keys, paths_or_buffers)): + for i, (keys, path) in enumerate( + zip(partition_keys, paths_or_buffers, strict=True) + ): rgs = row_groups[i] if row_groups else None tkeys = tuple(keys) if tkeys in plan: @@ -1237,7 +1241,9 @@ def _read_parquet( data = { name: ColumnBase.from_pylibcudf(col) - for name, col in zip(column_names, concatenated_columns) + for name, col in zip( + column_names, concatenated_columns, strict=True + ) } df = DataFrame._from_data(data) ioutils._add_df_col_struct_names(df, child_names) @@ -1515,7 +1521,7 @@ def _get_partitioned( subdir = fs.sep.join( [ _hive_dirname(name, val) - for name, val in zip(partition_cols, keys) + for name, val in zip(partition_cols, keys, strict=True) ] ) prefix = fs.sep.join([root_path, subdir]) @@ -1951,7 +1957,9 @@ def write_table(self, df): subdir = fs.sep.join( [ f"{name}={val}" - for name, val in zip(self.partition_cols, keys) + for name, val in zip( + self.partition_cols, keys, strict=True + ) ] ) prefix = fs.sep.join([self.path, subdir]) @@ -2029,6 +2037,7 @@ def write_table(self, df): paths, partition_info, metadata_file_paths, + strict=True, ): if path in self.path_cw_map: # path is a currently open file cw_idx = self.path_cw_map[path] @@ -2049,7 +2058,7 @@ def write_table(self, df): if new_cw_paths: # Create new cw for unhandled paths encountered in this write_table - new_paths, part_info, meta_paths = zip(*new_cw_paths) + new_paths, part_info, meta_paths = zip(*new_cw_paths, strict=True) self._chunked_writers.append( ( ParquetWriter(new_paths, **self.common_args), @@ -2167,7 +2176,7 @@ def _set_col_metadata( if isinstance(col.dtype, StructDtype): for i, (child_col, name) in enumerate( - zip(col.children, list(col.dtype.fields)) + zip(col.children, list(col.dtype.fields), strict=True) ): col_meta.child(i).set_name(name) _set_col_metadata( diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index f3d66e4ba67..a728b8582eb 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -387,7 +387,7 @@ def __init__(self, *args) -> None: "[(pat, val), ...])." 
) - self.ops = tuple(zip(args[::2], args[1::2])) + self.ops = tuple(zip(args[::2], args[1::2], strict=True)) def __enter__(self) -> None: self.undo = tuple((pat, get_option(pat)) for pat, _ in self.ops) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 4dc5b3d42ab..c48e4058c02 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -5282,7 +5282,6 @@ def pytest_unconfigure(config): "tests/frame/methods/test_rename.py::TestRename::test_rename_axis_style_raises", "tests/frame/methods/test_rename.py::TestRename::test_rename_inplace", "tests/frame/methods/test_rename.py::TestRename::test_rename_mapper_and_positional_arguments_raises", - "tests/frame/methods/test_rename.py::TestRename::test_rename_multiindex", "tests/frame/methods/test_rename.py::TestRename::test_rename_no_mappings_raises", "tests/frame/methods/test_rename.py::TestRename::test_rename_nocopy", "tests/frame/methods/test_replace.py::TestDataFrameReplace::test_replace_NA_with_None", diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 41401ab4bde..4c662808b9c 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -295,7 +295,9 @@ def get_ptr(x) -> int: assert lhs.offset == rhs.offset assert lhs.size == rhs.size assert len(lhs.base_children) == len(rhs.base_children) - for lhs_child, rhs_child in zip(lhs.base_children, rhs.base_children): + for lhs_child, rhs_child in zip( + lhs.base_children, rhs.base_children, strict=True + ): assert_column_memory_eq(lhs_child, rhs_child) if isinstance(lhs, cudf.core.column.CategoricalColumn) and isinstance( rhs, cudf.core.column.CategoricalColumn diff --git a/python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py b/python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py index d1de7245634..ea007eafdf7 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_convert_dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
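
The options.py change above pairs a flat (pat1, val1, pat2, val2, ...) argument list into tuples. A small sketch of the slicing idiom, with illustrative option names that are assumptions rather than quotes from cudf:

    args = ("mode.pandas_compatible", True, "default_integer_bitwidth", 32)
    # Even-index slice gives patterns, odd-index slice gives values; for an odd
    # number of arguments the slices differ in length and strict zip raises,
    # backing up the explicit even-length check in the constructor:
    ops = tuple(zip(args[::2], args[1::2], strict=True))
    # (('mode.pandas_compatible', True), ('default_integer_bitwidth', 32))
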
import pandas as pd import pytest @@ -33,7 +33,7 @@ def test_convert_dtypes(): df = pd.DataFrame( { k: pd.Series(v, dtype=d) - for k, v, d in zip(data.keys(), data.values(), dtypes) + for k, v, d in zip(data.keys(), data.values(), dtypes, strict=True) } ) gdf = cudf.DataFrame.from_pandas(df) diff --git a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py index 6fb32ae36b9..64bba6f4404 100644 --- a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py +++ b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py @@ -14,7 +14,9 @@ def test_groupby_maintain_order_random(with_nulls): key_names = [f"key{key}" for key in range(nkeys)] key_values = [rng.integers(100, size=nrows) for _ in key_names] value = rng.integers(-100, 100, size=nrows) - df = cudf.DataFrame(dict(zip(key_names, key_values), value=value)) + df = cudf.DataFrame( + dict(zip(key_names, key_values, strict=True), value=value) + ) if with_nulls: for key in key_names: df.loc[df[key] == 1, key] = None diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py index 1bae3b8292c..2253ad085c5 100644 --- a/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_copy.py @@ -95,7 +95,7 @@ def test_multiindex_copy_deep(data, copy_on_write, deep): child.base_data.get_ptr(mode="read") for child in rchildren ] - assert all((x == y) for x, y in zip(lptrs, rptrs)) + assert all((x == y) for x, y in zip(lptrs, rptrs, strict=True)) elif isinstance(data, pd.MultiIndex): data = cudf.MultiIndex.from_pandas(data) @@ -113,16 +113,22 @@ def test_multiindex_copy_deep(data, copy_on_write, deep): lv._column.base_data.get_ptr(mode="read") for lv in mi2._levels ] - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + assert all( + (x == y) == same_ref for x, y in zip(lptrs, rptrs, strict=True) + ) # Assert ._codes identity lptrs = [c.base_data.get_ptr(mode="read") for c in mi1._codes] rptrs = [c.base_data.get_ptr(mode="read") for c in mi2._codes] - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + assert all( + (x == y) == same_ref for x, y in zip(lptrs, rptrs, strict=True) + ) # Assert ._data identity lptrs = [d.base_data.get_ptr(mode="read") for d in mi1._columns] rptrs = [d.base_data.get_ptr(mode="read") for d in mi2._columns] - assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs)) + assert all( + (x == y) == same_ref for x, y in zip(lptrs, rptrs, strict=True) + ) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py index d26e7216a73..54be9e670aa 100644 --- a/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py @@ -50,7 +50,9 @@ def test_bool_raises(): def test_multi_index_contains_hashable(): - gidx = cudf.MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + gidx = cudf.MultiIndex.from_tuples( + zip(["foo", "bar", "baz"], [1, 2, 3], strict=True) + ) pidx = gidx.to_pandas() assert_exceptions_equal( @@ -66,7 +68,9 @@ def test_multiindex_codes(): [("a", "b"), ("a", "c"), ("b", "c")], names=["A", "Z"] ) - for p_array, g_array in zip(midx.to_pandas().codes, midx.codes): + for p_array, g_array in zip( + midx.to_pandas().codes, midx.codes, strict=True + ): assert_eq(p_array, g_array) diff --git 
a/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py index 3db599f49e5..2afb5c4f179 100644 --- a/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py +++ b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py @@ -86,7 +86,7 @@ def test_multiindex_types(): def test_multiindex_from_tuples(): arrays = [["a", "a", "b", "b"], ["house", "store", "house", "store"]] - tuples = list(zip(*arrays)) + tuples = list(zip(*arrays, strict=True)) pmi = pd.MultiIndex.from_tuples(tuples) gmi = cudf.MultiIndex.from_tuples(tuples) assert_eq(pmi, gmi) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 72080a5be3f..abc3c105320 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -118,7 +118,7 @@ def test_ufunc_index(request, ufunc): expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) if ufunc.nout > 1: - for g, e in zip(got, expect): + for g, e in zip(got, expect, strict=True): assert_eq(g, e, check_exact=False) else: assert_eq(got, expect, check_exact=False) @@ -145,7 +145,7 @@ def test_binary_ufunc_index_array(ufunc, reflect): expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) if ufunc.nout > 1: - for g, e in zip(got, expect): + for g, e in zip(got, expect, strict=True): if reflect: assert (cp.asnumpy(g) == e).all() else: @@ -241,7 +241,7 @@ def test_ufunc_series(request, ufunc, has_nulls, indexed): expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) if ufunc.nout > 1: - for g, e in zip(got, expect): + for g, e in zip(got, expect, strict=True): if has_nulls: e[mask] = np.nan assert_eq(g, e, check_exact=False) @@ -336,7 +336,7 @@ def test_binary_ufunc_series_array( expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) if ufunc.nout > 1: - for g, e in zip(got, expect): + for g, e in zip(got, expect, strict=True): if has_nulls: e[mask] = np.nan if reflect: @@ -457,7 +457,7 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed): expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) if ufunc.nout > 1: - for g, e in zip(got, expect): + for g, e in zip(got, expect, strict=True): if has_nulls: e[mask] = np.nan assert_eq(g, e, check_exact=False) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 8d4f6091df0..75cd40aa436 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -37,6 +37,7 @@ def pd_str_cat(): def test_categorical_basic(): cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) cudf_cat = cudf.Index(cat) + assert_eq(cat.codes, cudf_cat.codes.to_numpy()) pdsr = pd.Series(cat, index=["p", "q", "r", "s", "t"]) sr = cudf.Series(cat, index=["p", "q", "r", "s", "t"]) @@ -50,16 +51,7 @@ def test_categorical_basic(): pdsr.cat.codes.values, sr.cat.codes.to_numpy() ) - string = str(sr) - expect_str = """ -p a -q a -r b -s c -t a -""" - assert all(x == y for x, y in zip(string.split(), expect_str.split())) - assert_eq(cat.codes, cudf_cat.codes.to_numpy()) + assert str(sr) == str(pdsr) def test_categorical_integer(): diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index 27ec4fcd1f3..023dbbd8daf 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2025, NVIDIA CORPORATION. import pandas as pd @@ -34,7 +34,7 @@ def check_ca_equal(lhs, rhs): assert lhs.multiindex == rhs.multiindex assert lhs.rangeindex == rhs.rangeindex assert lhs.label_dtype == rhs.label_dtype - for l_key, r_key in zip(lhs, rhs): + for l_key, r_key in zip(lhs, rhs, strict=True): assert l_key == r_key assert_eq(lhs[l_key], rhs[r_key]) @@ -102,7 +102,7 @@ def test_iter(simple_data): yields column names. """ ca = ColumnAccessor(simple_data) - for expect_key, got_key in zip(simple_data, ca): + for expect_key, got_key in zip(simple_data, ca, strict=True): assert expect_key == got_key diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 9494d22a158..4f9ca1b4261 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -122,7 +122,7 @@ def gdf_np_dtypes(): np.uint32, np.uint64, ] - return dict(zip(gdf_dtypes, np_dtypes)) + return dict(zip(gdf_dtypes, np_dtypes, strict=True)) @pytest.fixture diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index 80c794cb0f4..f7b0af9e51a 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -16,7 +16,8 @@ def _kernel_multiply(a, b, out): - for i, (x, y) in enumerate(zip(a, b)): + # numba doesn't support zip(..., strict=True), so we must tell ruff to ignore it. + for i, (x, y) in enumerate(zip(a, b)): # noqa: B905 out[i] = x * y @@ -71,7 +72,7 @@ def test_df_apply_rows(): nelem = 20 def kernel(in1, in2, in3, out1, out2, extra1, extra2): - for i, (x, y, z) in enumerate(zip(in1, in2, in3)): + for i, (x, y, z) in enumerate(zip(in1, in2, in3)): # noqa: B905 out1[i] = extra2 * x - extra1 * y out2[i] = y - extra1 * z @@ -106,7 +107,7 @@ def test_df_apply_chunks(chunksize): nelem = 20 def kernel(in1, in2, in3, out1, out2, extra1, extra2): - for i, (x, y, z) in enumerate(zip(in1, in2, in3)): + for i, (x, y, z) in enumerate(zip(in1, in2, in3)): # noqa: B905 out1[i] = extra2 * x - extra1 * y + z out2[i] = i @@ -140,7 +141,7 @@ def test_df_apply_custom_chunks(): nelem = 20 def kernel(in1, in2, in3, out1, out2, extra1, extra2): - for i, (x, y, z) in enumerate(zip(in1, in2, in3)): + for i, (x, y, z) in enumerate(zip(in1, in2, in3)): # noqa: B905 out1[i] = extra2 * x - extra1 * y + z out2[i] = i @@ -157,7 +158,10 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out1 = extra2 * in1 - extra1 * in2 + in3 expect_out2 = np.hstack( - [np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)])] + [ + np.arange(e - s) + for s, e in zip(chunks, chunks[1:] + [len(df)], strict=True) + ] ) outdf = df.apply_chunks( @@ -203,7 +207,7 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): expect_out2 = np.hstack( [ tpb * np.arange(e - s) - for s, e in zip(chunks, chunks[1:] + [len(df)]) + for s, e in zip(chunks, chunks[1:] + [len(df)], strict=True) ] ) @@ -228,7 +232,7 @@ def test_df_apply_rows_incols_mapping(): nelem = 20 def kernel(x, y, z, out1, out2, extra1, extra2): - for i, (a, b, c) in enumerate(zip(x, y, z)): + for i, (a, b, c) in enumerate(zip(x, y, z)): # noqa: B905 out1[i] = extra2 * a - extra1 * b out2[i] = b - extra1 * c @@ -260,7 +264,7 @@ def test_df_apply_chunks_incols_mapping(chunksize): nelem = 20 def kernel(q, p, r, out1, out2, extra1, extra2): - for i, (a, b, c) in enumerate(zip(q, p, r)): + for i, (a, b, c) in enumerate(zip(q, p, r)): # noqa: B905 out1[i] = extra2 * a - extra1 * b + c out2[i] = i diff --git 
a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 328d6fbca7b..ccf4c7de0d8 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -348,7 +348,7 @@ def test_axes(data): expected = psr.axes actual = csr.axes - for e, a in zip(expected, actual): + for e, a in zip(expected, actual, strict=True): assert_eq(e, a, exact=False) @@ -1677,7 +1677,7 @@ def test_dataframe_hash_partition_keep_index(keep_index): parts = gdf.partition_by_hash(["key"], nparts=2, keep_index=keep_index) - for exp, got in zip(expected, parts): + for exp, got in zip(expected, parts, strict=True): assert_eq(exp, got) @@ -5724,7 +5724,7 @@ def test_cov_nans(): ) def test_df_sr_binop(psr, colnames, op): data = [[3.0, 2.0, 5.0], [3.0, None, 5.0], [6.0, 7.0, np.nan]] - data = dict(zip(colnames, data)) + data = dict(zip(colnames, data, strict=True)) gsr = cudf.Series.from_pandas(psr).astype("float64") @@ -5774,7 +5774,7 @@ def test_df_sr_binop(psr, colnames, op): def test_df_sr_binop_col_order(op): colnames = [0, 1, 2] data = [[0, 2, 5], [3, None, 5], [6, 7, np.nan]] - data = dict(zip(colnames, data)) + data = dict(zip(colnames, data, strict=True)) gdf = cudf.DataFrame(data) pdf = pd.DataFrame.from_dict(data) @@ -9081,8 +9081,8 @@ def test_update_for_dataframes( ): errors = "ignore" join = "left" - left = dict(zip(left_keys, data_left)) - right = dict(zip(right_keys, data_right)) + left = dict(zip(left_keys, data_left, strict=True)) + right = dict(zip(right_keys, data_right, strict=True)) pdf = pd.DataFrame(left) gdf = cudf.DataFrame(left, nan_as_null=False) @@ -10926,7 +10926,7 @@ def test_dataframe_column_name(name): @pytest.mark.parametrize("names", [["abc", "def"], [1, 2], ["abc", 10]]) def test_dataframe_multiindex_column_names(names): arrays = [["A", "A", "B", "B"], ["one", "two", "one", "two"]] - tuples = list(zip(*arrays)) + tuples = list(zip(*arrays, strict=True)) index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) pdf = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=index) diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index c9967b83235..ce7e80e0dba 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -588,7 +588,9 @@ def test_drop_duplicates_multi_index(): ["one", "two", "one", "two", "one", "two", "one", "two"], ] - idx = pd.MultiIndex.from_tuples(list(zip(*arrays)), names=["a", "b"]) + idx = pd.MultiIndex.from_tuples( + list(zip(*arrays, strict=True)), names=["a", "b"] + ) rng = np.random.default_rng(seed=0) pdf = pd.DataFrame(rng.integers(0, 2, (8, 4)), index=idx) gdf = cudf.DataFrame.from_pandas(pdf) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index f332fb37e56..552ac748e3e 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -217,7 +217,7 @@ def test_groupby_as_index_multiindex(pdf, gdf, as_index): assert_eq(pdf, gdf) else: # column names don't match - check just the values - for gcol, pcol in zip(gdf, pdf): + for gcol, pcol in zip(gdf, pdf, strict=True): assert_array_equal(gdf[gcol].to_numpy(), pdf[pcol].values) @@ -1916,7 +1916,7 @@ def test_grouping(grouper): gdf = cudf.from_pandas(pdf) for pdf_group, gdf_group in zip( - pdf.groupby(grouper), gdf.groupby(grouper) + pdf.groupby(grouper), gdf.groupby(grouper), strict=True ): assert pdf_group[0] == gdf_group[0] assert_eq(pdf_group[1], gdf_group[1]) @@ -2749,7 
+2749,7 @@ def test_groupby_shift_row_mixed_fill(shift_perc, direction, fill_value): # Pandas does not support specifying different fill_value by column, so we # simulate it column by column expected = pdf.copy() - for col, single_fill in zip(pdf.iloc[:, 1:], fill_value): + for col, single_fill in zip(pdf.iloc[:, 1:], fill_value, strict=True): expected[col] = ( pdf[col] .groupby(pdf["0"]) @@ -3725,7 +3725,8 @@ def expected(self, df, n, take_head, preserve_order): sorted(values_to_sort.tolist(), key=keyfunc), key=keyfunc, ) - ) + ), + strict=True, ) return cudf.DataFrame( {"a": expect_a, "b": expect_b}, index=index diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 9ee55790b73..dbb193019b2 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -522,7 +522,8 @@ def test_empty_df_head_tail_index(n): "one", "two", ], - ] + ], + strict=True, ) ) ), @@ -550,7 +551,8 @@ def test_empty_df_head_tail_index(n): "one", "two", ], - ] + ], + strict=True, ) ) ) diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py index 9a95f0e01ab..60ec93f5040 100644 --- a/python/cudf/cudf/tests/test_join_order.py +++ b/python/cudf/cudf/tests/test_join_order.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. import itertools import operator @@ -64,7 +64,7 @@ def expect_inner(left, right, sort): keys = [] val_x = [] val_y = [] - for k, v in zip(left_key, left_val): + for k, v in zip(left_key, left_val, strict=True): if k not in right_have: continue for i in right_have[k]: @@ -76,7 +76,11 @@ def expect_inner(left, right, sort): # Python sort is stable, so this will preserve input order for # equal items. keys, val_x, val_y = zip( - *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0)) + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, ) return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) @@ -92,7 +96,7 @@ def expect_left(left, right, sort): keys = [] val_x = [] val_y = [] - for k, v in zip(left_key, left_val): + for k, v in zip(left_key, left_val, strict=True): if k not in right_have: right_vals = [None] else: @@ -107,7 +111,11 @@ def expect_left(left, right, sort): # Python sort is stable, so this will preserve input order for # equal items. keys, val_x, val_y = zip( - *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0)) + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, ) return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) @@ -122,7 +130,7 @@ def expect_outer(left, right, sort): keys = [] val_x = [] val_y = [] - for k, v in zip(left_key, left_val): + for k, v in zip(left_key, left_val, strict=True): if k not in right_have: right_vals = [None] else: @@ -132,7 +140,7 @@ def expect_outer(left, right, sort): val_x.append(v) val_y.append(rv) left_have = set(left_key) - for k, v in zip(right_key, right_val): + for k, v in zip(right_key, right_val, strict=True): if k not in left_have: keys.append(k) val_x.append(None) @@ -142,7 +150,11 @@ def expect_outer(left, right, sort): # equal items. 
# outer joins are always sorted, but we test both sort values keys, val_x, val_y = zip( - *sorted(zip(keys, val_x, val_y), key=operator.itemgetter(0)) + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, ) return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 6fd2ea24df2..adcd3fff21d 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -354,11 +354,14 @@ def test_json_lines_basic(json_input, engine): can_warn = isinstance(json_input, str) and not json_input.endswith(".json") with expect_warning_if(can_warn): cu_df = cudf.read_json(json_input, engine=engine, lines=True) + # io types must seek to the beginning before you can read again + if hasattr(json_input, "seek"): + json_input.seek(0) with expect_warning_if(can_warn): pd_df = pd.read_json(json_input, lines=True) assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns): + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): assert str(cu_col) == str(pd_col) np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) @@ -392,7 +395,7 @@ def test_json_lines_multiple(tmpdir, json_input, engine): pd_df = pd.concat([pdf, pdf]) assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns): + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): assert str(cu_col) == str(pd_col) np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) @@ -430,7 +433,7 @@ def test_json_read_directory(tmpdir, json_input, engine): pd_df = pd.concat([pdf, pdf, pdf]) assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns): + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): assert str(cu_col) == str(pd_col) np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 9648673273e..cadd5c80a54 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -185,7 +185,7 @@ def test_take(data, idx): ps = pd.Series(data) gs = cudf.from_pandas(ps) - expected = pd.Series(zip(ps, idx)).map( + expected = pd.Series(zip(ps, idx, strict=True)).map( lambda x: [x[0][i] for i in x[1]] if x[0] is not None else None ) got = gs.list.take(idx) diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py index ae5f1e1c90c..842b40a6d37 100644 --- a/python/cudf/cudf/tests/test_monotonic.py +++ b/python/cudf/cudf/tests/test_monotonic.py @@ -211,7 +211,7 @@ def test_multiindex(): ], ) def test_multiindex_tuples(testarr): - tuples = list(zip(*testarr[0])) + tuples = list(zip(*testarr[0], strict=True)) index = MultiIndex.from_tuples(tuples, names=testarr[1]) index_pd = pd.MultiIndex.from_tuples(tuples, names=testarr[1]) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index c8c4923a4e3..b612e20a17f 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -577,7 +577,7 @@ def test_multiindex_iloc(pdf, gdf, pdfIndex, iloc_rows, iloc_columns): def test_multiindex_iloc_scalar(): arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]] - tuples = list(zip(*arrays)) + tuples = list(zip(*arrays, strict=True)) 
idx = cudf.MultiIndex.from_tuples(tuples) gdf = cudf.DataFrame( {"first": cp.random.rand(4), "second": cp.random.rand(4)} @@ -824,7 +824,7 @@ def test_pickle_roundtrip_multiindex(names): def test_multiindex_index_single_row(): arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]] - tuples = list(zip(*arrays)) + tuples = list(zip(*arrays, strict=True)) idx = cudf.MultiIndex.from_tuples(tuples) gdf = cudf.DataFrame( {"first": cp.random.rand(4), "second": cp.random.rand(4)} diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py index f15e48dfdc8..b85882a79f5 100644 --- a/python/cudf/cudf/tests/test_onehot.py +++ b/python/cudf/cudf/tests/test_onehot.py @@ -41,7 +41,11 @@ def test_get_dummies(data, index, dtype): def test_onehot_get_dummies_multicol(n_cols): n_categories = 5 data = dict( - zip(ascii_lowercase, (np.arange(n_categories) for _ in range(n_cols))) + zip( + ascii_lowercase[:n_cols], + (np.arange(n_categories) for _ in range(n_cols)), + strict=True, + ) ) gdf = cudf.DataFrame(data) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 94eb2c794a5..2ab48f7ecce 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -421,7 +421,7 @@ def num_row_groups(rows, group_size): assert num_columns == len(pdf.columns) assert num_rows == len(pdf.index) assert row_groups == num_row_groups(num_rows, row_group_size) - for a, b in zip(col_names, pdf.columns): + for a, b in zip(col_names, pdf.columns, strict=True): assert a == b diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py index 08226dd7f6d..4f5c94c30f0 100644 --- a/python/cudf/cudf/tests/test_spilling.py +++ b/python/cudf/cudf/tests/test_spilling.py @@ -791,5 +791,5 @@ def test_scatter_by_map(): with cudf.option_context("spill", True): df = cudf.DataFrame(data) result = df.scatter_by_map(data) - for i, res in zip(data, result): + for i, res in zip(data, result, strict=True): assert_eq(res, cudf.DataFrame([i], index=[i])) diff --git a/python/cudf/cudf/tests/testing/test_assert_frame_equal.py b/python/cudf/cudf/tests/testing/test_assert_frame_equal.py index 719dc0b1e60..a25aab458cb 100644 --- a/python/cudf/cudf/tests/testing/test_assert_frame_equal.py +++ b/python/cudf/cudf/tests/testing/test_assert_frame_equal.py @@ -41,7 +41,7 @@ def test_basic_assert_frame_equal( p_left["c"] = np.array(data, dtype="int64") p_right = pd.DataFrame(index=index) - for dtype, name in zip(rdtype, rname): + for dtype, name in zip(rdtype, rname, strict=True): p_right[name] = np.array(data, dtype=dtype) left = cudf.from_pandas(p_left) diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index b6660b4b1cc..2e383d590d5 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -195,7 +195,9 @@ def _store_func( f.write(f"{num_outer_bins}\n") f.writelines( f"{coeff} {offset}\n" - for coeff, offset in zip(inner_table_coeffs, offsets_into_ht) + for coeff, offset in zip( + inner_table_coeffs, offsets_into_ht, strict=True + ) ) f.write(f"{len(hash_table)}\n") f.writelines(f"{kv}\n" for kv in hash_table) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 40d54157c45..47e5ecee570 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -2295,9 +2295,10 @@ def _get_remote_bytes_all( zip( *( (r, j, min(j + blocksize, s)) - for r, s in zip(remote_paths, sizes) 
+ for r, s in zip(remote_paths, sizes, strict=True) for j in range(0, s, blocksize) - ) + ), + strict=True, ), ) @@ -2306,7 +2307,9 @@ def _get_remote_bytes_all( # Construct local byte buffers # (Need to make sure path offsets are ordered correctly) - unique_count = dict(zip(*np.unique(paths, return_counts=True))) + unique_count = dict( + zip(*np.unique(paths, return_counts=True), strict=True) + ) offset = np.cumsum([0] + [unique_count[p] for p in remote_paths]) buffers = [ functools.reduce(operator.add, chunks[offset[i] : offset[i + 1]]) @@ -2340,7 +2343,7 @@ def _get_remote_bytes_parquet( ) buffers = [] - for size, path in zip(sizes, remote_paths): + for size, path in zip(sizes, remote_paths, strict=True): path_data = data[path] buf = np.empty(size, dtype="b") for range_offset in path_data.keys(): @@ -2393,7 +2396,7 @@ def _update_col_struct_field_names( if col.children: children = list(col.children) for i, (child, names) in enumerate( - zip(children, child_names.values()) + zip(children, child_names.values(), strict=True) ): children[i] = _update_col_struct_field_names(child, names) col.set_base_children(tuple(children)) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 67c219056ca..4db0129bbae 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -791,14 +791,14 @@ def test_chunked_json_reader(tmpdir, data): pd.read_json(file_path, lines=True, chunksize=1) as pd_reader, xpd.read_json(file_path, lines=True, chunksize=1) as xpd_reader, ): - for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader): + for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader, strict=True): tm.assert_equal(pd_chunk, xpd_chunk) with ( pd.read_json(StringIO(data), lines=True, chunksize=1) as pd_reader, xpd.read_json(StringIO(data), lines=True, chunksize=1) as xpd_reader, ): - for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader): + for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader, strict=True): tm.assert_equal(pd_chunk, xpd_chunk) @@ -818,14 +818,14 @@ def test_chunked_csv_reader(tmpdir, data): pd.read_csv(file_path, chunksize=1) as pd_reader, xpd.read_csv(file_path, chunksize=1) as xpd_reader, ): - for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader): + for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader, strict=True): tm.assert_equal(pd_chunk, xpd_chunk, check_index_type=False) with ( pd.read_json(StringIO(data), lines=True, chunksize=1) as pd_reader, xpd.read_json(StringIO(data), lines=True, chunksize=1) as xpd_reader, ): - for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader): + for pd_chunk, xpd_chunk in zip(pd_reader, xpd_reader, strict=True): tm.assert_equal(pd_chunk, xpd_chunk, check_index_type=False) diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index f4426e09ef8..13db59f04cb 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -65,7 +65,7 @@ def test_profiler(): "np.isclose", "pd.Timestamp", ] - for line_stats, call in zip(per_line_stats, calls): + for line_stats, call in zip(per_line_stats, calls, strict=True): # Check that the expected function calls were recorded. 
assert call in line_stats[1] # No CPU time diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py index c42ecf29fcc..ea210ecf3bb 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py @@ -11,7 +11,7 @@ def assert_catboost_equal(expect, got, rtol=1e-7, atol=0.0): if isinstance(expect, (tuple, list)): assert len(expect) == len(got) - for e, g in zip(expect, got): + for e, g in zip(expect, got, strict=True): assert_catboost_equal(e, g, rtol, atol) elif isinstance(expect, np.ndarray): np.testing.assert_allclose(expect, got, rtol=rtol, atol=atol) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py index 92a6513b7b1..5590db9dc1a 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py @@ -30,7 +30,7 @@ def assert_cuml_equal(expect, got): np.testing.assert_allclose(expect, got) elif isinstance(expect, tuple) and isinstance(got, tuple): assert len(expect) == len(got) - for e, g in zip(expect, got): + for e, g in zip(expect, got, strict=True): assert_cuml_equal(e, g) elif isinstance(expect, pd.DataFrame): assert pd.testing.assert_frame_equal(expect, got) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 6a33666790d..1a8ff355d4d 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -13,7 +13,7 @@ def assert_plots_equal(expect, got): if isinstance(expect, Axes) and isinstance(got, Axes): for expect_ch, got_ch in zip( - expect.get_children(), got.get_children() + expect.get_children(), got.get_children(), strict=True ): assert type(expect_ch) is type(got_ch) if isinstance(expect_ch, Line2D): diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index 02b2b1b9997..4ae17782533 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -12,7 +12,7 @@ def assert_plots_equal(expect, got): if isinstance(expect, Axes) and isinstance(got, Axes): for expect_ch, got_ch in zip( - expect.get_children(), got.get_children() + expect.get_children(), got.get_children(), strict=True ): assert type(expect_ch) is type(got_ch) if isinstance(expect_ch, Line2D): diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index ba98273404d..9f67c5cb307 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
from __future__ import annotations @@ -17,7 +17,7 @@ def xgboost_assert_equal(expect, got, rtol: float = 1e-7, atol: float = 0.0): if isinstance(expect, (tuple, list)): assert len(expect) == len(got) - for e, g in zip(expect, got): + for e, g in zip(expect, got, strict=True): xgboost_assert_equal(e, g, rtol, atol) elif isinstance(expect, scipy.sparse.csr_matrix): np.testing.assert_allclose(expect.data, got.data, rtol=rtol, atol=atol) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index bd0bf9087a0..8e770112a67 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -106,7 +106,9 @@ filterwarnings = [ # PerformanceWarning from cupy warming up the JIT cache "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning", # Ignore numba PEP 456 warning specific to arm machines - "ignore:FNV hashing is not implemented in Numba.*:UserWarning" + "ignore:FNV hashing is not implemented in Numba.*:UserWarning", + # Allow running UDF tests on older architectures + "ignore:.*NVRTC log messages whilst compiling.*:UserWarning", ] markers = [ "spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`", diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 678384d78e6..097319b8b18 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -438,6 +438,7 @@ def group_split_cudf(df, c, k, ignore_index=False): map_size=k, keep_index=not ignore_index, ), + strict=True, ) ) diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 5de28751912..6a6d54db615 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -101,7 +101,7 @@ def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): ) sources = [] - for path, n in zip(paths, nstripes_per_file): + for path, n in zip(paths, nstripes_per_file, strict=True): for stripe in ( range(n) if filters is None @@ -186,7 +186,7 @@ def to_orc( dwrite = delayed(write_orc_partition) parts = [ dwrite(d, path, fs, filename, compression=compression) - for d, filename in zip(df.to_delayed(), filenames) + for d, filename in zip(df.to_delayed(), filenames, strict=True) ] if compute: diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 13407f5d56f..d5bd0b5047b 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -131,15 +131,7 @@ def test_categorical_basic(data): pdsr.cat.codes.values, result.cat.codes.values_host ) - string = str(result) - expect_str = """ -0 a -1 a -2 b -3 c -4 a -""" - assert all(x == y for x, y in zip(string.split(), expect_str.split())) + assert str(result) == str(pdsr) with dask.config.set({"dataframe.convert-string": False}): df = DataFrame() df["a"] = ["xyz", "abc", "def"] * 10 diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 3d4c26d36b3..ec6f25747b4 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -136,7 +136,7 @@ def _fragmented_gdf(df, nsplit): subdivsize = n // nsplit starts = [i * subdivsize for i in range(nsplit)] ends = starts[1:] + [None] - frags = [df[s:e] for s, e in zip(starts, ends)] + frags = [df[s:e] for s, e in zip(starts, ends, strict=True)] return frags diff --git 
a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index af57af6e694..844f21e56ba 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -376,14 +376,15 @@ cdef class TableWithMetadata: def child_names(self): """ Return a dictionary mapping the names of columns with children - to the names of their child columns + to the names of their child columns. Columns without children + get an empty dictionary. """ return TableWithMetadata._parse_col_names(self.metadata.schema_info) @staticmethod cdef dict _parse_col_names(vector[column_name_info] infos): - cdef dict child_names = dict() - cdef dict names = dict() + cdef dict child_names + cdef dict names = {} for col_info in infos: child_names = TableWithMetadata._parse_col_names(col_info.children) names[col_info.name.decode()] = child_names diff --git a/python/pylibcudf/tests/common/utils.py b/python/pylibcudf/tests/common/utils.py index a844b74eb26..1ec0afed6cb 100644 --- a/python/pylibcudf/tests/common/utils.py +++ b/python/pylibcudf/tests/common/utils.py @@ -163,7 +163,7 @@ def _is_supported_for_pc_is_nan(arr_type): return _is_supported_for_pc_is_nan(arr_type.value_type) return True - for lh_arr, rh_arr in zip(lhs, rhs): + for lh_arr, rh_arr in zip(lhs, rhs, strict=True): # pc.is_nan does not support nested list # with float (eg. list>) if not _is_supported_for_pc_is_nan(lh_arr.type): @@ -213,7 +213,9 @@ def assert_table_eq(pa_table: pa.Table, plc_table: plc.Table) -> None: """Verify that a pylibcudf table and PyArrow table are equal.""" assert plc_table.shape() == pa_table.shape - for plc_col, pa_col in zip(plc_table.columns(), pa_table.columns): + for plc_col, pa_col in zip( + plc_table.columns(), pa_table.columns, strict=True + ): assert_column_eq(pa_col, plc_col) @@ -235,7 +237,9 @@ def assert_table_and_meta_eq( if not check_types_if_empty and plc_table.num_rows() == 0: return - for plc_col, pa_col in zip(plc_table.columns(), pa_table.columns): + for plc_col, pa_col in zip( + plc_table.columns(), pa_table.columns, strict=True + ): assert_column_eq(pa_col, plc_col, check_field_nullability) # Check column name equality @@ -263,7 +267,9 @@ def nesting_level(typ) -> tuple[int, int]: list_, struct = nesting_level(typ.value_type) return list_ + 1, struct elif isinstance(typ, pa.StructType): - lists, structs = map(max, zip(*(nesting_level(t.type) for t in typ))) + lists, structs = map( + max, zip(*(nesting_level(t.type) for t in typ), strict=True) + ) return lists, structs + 1 else: return 0, 0 diff --git a/python/pylibcudf/tests/conftest.py b/python/pylibcudf/tests/conftest.py index 4840fd0a6ad..24f9d79aa7f 100644 --- a/python/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/tests/conftest.py @@ -119,7 +119,10 @@ def _generate_nested_data(typ): ) elif isinstance(typ, pa.ListType): pa_array = pa.array( - [list(row_vals) for row_vals in zip(rand_arrs[0])], + [ + list(row_vals) + for row_vals in zip(rand_arrs[0], strict=True) + ], type=typ, ) child_colnames.append(("", grandchild_colnames)) diff --git a/python/pylibcudf/tests/io/test_avro.py b/python/pylibcudf/tests/io/test_avro.py index 58faa7d6ad7..13c2d37a8e9 100644 --- a/python/pylibcudf/tests/io/test_avro.py +++ b/python/pylibcudf/tests/io/test_avro.py @@ -89,7 +89,8 @@ def _make_avro_table(avro_dtypes, avro_dtype_data, nullable=False): ) records = [ - {"prop1": val1, "prop2": val2} for val1, val2 in zip(*avro_dtype_data) + {"prop1": val1, "prop2": val2} + for val1, val2 in zip(*avro_dtype_data, strict=True) ] 
buffer = io.BytesIO() diff --git a/python/pylibcudf/tests/test_binaryops.py b/python/pylibcudf/tests/test_binaryops.py index dfe90a6723f..b25c5d22083 100644 --- a/python/pylibcudf/tests/test_binaryops.py +++ b/python/pylibcudf/tests/test_binaryops.py @@ -94,7 +94,7 @@ def inner(x, y): return None return func(x, y) - return pa.array([inner(x, y) for x, y in zip(x, y)]) + return pa.array([inner(x, y) for x, y in zip(x, y, strict=True)]) return wrapper diff --git a/python/pylibcudf/tests/test_copying.py b/python/pylibcudf/tests/test_copying.py index 29a5df4ebda..d636b10d536 100644 --- a/python/pylibcudf/tests/test_copying.py +++ b/python/pylibcudf/tests/test_copying.py @@ -216,7 +216,7 @@ def _pyarrow_boolean_mask_scatter_table(source, mask, target_table): return pa.table( [ _pyarrow_boolean_mask_scatter_column(r, mask, v) - for v, r in zip(target_table, source) + for v, r in zip(target_table, source, strict=True) ], [""] * target_table.num_columns, ) @@ -448,7 +448,9 @@ def test_empty_like_table(source_table): _, plc_source_table = source_table result = plc.copying.empty_like(plc_source_table) assert result.num_columns() == plc_source_table.num_columns() - for icol, rcol in zip(plc_source_table.columns(), result.columns()): + for icol, rcol in zip( + plc_source_table.columns(), result.columns(), strict=True + ): assert rcol.type() == icol.type() @@ -664,7 +666,7 @@ def test_slice_column(target_column): upper_bounds = bounds[1::2] lower_bounds = bounds[::2] result = plc.copying.slice(plc_target_column, bounds) - for lb, ub, slice_ in zip(lower_bounds, upper_bounds, result): + for lb, ub, slice_ in zip(lower_bounds, upper_bounds, result, strict=True): assert_column_eq(pa_target_column[lb:ub], slice_) @@ -692,17 +694,23 @@ def test_slice_table(target_table): upper_bounds = bounds[1::2] lower_bounds = bounds[::2] result = plc.copying.slice(plc_target_table, bounds) - for lb, ub, slice_ in zip(lower_bounds, upper_bounds, result): + for lb, ub, slice_ in zip(lower_bounds, upper_bounds, result, strict=True): assert_table_eq(pa_target_table[lb:ub], slice_) def test_split_column(target_column): upper_bounds = [1, 3, 5] - lower_bounds = [0] + upper_bounds[:-1] + lower_bounds = [0, *upper_bounds] + upper_bounds_extended = [*upper_bounds, None] # None means to the end pa_target_column, plc_target_column = target_column result = plc.copying.split(plc_target_column, upper_bounds) - for lb, ub, split in zip(lower_bounds, upper_bounds, result): - assert_column_eq(pa_target_column[lb:ub], split) + for lb, ub, split in zip( + lower_bounds, upper_bounds_extended, result, strict=True + ): + if ub is None: + assert_column_eq(pa_target_column[lb:], split) + else: + assert_column_eq(pa_target_column[lb:ub], split) def test_split_column_decreasing(target_column): @@ -721,10 +729,16 @@ def test_split_table(target_table): pa_target_table, plc_target_table = target_table upper_bounds = [1, 3, 5] - lower_bounds = [0] + upper_bounds[:-1] + lower_bounds = [0, *upper_bounds] + upper_bounds_extended = [*upper_bounds, None] # None means to the end result = plc.copying.split(plc_target_table, upper_bounds) - for lb, ub, split in zip(lower_bounds, upper_bounds, result): - assert_table_eq(pa_target_table[lb:ub], split) + for lb, ub, split in zip( + lower_bounds, upper_bounds_extended, result, strict=True + ): + if ub is None: + assert_table_eq(pa_target_table[lb:], split) + else: + assert_table_eq(pa_target_table[lb:ub], split) def test_copy_if_else_column_column(target_column, mask, source_scalar): diff --git 
a/python/pylibcudf/tests/test_lists.py b/python/pylibcudf/tests/test_lists.py index 1ceaa4bee80..718d053085b 100644 --- a/python/pylibcudf/tests/test_lists.py +++ b/python/pylibcudf/tests/test_lists.py @@ -52,7 +52,9 @@ def test_concatenate_rows(test_data): got = plc.lists.concatenate_rows(plc_tbl) - expect = pa.array([pair[0] + pair[1] for pair in zip(*test_data[0])]) + expect = pa.array( + [pair[0] + pair[1] for pair in zip(*test_data[0], strict=True)] + ) assert_column_eq(expect, got) diff --git a/python/pylibcudf/tests/test_nvtext_generate_ngrams.py b/python/pylibcudf/tests/test_nvtext_generate_ngrams.py index 30724e40698..ffa4b89423c 100644 --- a/python/pylibcudf/tests/test_nvtext_generate_ngrams.py +++ b/python/pylibcudf/tests/test_nvtext_generate_ngrams.py @@ -48,7 +48,7 @@ def test_hash_character_ngrams(input_col, ngram, seed): pa_result = plc.interop.to_arrow(result) assert all( len(got) == max(0, len(s.as_py()) - ngram + 1) - for got, s in zip(pa_result, input_col) + for got, s in zip(pa_result, input_col, strict=True) ) assert pa_result.type == pa.list_( pa.field("element", pa.uint32(), nullable=False) diff --git a/python/pylibcudf/tests/test_nvtext_jaccard.py b/python/pylibcudf/tests/test_nvtext_jaccard.py index 3f7b0682d31..98725f40c16 100644 --- a/python/pylibcudf/tests/test_nvtext_jaccard.py +++ b/python/pylibcudf/tests/test_nvtext_jaccard.py @@ -31,7 +31,7 @@ def jaccard_index(s1, s2, width): expect = pa.array( [ jaccard_index(s1.as_py(), s2.as_py(), width) - for s1, s2 in zip(input1, input2) + for s1, s2 in zip(input1, input2, strict=True) ], type=pa.float32(), ) diff --git a/python/pylibcudf/tests/test_nvtext_minhash.py b/python/pylibcudf/tests/test_nvtext_minhash.py index 6315488a205..bc6b569dbc6 100644 --- a/python/pylibcudf/tests/test_nvtext_minhash.py +++ b/python/pylibcudf/tests/test_nvtext_minhash.py @@ -29,7 +29,10 @@ def test_minhash(minhash_input_data, width): width, ) pa_result = plc.interop.to_arrow(result) - assert all(len(got) == len(seeds) for got, s in zip(pa_result, input_arr)) + assert all( + len(got) == len(seeds) + for got, s in zip(pa_result, input_arr, strict=True) + ) assert pa_result.type == pa.list_( pa.field("element", seed_type, nullable=False) ) @@ -75,7 +78,10 @@ def test_minhash_ngrams(minhash_ngrams_input_data, ngrams): plc.Column.from_arrow(ab), ) pa_result = plc.interop.to_arrow(result) - assert all(len(got) == len(ab) for got, s in zip(pa_result, input_arr)) + assert all( + len(got) == len(ab) + for got, s in zip(pa_result, input_arr, strict=True) + ) assert pa_result.type == pa.list_( pa.field("element", seed_type, nullable=False) ) diff --git a/python/pylibcudf/tests/test_quantiles.py b/python/pylibcudf/tests/test_quantiles.py index 68b7c4203dc..1c72637fd2d 100644 --- a/python/pylibcudf/tests/test_quantiles.py +++ b/python/pylibcudf/tests/test_quantiles.py @@ -131,7 +131,7 @@ def _pyarrow_quantiles( [ (name, order_mapper[order]) for name, order in zip( - pa_tbl_data.column_names, column_order + pa_tbl_data.column_names, column_order, strict=True ) ], null_placement="at_start" diff --git a/python/pylibcudf/tests/test_reshape.py b/python/pylibcudf/tests/test_reshape.py index 68f7307fda7..9ee7e389335 100644 --- a/python/pylibcudf/tests/test_reshape.py +++ b/python/pylibcudf/tests/test_reshape.py @@ -20,7 +20,7 @@ def test_interleave_columns(reshape_data): raw_data, reshape_plc_tbl = reshape_data got = plc.reshape.interleave_columns(reshape_plc_tbl) - interleaved_data = [pa.array(pair) for pair in zip(*raw_data)] + interleaved_data = 
[pa.array(pair) for pair in zip(*raw_data, strict=True)] expect = pa.concat_arrays(interleaved_data) diff --git a/python/pylibcudf/tests/test_string_find.py b/python/pylibcudf/tests/test_string_find.py index 0489385b33f..633e9f31512 100644 --- a/python/pylibcudf/tests/test_string_find.py +++ b/python/pylibcudf/tests/test_string_find.py @@ -142,8 +142,7 @@ def handle_none(st, target): [ handle_none(elem, target) for elem, target in zip( - pa_data_col.to_pylist(), - pa_target_col.to_pylist(), + pa_data_col.to_pylist(), pa_target_col.to_pylist(), strict=True ) ], type=pa.bool_(), @@ -159,8 +158,7 @@ def test_find_column(data_col, target_col): [ elem.find(target) if not (elem is None or target is None) else None for elem, target in zip( - pa_data_col.to_pylist(), - pa_target_col.to_pylist(), + pa_data_col.to_pylist(), pa_target_col.to_pylist(), strict=True ) ], type=pa.int32(), diff --git a/python/pylibcudf/tests/test_string_replace.py b/python/pylibcudf/tests/test_string_replace.py index f75ce722f38..2fc0b528656 100644 --- a/python/pylibcudf/tests/test_string_replace.py +++ b/python/pylibcudf/tests/test_string_replace.py @@ -108,7 +108,7 @@ def test_replace_col(data_col, col_repl_target, col_repl): # for targets/repls, so let's implement our own in python def replace_list(elem, targets, repls): - for target, repl in zip(targets, repls): + for target, repl in zip(targets, repls, strict=True): res = elem.replace(target, repl) if res != elem: return res diff --git a/python/pylibcudf/tests/test_string_replace_re.py b/python/pylibcudf/tests/test_string_replace_re.py index 6aa51d4324c..842aea69a8b 100644 --- a/python/pylibcudf/tests/test_string_replace_re.py +++ b/python/pylibcudf/tests/test_string_replace_re.py @@ -50,7 +50,7 @@ def test_replace_re_list_str_columns(flags): flags=flags, ) expect = arr - for pat, repl in zip(pats, repls): + for pat, repl in zip(pats, repls, strict=True): expect = pc.replace_substring_regex( expect, pat, diff --git a/python/pylibcudf/tests/test_string_slice.py b/python/pylibcudf/tests/test_string_slice.py index 576c78e8c12..60b38513d2e 100644 --- a/python/pylibcudf/tests/test_string_slice.py +++ b/python/pylibcudf/tests/test_string_slice.py @@ -87,6 +87,7 @@ def slice_string(st, start, stop): pa_col.to_pylist(), pa_starts_col.to_pylist(), pa_stops_col.to_pylist(), + strict=True, ) ], type=pa.string(), From 8013a532d792337b983e9b891d206a9fb0288e9d Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 6 Aug 2025 16:38:12 -0500 Subject: [PATCH 074/366] Enable casting integer dtypes to `pl.Datetime` via `cudf-polars` (#19607) Part of https://github.com/rapidsai/cudf/issues/17060 This PR adds the ability to view an integral column as a datetime type in `cudf-polars`. 
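For illustration, a minimal sketch of what this enables (not part of the patch itself; the frame, column name, and values here are hypothetical, chosen to mirror the new test added below). Integers are interpreted as counts since the UNIX epoch in the target time unit, and `UInt64` stays unsupported as it is not overflow safe:

```python
import polars as pl

# Hypothetical input: a lazy frame with an Int32 column named "data".
q = pl.LazyFrame({"data": pl.Series([0, 1, 100], dtype=pl.Int32)}).select(
    pl.col("data").cast(pl.Datetime("ms"))
)
# Executed by cudf-polars on the GPU:
# 0 -> 1970-01-01 00:00:00, 1 -> epoch + 1 ms, 100 -> epoch + 100 ms
print(q.collect(engine="gpu"))
```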
Authors: - https://github.com/brandon-b-miller Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19607 --- .../cudf_polars/containers/column.py | 14 +++++++ .../cudf_polars/cudf_polars/utils/dtypes.py | 6 +++ .../tests/expressions/test_datetime_basic.py | 42 +++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 4d7a9992f97..31e4c66a02c 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -293,6 +293,20 @@ def astype(self, dtype: DataType) -> Column: or self.obj.type().id() == plc.TypeId.STRING ): return Column(self._handle_string_cast(plc_dtype), dtype=dtype) + elif plc.traits.is_integral_not_bool( + self.obj.type() + ) and plc.traits.is_timestamp(plc_dtype): + upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64)) + result = plc.column.Column( + plc_dtype, + upcasted.size(), + upcasted.data(), + upcasted.null_mask(), + upcasted.null_count(), + upcasted.offset(), + upcasted.children(), + ) + return Column(result, dtype=dtype).sorted_like(self) else: result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype) if is_order_preserving_cast(self.obj.type(), plc_dtype): diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index a38652f84d9..23a94cf2a67 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -63,6 +63,12 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool: and not from_is_empty and is_numeric_not_bool(from_) ) + or ( + plc.traits.is_integral_not_bool(from_) + and from_.id() != plc.TypeId.UINT64 # not overflow safe + and not to_is_empty + and plc.traits.is_timestamp(to) + ) ) diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py index 52071591948..5d98165a419 100644 --- a/python/cudf_polars/tests/expressions/test_datetime_basic.py +++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py @@ -11,6 +11,7 @@ from cudf_polars.dsl.expr import TemporalFunction from cudf_polars.testing.asserts import ( + assert_collect_raises, assert_gpu_result_equal, assert_ir_translation_raises, ) @@ -344,3 +345,44 @@ def test_datetime_cast_time_unit_duration(dtype, time_unit): q = df.select(pl.col("date").dt.cast_time_unit(time_unit).alias("time_unit_ms")) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "datetime_dtype", + [ + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + ], +) +@pytest.mark.parametrize( + "integer_dtype", + [ + pl.Int64(), + pl.UInt64(), + pl.Int32(), + pl.UInt32(), + pl.Int16(), + pl.UInt16(), + pl.Int8(), + pl.UInt8(), + ], +) +def test_datetime_from_integer(datetime_dtype, integer_dtype): + values = [ + 0, + 1, + 100, + pl.select(integer_dtype.max()).item(), + pl.select(integer_dtype.min()).item(), + ] + df = pl.LazyFrame({"data": pl.Series(values, dtype=integer_dtype)}) + q = df.select(pl.col("data").cast(datetime_dtype).alias("datetime_from_int")) + if integer_dtype == pl.UInt64(): + assert_collect_raises( + q, + cudf_except=pl.exceptions.ComputeError, + polars_except=pl.exceptions.InvalidOperationError, + ) + else: + assert_gpu_result_equal(q) From 2f2a0bd99004433ffb598e7ffc53d75e91bb5411 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 Aug 2025 12:09:34 
-1000 Subject: [PATCH 075/366] Add streams to all modules with 4-5 functions (#19609) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19609 --- .../pylibcudf/pylibcudf/column_factories.pxd | 8 +- .../pylibcudf/pylibcudf/column_factories.pyi | 12 +-- .../pylibcudf/pylibcudf/column_factories.pyx | 54 +++++++++---- python/pylibcudf/pylibcudf/filling.pxd | 10 ++- python/pylibcudf/pylibcudf/filling.pyi | 24 ++++-- python/pylibcudf/pylibcudf/filling.pyx | 48 ++++++++--- .../libcudf/column/column_factories.pxd | 33 +++++--- .../pylibcudf/pylibcudf/libcudf/filling.pxd | 15 +++- .../pylibcudf/pylibcudf/libcudf/replace.pxd | 27 ++++--- .../pylibcudf/pylibcudf/libcudf/rolling.pxd | 11 ++- python/pylibcudf/pylibcudf/replace.pxd | 17 +++- python/pylibcudf/pylibcudf/replace.pyi | 10 ++- python/pylibcudf/pylibcudf/replace.pyx | 80 +++++++++++++++---- python/pylibcudf/pylibcudf/rolling.pxd | 13 +++ python/pylibcudf/pylibcudf/rolling.pyi | 5 ++ python/pylibcudf/pylibcudf/rolling.pyx | 26 ++++-- 16 files changed, 301 insertions(+), 92 deletions(-) diff --git a/python/pylibcudf/pylibcudf/column_factories.pxd b/python/pylibcudf/pylibcudf/column_factories.pxd index d556085ab64..00a576dea45 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/column_factories.pxd @@ -1,5 +1,6 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.types cimport mask_state +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType, size_type, type_id @@ -22,28 +23,33 @@ cpdef Column make_numeric_column( DataType type_, size_type size, MaskArg mask, + Stream stream = *, ) cpdef Column make_fixed_point_column( DataType type_, size_type size, MaskArg mask, + Stream stream = *, ) cpdef Column make_timestamp_column( DataType type_, size_type size, MaskArg mask, + Stream stream = *, ) cpdef Column make_duration_column( DataType type_, size_type size, MaskArg mask, + Stream stream = *, ) cpdef Column make_fixed_width_column( DataType type_, size_type size, MaskArg mask, + Stream stream = *, ) diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi index c87fe423acb..58556d580df 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyi +++ b/python/pylibcudf/pylibcudf/column_factories.pyi @@ -1,20 +1,22 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.types import DataType, MaskState, TypeId def make_empty_column(type_or_id: DataType | TypeId) -> Column: ... def make_numeric_column( - type_: DataType, size: int, mstate: MaskState + type_: DataType, size: int, mstate: MaskState, stream: Stream | None = None ) -> Column: ... def make_fixed_point_column( - type_: DataType, size: int, mstate: MaskState + type_: DataType, size: int, mstate: MaskState, stream: Stream | None = None ) -> Column: ... def make_timestamp_column( - type_: DataType, size: int, mstate: MaskState + type_: DataType, size: int, mstate: MaskState, stream: Stream | None = None ) -> Column: ... def make_duration_column( - type_: DataType, size: int, mstate: MaskState + type_: DataType, size: int, mstate: MaskState, stream: Stream | None = None ) -> Column: ... 
def make_fixed_width_column( - type_: DataType, size: int, mstate: MaskState + type_: DataType, size: int, mstate: MaskState, stream: Stream | None = None ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx index c4969a7f502..cec01077425 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column @@ -11,10 +11,12 @@ from pylibcudf.libcudf.column.column_factories cimport ( make_timestamp_column as cpp_make_timestamp_column, ) from pylibcudf.libcudf.types cimport mask_state, size_type +from rmm.pylibrmm.stream cimport Stream from .types cimport DataType, type_id from .types import MaskState, TypeId +from .utils cimport _get_stream __all__ = [ @@ -69,7 +71,8 @@ cpdef Column make_empty_column(MakeEmptyColumnOperand type_or_id): cpdef Column make_numeric_column( DataType type_, size_type size, - MaskArg mstate + MaskArg mstate, + Stream stream=None ): """Creates an empty numeric column. @@ -88,19 +91,23 @@ cpdef Column make_numeric_column( state = mstate else: raise TypeError("Invalid mask argument") + stream = _get_stream(stream) + with nogil: result = cpp_make_numeric_column( type_.c_obj, size, - state + state, + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Column make_fixed_point_column( DataType type_, size_type size, - MaskArg mstate + MaskArg mstate, + Stream stream=None ): cdef unique_ptr[column] result @@ -115,20 +122,24 @@ cpdef Column make_fixed_point_column( state = mstate else: raise TypeError("Invalid mask argument") + stream = _get_stream(stream) + with nogil: result = cpp_make_fixed_point_column( type_.c_obj, size, - state + state, + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Column make_timestamp_column( DataType type_, size_type size, - MaskArg mstate + MaskArg mstate, + Stream stream=None ): cdef unique_ptr[column] result @@ -143,20 +154,24 @@ cpdef Column make_timestamp_column( state = mstate else: raise TypeError("Invalid mask argument") + stream = _get_stream(stream) + with nogil: result = cpp_make_timestamp_column( type_.c_obj, size, - state + state, + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Column make_duration_column( DataType type_, size_type size, - MaskArg mstate + MaskArg mstate, + Stream stream=None ): cdef unique_ptr[column] result @@ -171,20 +186,24 @@ cpdef Column make_duration_column( state = mstate else: raise TypeError("Invalid mask argument") + stream = _get_stream(stream) + with nogil: result = cpp_make_duration_column( type_.c_obj, size, - state + state, + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Column make_fixed_width_column( DataType type_, size_type size, - MaskArg mstate + MaskArg mstate, + Stream stream=None ): cdef unique_ptr[column] result @@ -199,11 +218,14 @@ cpdef Column make_fixed_width_column( state = mstate else: raise TypeError("Invalid mask argument") + stream = _get_stream(stream) + with nogil: result = cpp_make_fixed_width_column( type_.c_obj, size, - state + state, + stream.view() ) - return 
Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd index 56aef086e1b..4a55068bd43 100644 --- a/python/pylibcudf/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -1,5 +1,6 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -14,6 +15,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, + Stream stream = *, ) cpdef void fill_in_place( @@ -21,21 +23,25 @@ cpdef void fill_in_place( size_type c_begin, size_type c_end, Scalar value, + Stream stream = *, ) cpdef Column sequence( size_type size, Scalar init, Scalar step, + Stream stream = *, ) cpdef Table repeat( Table input_table, - ColumnOrSize count + ColumnOrSize count, + Stream stream = *, ) cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, + Stream stream = *, ) diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi index 0b5e29bdc32..ecbc914d58f 100644 --- a/python/pylibcudf/pylibcudf/filling.pyi +++ b/python/pylibcudf/pylibcudf/filling.pyi @@ -1,17 +1,31 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table def fill( - destination: Column, begin: int, end: int, value: Scalar + destination: Column, + begin: int, + end: int, + value: Scalar, + stream: Stream | None = None, ) -> Column: ... def fill_in_place( - destination: Column, begin: int, end: int, value: Scalar + destination: Column, + begin: int, + end: int, + value: Scalar, + stream: Stream | None = None, ) -> None: ... -def sequence(size: int, init: Scalar, step: Scalar) -> Column: ... -def repeat(input_table: Table, count: Column | int) -> Table: ... +def sequence( + size: int, init: Scalar, step: Scalar, stream: Stream | None = None +) -> Column: ... +def repeat( + input_table: Table, count: Column | int, stream: Stream | None = None +) -> Table: ... def calendrical_month_sequence( - n: int, init: Scalar, months: int + n: int, init: Scalar, months: int, stream: Stream | None = None ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index ea5b45ff7c2..f8d33adebef 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -13,10 +13,12 @@ from pylibcudf.libcudf.filling cimport ( ) from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar from .table cimport Table +from .utils cimport _get_stream __all__ = [ @@ -32,6 +34,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, + Stream stream=None, ): """Fill destination column from begin to end with value. 
@@ -56,20 +59,25 @@ cpdef Column fill( """ cdef unique_ptr[column] result + + stream = _get_stream(stream) + with nogil: result = cpp_fill( destination.view(), begin, end, - dereference(( value).c_obj) + dereference(( value).c_obj), + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef void fill_in_place( Column destination, size_type begin, size_type end, Scalar value, + Stream stream=None, ): """Fill destination column in place from begin to end with value. @@ -92,15 +100,18 @@ cpdef void fill_in_place( None """ + stream = _get_stream(stream) + with nogil: cpp_fill_in_place( destination.mutable_view(), begin, end, - dereference(value.c_obj) + dereference(value.c_obj), + stream.view() ) -cpdef Column sequence(size_type size, Scalar init, Scalar step): +cpdef Column sequence(size_type size, Scalar init, Scalar step, Stream stream=None): """Create a sequence column of size ``size`` with initial value ``init`` and step ``step``. @@ -123,18 +134,23 @@ cpdef Column sequence(size_type size, Scalar init, Scalar step): cdef unique_ptr[column] result cdef size_type c_size = size + + stream = _get_stream(stream) + with nogil: result = cpp_sequence( c_size, dereference(init.c_obj), dereference(step.c_obj), + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef Table repeat( Table input_table, - ColumnOrSize count + ColumnOrSize count, + Stream stream=None, ): """Repeat rows of a Table. @@ -160,25 +176,30 @@ cpdef Table repeat( cdef unique_ptr[table] result + stream = _get_stream(stream) + if ColumnOrSize is Column: with nogil: result = cpp_repeat( input_table.view(), - count.view() + count.view(), + stream.view() ) if ColumnOrSize is size_type: with nogil: result = cpp_repeat( input_table.view(), - count + count, + stream.view() ) - return Table.from_libcudf(move(result)) + return Table.from_libcudf(move(result), stream) cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, + Stream stream=None, ): """Fill destination column from begin to end with value. @@ -202,10 +223,13 @@ cpdef Column calendrical_month_sequence( cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_calendrical_month_sequence( n, dereference(init.c_obj), - months + months, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd index 162822d2365..60b48a7bb6a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column @@ -12,65 +12,76 @@ from pylibcudf.libcudf.types cimport ( ) from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[column] make_numeric_column( data_type type, size_type size, - mask_state state + mask_state state, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] make_numeric_column( data_type type, size_type size, device_buffer mask, - size_type null_count + size_type null_count, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( data_type type, size_type size, - mask_state state) except +libcudf_exception_handler + mask_state state, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except +libcudf_exception_handler + size_type null_count, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, - mask_state state) except +libcudf_exception_handler + mask_state state, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except +libcudf_exception_handler + size_type null_count, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, - mask_state state) except +libcudf_exception_handler + mask_state state, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except +libcudf_exception_handler + size_type null_count, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, - mask_state state) except +libcudf_exception_handler + mask_state state, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, device_buffer mask, - size_type null_count) except +libcudf_exception_handler + size_type null_count, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] make_column_from_scalar( const scalar& s, diff --git a/python/pylibcudf/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd index d9ae573d23b..ee8e844a2a4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd @@ -11,6 +11,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: @@ -18,34 +19,40 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: const column_view & input, size_type begin, size_type end, - const scalar & value + const scalar & value, + cuda_stream_view stream ) except +libcudf_exception_handler cdef void fill_in_place( 
const mutable_column_view & destination, size_type begin, size_type end, - const scalar & value + const scalar & value, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, - size_type count + size_type count, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sequence( size_type size, const scalar & init, - const scalar & step + const scalar & step, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] calendrical_month_sequence( size_type n, const scalar& init, size_type months, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd index bef5a25367b..d95c3dc838f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -8,6 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport ( mutable_column_view, ) from pylibcudf.libcudf.scalar.scalar cimport scalar +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/replace.hpp" namespace "cudf" nogil: @@ -18,32 +19,40 @@ cdef extern from "cudf/replace.hpp" namespace "cudf" nogil: cdef unique_ptr[column] replace_nulls( column_view source_column, - column_view replacement_column) except +libcudf_exception_handler + column_view replacement_column, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, - scalar replacement) except +libcudf_exception_handler + scalar replacement, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, - replace_policy replace_policy) except +libcudf_exception_handler + replace_policy replace_policy, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] find_and_replace_all( column_view source_column, column_view values_to_replace, - column_view replacement_values) except +libcudf_exception_handler + column_view replacement_values, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar lo_replace, - scalar hi, scalar hi_replace) except +libcudf_exception_handler + scalar hi, scalar hi_replace, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, - scalar lo, scalar hi) except +libcudf_exception_handler + scalar lo, scalar hi, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] normalize_nans_and_zeros( - column_view source_column) except +libcudf_exception_handler + column_view source_column, + cuda_stream_view stream) except +libcudf_exception_handler cdef void normalize_nans_and_zeros( - mutable_column_view source_column) except +libcudf_exception_handler + mutable_column_view source_column, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd 
b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd index 75402051941..cc889990888 100644 --- a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd @@ -11,6 +11,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, null_order, order, size_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: @@ -40,7 +41,8 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: null_order null_order, range_window_type preceding, range_window_type following, - vector[rolling_request]& requests + vector[rolling_request]& requests, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] rolling_window( @@ -48,14 +50,16 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: column_view preceding_window, column_view following_window, size_type min_periods, - rolling_aggregation& agg) except +libcudf_exception_handler + rolling_aggregation& agg, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] rolling_window( column_view source, size_type preceding_window, size_type following_window, size_type min_periods, - rolling_aggregation& agg) except +libcudf_exception_handler + rolling_aggregation& agg, + cuda_stream_view stream) except +libcudf_exception_handler cdef pair[unique_ptr[column], unique_ptr[column]] make_range_windows( const table_view& group_keys, @@ -64,6 +68,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: null_order null_order, range_window_type preceding, range_window_type following, + cuda_stream_view stream ) except +libcudf_exception_handler bool is_valid_rolling_aggregation( diff --git a/python/pylibcudf/pylibcudf/replace.pxd b/python/pylibcudf/pylibcudf/replace.pxd index cb9fa8bf960..9b66f772570 100644 --- a/python/pylibcudf/pylibcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/replace.pxd @@ -1,7 +1,8 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from libcpp cimport bool from pylibcudf.libcudf.replace cimport replace_policy +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -16,12 +17,17 @@ ctypedef fused ReplacementType: object -cpdef Column replace_nulls(Column source_column, ReplacementType replacement) +cpdef Column replace_nulls( + Column source_column, + ReplacementType replacement, + Stream stream = *, +) cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, + Stream stream = * ) cpdef Column clamp( @@ -30,6 +36,11 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=*, Scalar hi_replace=*, + Stream stream = * ) -cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=*) +cpdef Column normalize_nans_and_zeros( + Column source_column, + bool inplace=*, + Stream stream = *, +) diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi index eed7a2a6c52..35ef60746a6 100644 --- a/python/pylibcudf/pylibcudf/replace.pyi +++ b/python/pylibcudf/pylibcudf/replace.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -10,12 +12,15 @@ class ReplacePolicy(IntEnum): FOLLOWING = ... 
def replace_nulls( - source_column: Column, replacement: Column | Scalar | ReplacePolicy + source_column: Column, + replacement: Column | Scalar | ReplacePolicy, + stream: Stream | None = None, ) -> Column: ... def find_and_replace_all( source_column: Column, values_to_replace: Column, replacement_values: Column, + stream: Stream | None = None, ) -> Column: ... def clamp( source_column: Column, @@ -23,7 +28,8 @@ def clamp( hi: Scalar, lo_replace: Scalar | None = None, hi_replace: Scalar | None = None, + stream: Stream | None = None, ) -> Column: ... def normalize_nans_and_zeros( - source_column: Column, inplace: bool = False + source_column: Column, inplace: bool = False, stream: Stream | None = None ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx index d84f814a5ee..e4089266c4f 100644 --- a/python/pylibcudf/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -8,12 +8,14 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf cimport replace as cpp_replace from pylibcudf.libcudf.column.column cimport column +from rmm.pylibrmm.stream cimport Stream from pylibcudf.libcudf.replace import \ replace_policy as ReplacePolicy # no-cython-lint from .column cimport Column from .scalar cimport Scalar +from .utils cimport _get_stream __all__ = [ "ReplacePolicy", @@ -24,7 +26,11 @@ __all__ = [ ] -cpdef Column replace_nulls(Column source_column, ReplacementType replacement): +cpdef Column replace_nulls( + Column source_column, + ReplacementType replacement, + Stream stream=None, +): """Replace nulls in source_column. The values used to replace nulls depends on the type of replacement: @@ -47,6 +53,8 @@ cpdef Column replace_nulls(Column source_column, ReplacementType replacement): If a Column, the values to use as replacements. If a Scalar, the value to use as a replacement. If a replace_policy, the policy to use to determine the replacement value. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -56,6 +64,9 @@ cpdef Column replace_nulls(Column source_column, ReplacementType replacement): """ cdef unique_ptr[column] c_result cdef replace_policy policy + + stream = _get_stream(stream) + # Due to https://github.com/cython/cython/issues/5984, if this function is # called as a Python function (i.e. 
without typed inputs, which is always # true in pure Python files), the type of `replacement` will be `object` @@ -64,8 +75,12 @@ cpdef Column replace_nulls(Column source_column, ReplacementType replacement): if isinstance(replacement, ReplacePolicy): policy = replacement with nogil: - c_result = cpp_replace.replace_nulls(source_column.view(), policy) - return Column.from_libcudf(move(c_result)) + c_result = cpp_replace.replace_nulls( + source_column.view(), + policy, + stream.view(), + ) + return Column.from_libcudf(move(c_result), stream) else: raise TypeError("replacement must be a Column, Scalar, or replace_policy") @@ -73,23 +88,31 @@ cpdef Column replace_nulls(Column source_column, ReplacementType replacement): if ReplacementType is Column: c_result = cpp_replace.replace_nulls( source_column.view(), - replacement.view() + replacement.view(), + stream.view() ) elif ReplacementType is Scalar: c_result = cpp_replace.replace_nulls( - source_column.view(), dereference(replacement.c_obj) + source_column.view(), + dereference(replacement.c_obj), + stream.view(), ) elif ReplacementType is replace_policy: - c_result = cpp_replace.replace_nulls(source_column.view(), replacement) + c_result = cpp_replace.replace_nulls( + source_column.view(), + replacement, + stream.view(), + ) else: assert False, "Internal error. Please contact pylibcudf developers" - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, + Stream stream=None ): """Replace all occurrences of values_to_replace with replacement_values. @@ -103,6 +126,8 @@ cpdef Column find_and_replace_all( The column containing values to replace. replacement_values : Column The column containing replacement values. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -111,13 +136,17 @@ cpdef Column find_and_replace_all( replaced by replacement_values. """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_replace.find_and_replace_all( source_column.view(), values_to_replace.view(), replacement_values.view(), + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column clamp( @@ -126,6 +155,7 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=None, Scalar hi_replace=None, + Stream stream=None ): """Clamp the values in source_column to the range [lo, hi]. @@ -145,6 +175,8 @@ cpdef Column clamp( hi_replace : Scalar, optional The value to use for elements that are greater than hi. If not specified, the value of hi is used. + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -155,25 +187,34 @@ cpdef Column clamp( raise ValueError("lo_replace and hi_replace must be specified together") cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: if lo_replace is None: c_result = cpp_replace.clamp( source_column.view(), dereference(lo.c_obj), dereference(hi.c_obj), + stream.view(), ) else: c_result = cpp_replace.clamp( source_column.view(), dereference(lo.c_obj), - dereference(hi.c_obj), dereference(lo_replace.c_obj), + dereference(hi.c_obj), dereference(hi_replace.c_obj), + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=False): +cpdef Column normalize_nans_and_zeros( + Column source_column, + bool inplace=False, + Stream stream=None, +): """Normalize NaNs and zeros in source_column. For details, see :cpp:func:`normalize_nans_and_zeros`. @@ -185,6 +226,8 @@ cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=False): inplace : bool, optional If True, normalize source_column in place. If False, return a new column with the normalized values. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -192,13 +235,22 @@ cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=False): A copy of source_column with NaNs and zeros normalized. """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: if inplace: - cpp_replace.normalize_nans_and_zeros(source_column.mutable_view()) + cpp_replace.normalize_nans_and_zeros( + source_column.mutable_view(), + stream.view(), + ) else: - c_result = cpp_replace.normalize_nans_and_zeros(source_column.view()) + c_result = cpp_replace.normalize_nans_and_zeros( + source_column.view(), + stream.view(), + ) if not inplace: - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) ReplacePolicy.__str__ = ReplacePolicy.__repr__ diff --git a/python/pylibcudf/pylibcudf/rolling.pxd b/python/pylibcudf/pylibcudf/rolling.pxd index 305cdd97238..43242410bda 100644 --- a/python/pylibcudf/pylibcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/rolling.pxd @@ -7,6 +7,7 @@ from pylibcudf.libcudf.rolling cimport ( bounded_closed, bounded_open, current_row, rolling_request, unbounded ) from pylibcudf.libcudf.types cimport null_order, order, size_type +from rmm.pylibrmm.stream cimport Stream from .aggregation cimport Aggregation from .column cimport Column @@ -60,6 +61,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, + Stream stream = *, ) cpdef Column rolling_window( @@ -68,6 +70,17 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, + Stream stream = *, ) cpdef bool is_valid_rolling_aggregation(DataType source, Aggregation agg) + +cpdef tuple make_range_windows( + Table group_keys, + Column orderby, + order order, + null_order null_order, + PrecedingRangeWindowType preceding, + FollowingRangeWindowType following, + Stream stream = *, +) diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi index 9431e40e156..334792f571a 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyi +++ b/python/pylibcudf/pylibcudf/rolling.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+from rmm.pylibrmm.stream import Stream + from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -32,6 +34,7 @@ def grouped_range_rolling_window( preceding: RangeWindowType, following: RangeWindowType, requests: list[RollingRequest], + stream: Stream | None = None, ) -> Table: ... def rolling_window[WindowType: (Column, int)]( source: Column, @@ -39,6 +42,7 @@ def rolling_window[WindowType: (Column, int)]( following_window: WindowType, min_periods: int, agg: Aggregation, + stream: Stream | None = None, ) -> Column: ... def is_valid_rolling_aggregation( source: DataType, agg: Aggregation @@ -50,4 +54,5 @@ def make_range_windows( null_order: NullOrder, preceding: RangeWindowType, following: RangeWindowType, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx index 9f4de0491d8..8b9c46994a5 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyx +++ b/python/pylibcudf/pylibcudf/rolling.pyx @@ -11,11 +11,13 @@ from pylibcudf.libcudf.aggregation cimport rolling_aggregation from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type +from rmm.pylibrmm.stream cimport Stream from .aggregation cimport Aggregation from .column cimport Column from .scalar cimport Scalar from .types cimport DataType +from .utils cimport _get_stream __all__ = [ @@ -121,6 +123,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, + Stream stream=None, ): """ Perform grouping-aware range-based rolling window aggregations on some columns. @@ -154,6 +157,8 @@ cpdef Table grouped_range_rolling_window( for req in requests: crequests.push_back(move((<RollingRequest?>req).view())) + stream = _get_stream(stream) + with nogil: result = cpp_rolling.grouped_range_rolling_window( group_keys.view(), @@ -162,9 +167,10 @@ cpdef Table grouped_range_rolling_window( null_order, dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), - crequests + crequests, + stream.view() ) - return Table.from_libcudf(move(result)) + return Table.from_libcudf(move(result), stream) cpdef Column rolling_window( @@ -173,6 +179,7 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, + Stream stream=None, ): """Perform a rolling window operation on a column @@ -202,6 +209,9 @@ cpdef Column rolling_window( # TODO: Consider making all the conversion functions nogil functions that # reclaim the GIL internally for just the necessary scope like column.view() cdef const rolling_aggregation *c_agg = agg.view_underlying_as_rolling() + + stream = _get_stream(stream) + if WindowType is Column: with nogil: result = cpp_rolling.rolling_window( @@ -210,6 +220,7 @@ cpdef Column rolling_window( following_window.view(), min_periods, dereference(c_agg), + stream.view() ) else: with nogil: @@ -219,9 +230,10 @@ cpdef Column rolling_window( following_window, min_periods, dereference(c_agg), + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef bool is_valid_rolling_aggregation(DataType source, Aggregation agg): @@ -249,6 +261,7 @@ cpdef tuple make_range_windows( null_order null_order, PrecedingRangeWindowType preceding, FollowingRangeWindowType following, + Stream stream=None, ): """ Constructs preceding and following columns given window
range specifications. @@ -277,6 +290,8 @@ cpdef tuple make_range_windows( """ cdef pair[unique_ptr[column], unique_ptr[column]] result + stream = _get_stream(stream) + with nogil: result = cpp_rolling.make_range_windows( group_keys.view(), @@ -285,8 +300,9 @@ cpdef tuple make_range_windows( null_order, dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), + stream.view() ) return ( - Column.from_libcudf(move(result.first)), - Column.from_libcudf(move(result.second)) + Column.from_libcudf(move(result.first), stream), + Column.from_libcudf(move(result.second), stream) ) From 01c16280281f922079d1113f504f50b82bc503e5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Aug 2025 15:47:12 -0700 Subject: [PATCH 076/366] Fix strftime with non-exact %a, %A, %b, %B (#19570) closes https://github.com/rapidsai/cudf/issues/19568 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19570 --- python/cudf/cudf/core/column/datetime.py | 108 +++++++++++++---------- python/cudf/cudf/tests/test_datetime.py | 5 ++ 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 08a0c20506e..24a5968fa5a 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -5,6 +5,7 @@ import calendar import functools import locale +import re import warnings from locale import nl_langinfo from typing import TYPE_CHECKING, Literal @@ -23,10 +24,9 @@ get_tz_data, ) from cudf.core.buffer import Buffer, acquire_spill_lock -from cudf.core.column.column import ColumnBase, as_column, column_empty +from cudf.core.column.column import ColumnBase, as_column from cudf.core.column.temporal_base import TemporalBaseColumn from cudf.utils.dtypes import ( - CUDF_STRING_DTYPE, _get_base_dtype, cudf_dtype_from_pa_type, cudf_dtype_to_pa_type, @@ -60,49 +60,6 @@ "%a", } -_DATETIME_NAMES = [ - nl_langinfo(locale.AM_STR), # type: ignore - nl_langinfo(locale.PM_STR), # type: ignore - nl_langinfo(locale.DAY_1), - nl_langinfo(locale.DAY_2), - nl_langinfo(locale.DAY_3), - nl_langinfo(locale.DAY_4), - nl_langinfo(locale.DAY_5), - nl_langinfo(locale.DAY_6), - nl_langinfo(locale.DAY_7), - nl_langinfo(locale.ABDAY_1), - nl_langinfo(locale.ABDAY_2), - nl_langinfo(locale.ABDAY_3), - nl_langinfo(locale.ABDAY_4), - nl_langinfo(locale.ABDAY_5), - nl_langinfo(locale.ABDAY_6), - nl_langinfo(locale.ABDAY_7), - nl_langinfo(locale.MON_1), - nl_langinfo(locale.MON_2), - nl_langinfo(locale.MON_3), - nl_langinfo(locale.MON_4), - nl_langinfo(locale.MON_5), - nl_langinfo(locale.MON_6), - nl_langinfo(locale.MON_7), - nl_langinfo(locale.MON_8), - nl_langinfo(locale.MON_9), - nl_langinfo(locale.MON_10), - nl_langinfo(locale.MON_11), - nl_langinfo(locale.MON_12), - nl_langinfo(locale.ABMON_1), - nl_langinfo(locale.ABMON_2), - nl_langinfo(locale.ABMON_3), - nl_langinfo(locale.ABMON_4), - nl_langinfo(locale.ABMON_5), - nl_langinfo(locale.ABMON_6), - nl_langinfo(locale.ABMON_7), - nl_langinfo(locale.ABMON_8), - nl_langinfo(locale.ABMON_9), - nl_langinfo(locale.ABMON_10), - nl_langinfo(locale.ABMON_11), - nl_langinfo(locale.ABMON_12), -] - def _resolve_binop_resolution( left_unit: Literal["s", "ms", "us", "ns"], @@ -459,19 +416,72 @@ def as_timedelta_column(self, dtype: np.dtype) -> None: # type: ignore[override f"cannot astype a datetimelike from {self.dtype} to 
{dtype}" ) + @functools.cached_property + def _strftime_names(self) -> plc.Column: + """Strftime names for %A, %a, %B, %b""" + return plc.Column.from_iterable_of_py( + [ + nl_langinfo(loc) + for loc in ( + locale.AM_STR, + locale.PM_STR, + locale.DAY_1, + locale.DAY_2, + locale.DAY_3, + locale.DAY_4, + locale.DAY_5, + locale.DAY_6, + locale.DAY_7, + locale.ABDAY_1, + locale.ABDAY_2, + locale.ABDAY_3, + locale.ABDAY_4, + locale.ABDAY_5, + locale.ABDAY_6, + locale.ABDAY_7, + locale.MON_1, + locale.MON_2, + locale.MON_3, + locale.MON_4, + locale.MON_5, + locale.MON_6, + locale.MON_7, + locale.MON_8, + locale.MON_9, + locale.MON_10, + locale.MON_11, + locale.MON_12, + locale.ABMON_1, + locale.ABMON_2, + locale.ABMON_3, + locale.ABMON_4, + locale.ABMON_5, + locale.ABMON_6, + locale.ABMON_7, + locale.ABMON_8, + locale.ABMON_9, + locale.ABMON_10, + locale.ABMON_11, + locale.ABMON_12, + ) + ] + ) + def strftime(self, format: str) -> StringColumn: if len(self) == 0: return super().strftime(format) - if format in _DATETIME_SPECIAL_FORMATS: - names = as_column(_DATETIME_NAMES) + if re.search("%[aAbB]", format): + names = self._strftime_names else: - names = column_empty(0, dtype=CUDF_STRING_DTYPE) + names = plc.Column.from_scalar( + plc.Scalar.from_py(None, plc.DataType(plc.TypeId.STRING)), 0 + ) with acquire_spill_lock(): return type(self).from_pylibcudf( # type: ignore[return-value] plc.strings.convert.convert_datetime.from_timestamps( self.to_pylibcudf(mode="read"), format, - names.to_pylibcudf(mode="read"), + names, ) ) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index fcc03e43944..94fca7a2a6b 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1195,6 +1195,11 @@ def test_datetime_fillna(data, dtype, fill_value): "%B", "%a", "%A", + "%U_", + "_%b", + "%B*", + "%a ", + "%A1", ], ) def test_datetime_strftime(data, dtype, date_format): From 58fc6dccba1aed18ffa3bdcdca303827bb61831f Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 Aug 2025 17:39:00 -0700 Subject: [PATCH 077/366] Fix uninitialized variable and misaligned write in parquet generic decoder (#19601) Contributes to #19469 This PR fixes two minor bugs when a page is skipped for decoding in the parquet generic decoder. The bugs were discovered while working on #19469. The first fix initializes s->nesting_info = nullptr to avoid invalid null back copy at return. 
The second fix writes list offsets only for depths `< max_depth` instead of `<= max_depth`, so the loop no longer indexes `nesting_info[max_depth]`. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - David Wendt (https://github.com/davidwendt) - Paul Mattione (https://github.com/pmattione-nvidia) URL: https://github.com/rapidsai/cudf/pull/19601 --- cpp/src/io/parquet/decode_fixed.cu | 2 ++ cpp/src/io/parquet/page_decode.cuh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index c475a06785c..077f9f80715 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -1054,6 +1054,8 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) if (not page_mask[page_idx]) { pp->num_nulls = pp->num_rows; pp->num_valids = 0; + // Set s->nesting_info = nullptr to bypass `null_count_back_copier` at return + s->nesting_info = nullptr; return; } } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 0b896d7d548..6c987230709 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -727,7 +727,7 @@ inline __device__ void get_nesting_bounds(int& start_depth, * This function iterates through the nesting levels of a column and updates the offsets for a list * column. The offset for the current nesting level equals the length of the next nesting level * - * @tparam decode_block_size The size of the block used for decoding. + * @tparam block_size The size of the block used for decoding. * @param[in,out] state Pointer to page state containing column and nesting information. */ template <int block_size> @@ -738,7 +738,7 @@ static __device__ void update_list_offsets_for_pruned_pages(page_state_s* state) auto const tid = cg::this_thread_block().thread_rank(); // Iterate by depth and store offset(s) to the list location(s) - for (int depth = tid; depth <= max_depth; depth += block_size) { + for (int depth = tid; depth < max_depth; depth += block_size) { auto& nesting_info = state->nesting_info[depth]; // If we're -not- at a leaf column and we're within nesting/row bounds and we have a valid // data_out pointer, it implies this is a list column, so emit an offset for the current nesting From f6f41c42fe47d86fff03219f1b33aab5e9adbde9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 Aug 2025 18:15:26 -0700 Subject: [PATCH 078/366] Move test_joining to new cudf classic test directory structure (#19501) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19501 --- python/cudf/cudf/tests/reshape/test_join.py | 670 +++++++++ .../test_merge.py} | 1263 ++--------------- 2 files changed, 788 insertions(+), 1145 deletions(-) create mode 100644 python/cudf/cudf/tests/reshape/test_join.py rename python/cudf/cudf/tests/{test_joining.py => reshape/test_merge.py} (53%) diff --git a/python/cudf/cudf/tests/reshape/test_join.py b/python/cudf/cudf/tests/reshape/test_join.py new file mode 100644 index 00000000000..fa80cd299a4 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_join.py @@ -0,0 +1,670 @@ +# Copyright (c) 2025, NVIDIA CORPORATION.
+ + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.fixture( + params=( + "left", + "inner", + "outer", + "right", + "leftanti", + "leftsemi", + "cross", + ) +) +def how(request): + return request.param + + +def assert_join_results_equal(expect, got, how, **kwargs): + if how == "right": + got = got[expect.columns] + + if isinstance(expect, (pd.Series, cudf.Series)): + return assert_eq( + expect.sort_values().reset_index(drop=True), + got.sort_values().reset_index(drop=True), + **kwargs, + ) + elif isinstance(expect, (pd.DataFrame, cudf.DataFrame)): + if not len( + expect.columns + ): # can't sort_values() on a df without columns + return assert_eq(expect, got, **kwargs) + + assert_eq( + expect.sort_values(expect.columns.to_list()).reset_index( + drop=True + ), + got.sort_values(got.columns.to_list()).reset_index(drop=True), + **kwargs, + ) + elif isinstance(expect, (pd.Index, cudf.Index)): + return assert_eq(expect.sort_values(), got.sort_values(), **kwargs) + else: + raise ValueError(f"Not a join result: {type(expect).__name__}") + + +@pytest.mark.parametrize( + "aa, bb", + [ + [[0, 0, 4, 5, 5], [0, 0, 2, 3, 5]], + [[0, 0, 1, 2, 3], [0, 1, 2, 2, 3]], + [range(5), range(5, 10)], + [[0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9, 1.0]], + ], +) +def test_dataframe_join_how(aa, bb, how): + df = cudf.DataFrame( + { + "a": aa, + "b": bb, + } + ) + + def work_pandas(df, how): + df1 = df.set_index("a") + df2 = df.set_index("b") + if how == "leftanti": + joined = df1[~df1.index.isin(df2.index)][df1.columns] + elif how == "leftsemi": + joined = df1[df1.index.isin(df2.index)][df1.columns] + else: + joined = df1.join(df2, how=how, sort=True) + return joined + + def work_gdf(df): + df1 = df.set_index("a") + df2 = df.set_index("b") + joined = df1.join(df2, how=how, sort=True) + return joined + + expect = work_pandas(df.to_pandas(), how) + got = work_gdf(df) + expecto = expect.copy() + goto = got.copy() + + expect = expect.astype(np.float64).fillna(np.nan)[expect.columns] + got = got.astype(np.float64).fillna(np.nan)[expect.columns] + + assert got.index.name is None + + assert list(expect.columns) == list(got.columns) + if how in {"left", "inner", "right", "leftanti", "leftsemi"}: + assert_eq(sorted(expect.index.values), sorted(got.index.values)) + if how != "outer": + # Newly introduced ambiguous ValueError thrown when + # an index and column have the same name. Rename the + # index so sorts work. + # TODO: What is the less hacky way? 
+ expect.index.name = "bob" + got.index.name = "mary" + assert_join_results_equal(expect, got, how=how) + # if(how=='right'): + # _sorted_check_series(expect['a'], expect['b'], + # got['a'], got['b']) + # else: + # _sorted_check_series(expect['b'], expect['a'], got['b'], + # got['a']) + else: + magic = 0xDEADBEAF + for c in expecto.columns: + expect = expecto[c].fillna(-1) + got = goto[c].fillna(-1) + + direct_equal = np.all(expect.values == got.to_numpy()) + nanfilled_equal = np.all( + expect.fillna(magic).values == got.fillna(magic).to_numpy() + ) + msg = "direct_equal={}, nanfilled_equal={}".format( + direct_equal, nanfilled_equal + ) + assert direct_equal or nanfilled_equal, msg + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="bug in older version of pandas", +) +def test_dataframe_join_suffix(): + rng = np.random.default_rng(seed=0) + + df = cudf.DataFrame(rng.integers(0, 5, (5, 3)), columns=list("abc")) + + left = df.set_index("a") + right = df.set_index("c") + msg = ( + "there are overlapping columns but lsuffix and rsuffix are not defined" + ) + with pytest.raises(ValueError, match=msg): + left.join(right) + + got = left.join(right, lsuffix="_left", rsuffix="_right", sort=True) + expect = left.to_pandas().join( + right.to_pandas(), + lsuffix="_left", + rsuffix="_right", + sort=True, + ) + # TODO: Retain result index name + expect.index.name = None + assert_join_results_equal(expect, got, how="inner") + + +def test_dataframe_join_cats(): + lhs = cudf.DataFrame() + lhs["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) + lhs["b"] = bb = np.arange(len(lhs)) + lhs = lhs.set_index("a") + + rhs = cudf.DataFrame() + rhs["a"] = pd.Categorical(list("abcac"), categories=list("abc")) + rhs["c"] = cc = np.arange(len(rhs)) + rhs = rhs.set_index("a") + + got = lhs.join(rhs) + expect = lhs.to_pandas().join(rhs.to_pandas()) + + # Note: pandas make an object Index after joining + assert_join_results_equal(expect, got, how="inner") + + # Just do some rough checking here. 
+ assert list(got.columns) == ["b", "c"] + assert len(got) > 0 + assert set(got.index.to_pandas()) & set("abc") + assert set(got["b"].to_numpy()) & set(bb) + assert set(got["c"].to_numpy()) & set(cc) + + +def test_dataframe_join_combine_cats(): + lhs = cudf.DataFrame({"join_index": ["a", "b", "c"], "data_x": [1, 2, 3]}) + rhs = cudf.DataFrame({"join_index": ["b", "c", "d"], "data_y": [2, 3, 4]}) + + lhs["join_index"] = lhs["join_index"].astype("category") + rhs["join_index"] = rhs["join_index"].astype("category") + + lhs = lhs.set_index("join_index") + rhs = rhs.set_index("join_index") + + lhs_pd = lhs.to_pandas() + rhs_pd = rhs.to_pandas() + + lhs_pd.index = lhs_pd.index.astype("object") + rhs_pd.index = rhs_pd.index.astype("object") + + expect = lhs_pd.join(rhs_pd, how="outer") + expect.index = expect.index.astype("category") + got = lhs.join(rhs, how="outer") + + assert_eq(expect.index.sort_values(), got.index.sort_values()) + + +def test_dataframe_join_mismatch_cats(how): + if how in {"leftanti", "leftsemi"}: + pytest.skip(f"{how} not implemented in pandas") + + pdf1 = pd.DataFrame( + { + "join_col": ["a", "b", "c", "d", "e"], + "data_col_left": [10, 20, 30, 40, 50], + } + ) + pdf2 = pd.DataFrame( + {"join_col": ["c", "e", "f"], "data_col_right": [6, 7, 8]} + ) + + pdf1["join_col"] = pdf1["join_col"].astype("category") + pdf2["join_col"] = pdf2["join_col"].astype("category") + + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) + + gdf1 = gdf1.set_index("join_col") + gdf2 = gdf2.set_index("join_col") + + pdf1 = pdf1.set_index("join_col") + pdf2 = pdf2.set_index("join_col") + join_gdf = gdf1.join(gdf2, how=how, sort=True) + join_pdf = pdf1.join(pdf2, how=how) + + got = join_gdf.fillna(-1).to_pandas() + expect = join_pdf.fillna(-1) # note: cudf join doesn't mask NA + + # We yield a categorical here whereas pandas gives Object. 
+ expect.index = expect.index.astype("category") + # cudf creates the columns in different order than pandas for right join + if how == "right": + got = got[["data_col_left", "data_col_right"]] + + expect.data_col_right = expect.data_col_right.astype(np.int64) + expect.data_col_left = expect.data_col_left.astype(np.int64) + + assert_join_results_equal(expect, got, how=how, check_categorical=False) + + +def test_join_datetimes_index(datetime_types_as_str): + datetimes = pd.Series(pd.date_range("20010101", "20010102", freq="12h")) + pdf_lhs = pd.DataFrame(index=[1, 0, 1, 2, 0, 0, 1]) + pdf_rhs = pd.DataFrame({"d": datetimes}) + gdf_lhs = cudf.from_pandas(pdf_lhs) + gdf_rhs = cudf.from_pandas(pdf_rhs) + + gdf_rhs["d"] = gdf_rhs["d"].astype(datetime_types_as_str) + + pdf = pdf_lhs.join(pdf_rhs, sort=True) + gdf = gdf_lhs.join(gdf_rhs, sort=True) + + assert gdf["d"].dtype == cudf.dtype(datetime_types_as_str) + + assert_join_results_equal(pdf, gdf, how="inner", check_dtype=False) + + +@pytest.mark.parametrize( + "column_a", + [ + ( + pd.Series([None, 1, 2, 3, 4, 5, 6, 7], dtype=np.float64), + pd.Series([8, 9, 10, 11, 12, None, 14, 15], dtype=np.float64), + ) + ], +) +@pytest.mark.parametrize( + "column_b", + [ + ( + pd.Series([0, 1, 0, None, 1, 0, 0, 0], dtype=np.float64), + pd.Series([None, 1, 2, 1, 2, 2, 0, 0], dtype=np.float64), + ) + ], +) +@pytest.mark.parametrize( + "column_c", + [ + ( + pd.Series(["dog", "cat", "fish", "bug"] * 2), + pd.Series(["bird", "cat", "mouse", "snake"] * 2), + ), + ( + pd.Series(["dog", "cat", "fish", "bug"] * 2).astype("category"), + pd.Series(["bird", "cat", "mouse", "snake"] * 2).astype( + "category" + ), + ), + ], +) +def test_join_multi(how, column_a, column_b, column_c): + if how in {"leftanti", "leftsemi"}: + pytest.skip(f"{how} not implemented in pandas") + + index = ["b", "c"] + df1 = pd.DataFrame() + df1["a1"] = column_a[0] + df1["b"] = column_b[0] + df1["c"] = column_c[0] + df1 = df1.set_index(index) + gdf1 = cudf.from_pandas(df1) + + df2 = pd.DataFrame() + df2["a2"] = column_a[1] + df2["b"] = column_b[1] + df2["c"] = column_c[1] + df2 = df2.set_index(index) + gdf2 = cudf.from_pandas(df2) + + gdf_result = gdf1.join(gdf2, how=how, sort=True) + pdf_result = df1.join(df2, how=how, sort=True) + + # Make sure columns are in the same order + columns = pdf_result.columns.values + gdf_result = gdf_result[columns] + pdf_result = pdf_result[columns] + + assert_join_results_equal(pdf_result, gdf_result, how="inner") + + +@pytest.mark.parametrize( + ("lhs", "rhs"), + [ + (["a", "b"], ["a"]), + (["a"], ["a", "b"]), + (["a", "b"], ["b"]), + (["b"], ["a", "b"]), + (["a"], ["a"]), + ], +) +@pytest.mark.parametrize("level", ["a", "b", 0, 1]) +def test_index_join(lhs, rhs, how, level): + if how in {"leftanti", "leftsemi", "cross"}: + pytest.skip(f"{how} not implemented in pandas") + + l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) + r_pdf = pd.DataFrame({"a": [1, 5, 4, 0], "b": [3, 9, 8, 4]}) + l_df = cudf.from_pandas(l_pdf) + r_df = cudf.from_pandas(r_pdf) + p_lhs = l_pdf.set_index(lhs).index + p_rhs = r_pdf.set_index(rhs).index + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) + + assert_join_results_equal(expected, got, how=how) + + +def test_index_join_corner_cases(): + l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) + r_pdf = pd.DataFrame( + {"a": [1, 5, 4, 0], "b": [3, 9, 
8, 4], "c": [2, 3, 6, 0]} + ) + l_df = cudf.from_pandas(l_pdf) + r_df = cudf.from_pandas(r_pdf) + + # Join when column name doesn't match with level + lhs = ["a", "b"] + # level and rhs don't match + rhs = ["c"] + level = "b" + how = "outer" + p_lhs = l_pdf.set_index(lhs).index + p_rhs = r_pdf.set_index(rhs).index + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) + + assert_join_results_equal(expected, got, how=how) + + # sort is supported only in case of two non-MultiIndex join + # Join when column name doesn't match with level + lhs = ["a"] + # level and rhs don't match + rhs = ["a"] + level = "b" + how = "left" + p_lhs = l_pdf.set_index(lhs).index + p_rhs = r_pdf.set_index(rhs).index + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + expected = p_lhs.join(p_rhs, how=how, sort=True) + got = g_lhs.join(g_rhs, how=how, sort=True) + + assert_join_results_equal(expected, got, how=how) + + # Pandas Index.join on categorical column returns generic column + # but cudf will be returning a categorical column itself. + lhs = ["a", "b"] + rhs = ["a"] + level = "a" + how = "inner" + l_df["a"] = l_df["a"].astype("category") + r_df["a"] = r_df["a"].astype("category") + p_lhs = l_pdf.set_index(lhs).index + p_rhs = r_pdf.set_index(rhs).index + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) + got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) + + got["a"] = got["a"].astype(expected["a"].dtype) + + assert_join_results_equal(expected, got, how=how) + + +def test_index_join_exception_cases(): + l_df = cudf.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) + r_df = cudf.DataFrame( + {"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]} + ) + + # Join between two MultiIndex + lhs = ["a", "b"] + rhs = ["a", "c"] + level = "a" + how = "outer" + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + + with pytest.raises(TypeError): + g_lhs.join(g_rhs, level=level, how=how) + + # Improper level value, level should be an int or scalar value + level = ["a"] + rhs = ["a"] + g_lhs = l_df.set_index(lhs).index + g_rhs = r_df.set_index(rhs).index + with pytest.raises(ValueError): + g_lhs.join(g_rhs, level=level, how=how) + + +def test_typecast_on_join_indexes(): + join_data_l = cudf.Series([1, 2, 3, 4, 5], dtype="int8") + join_data_r = cudf.Series([1, 2, 3, 4, 6], dtype="int32") + other_data = ["a", "b", "c", "d", "e"] + + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) + + gdf_l = gdf_l.set_index("join_col") + gdf_r = gdf_r.set_index("join_col") + + exp_join_data = [1, 2, 3, 4] + exp_other_data = ["a", "b", "c", "d"] + + expect = cudf.DataFrame( + { + "join_col": exp_join_data, + "B_x": exp_other_data, + "B_y": exp_other_data, + } + ) + expect = expect.set_index("join_col") + + got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") + + assert_join_results_equal(expect, got, how="inner") + + +def test_typecast_on_join_multiindices(): + join_data_l_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int8") + join_data_l_1 = cudf.Series([2, 3, 4.1, 5.9, 6], dtype="float32") + join_data_l_2 = cudf.Series([7, 8, 9, 0, 1], dtype="float32") + + join_data_r_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int32") + join_data_r_1 = 
cudf.Series([2, 3, 4, 5, 6], dtype="int32") + join_data_r_2 = cudf.Series([7, 8, 9, 0, 0], dtype="float64") + + other_data = ["a", "b", "c", "d", "e"] + + gdf_l = cudf.DataFrame( + { + "join_col_0": join_data_l_0, + "join_col_1": join_data_l_1, + "join_col_2": join_data_l_2, + "B": other_data, + } + ) + gdf_r = cudf.DataFrame( + { + "join_col_0": join_data_r_0, + "join_col_1": join_data_r_1, + "join_col_2": join_data_r_2, + "B": other_data, + } + ) + + gdf_l = gdf_l.set_index(["join_col_0", "join_col_1", "join_col_2"]) + gdf_r = gdf_r.set_index(["join_col_0", "join_col_1", "join_col_2"]) + + exp_join_data_0 = cudf.Series([1, 2], dtype="int32") + exp_join_data_1 = cudf.Series([2, 3], dtype="float64") + exp_join_data_2 = cudf.Series([7, 8], dtype="float64") + exp_other_data = cudf.Series(["a", "b"]) + + expect = cudf.DataFrame( + { + "join_col_0": exp_join_data_0, + "join_col_1": exp_join_data_1, + "join_col_2": exp_join_data_2, + "B_x": exp_other_data, + "B_y": exp_other_data, + } + ) + expect = expect.set_index(["join_col_0", "join_col_1", "join_col_2"]) + got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") + + assert_join_results_equal(expect, got, how="inner") + + +def test_typecast_on_join_indexes_matching_categorical(): + join_data_l = cudf.Series(["a", "b", "c", "d", "e"], dtype="category") + join_data_r = cudf.Series(["a", "b", "c", "d", "e"], dtype="str") + other_data = [1, 2, 3, 4, 5] + + gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) + gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) + + gdf_l = gdf_l.set_index("join_col") + gdf_r = gdf_r.set_index("join_col") + + exp_join_data = ["a", "b", "c", "d", "e"] + exp_other_data = [1, 2, 3, 4, 5] + + expect = cudf.DataFrame( + { + "join_col": exp_join_data, + "B_x": exp_other_data, + "B_y": exp_other_data, + } + ) + expect = expect.set_index("join_col") + got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") + + assert_join_results_equal(expect, got, how="inner") + + +def test_join_multiindex_empty(): + lhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}, index=["a", "b", "c"]) + lhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")]) + rhs = pd.DataFrame(index=["a", "c", "d"]) + g_lhs = cudf.from_pandas(lhs) + g_rhs = cudf.from_pandas(rhs) + assert_exceptions_equal( + lfunc=lhs.join, + rfunc=g_lhs.join, + lfunc_args_and_kwargs=([rhs], {"how": "inner"}), + rfunc_args_and_kwargs=([g_rhs], {"how": "inner"}), + check_exception_type=False, + ) + + +def test_join_on_index_with_duplicate_names(): + # although index levels with duplicate names are poorly supported + # overall, we *should* be able to join on them: + lhs = pd.DataFrame({"a": [1, 2, 3]}) + rhs = pd.DataFrame({"b": [1, 2, 3]}) + lhs.index = pd.MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1)], names=["x", "x"] + ) + rhs.index = pd.MultiIndex.from_tuples( + [(1, 1), (1, 3), (2, 1)], names=["x", "x"] + ) + expect = lhs.join(rhs, how="inner") + + lhs = cudf.from_pandas(lhs) + rhs = cudf.from_pandas(rhs) + got = lhs.join(rhs, how="inner") + + assert_join_results_equal(expect, got, how="inner") + + +def test_join_multiindex_index(): + # test joining a MultiIndex with an Index with overlapping name + lhs = ( + cudf.DataFrame({"a": [2, 3, 1], "b": [3, 4, 2]}) + .set_index(["a", "b"]) + .index + ) + rhs = cudf.DataFrame({"a": [1, 4, 3]}).set_index("a").index + expect = lhs.to_pandas().join(rhs.to_pandas(), how="inner") + got = lhs.join(rhs, how="inner") + assert_join_results_equal(expect, got, how="inner") + + +def 
test_dataframe_join_on(): + """Verify that specifying the on parameter gives a NotImplementedError.""" + df = cudf.DataFrame({"a": [1, 2, 3]}) + with pytest.raises(NotImplementedError): + df.join(df, on="a") + + +def test_index_join_return_indexers_notimplemented(): + index = cudf.RangeIndex(start=0, stop=20, step=2) + other = cudf.Index([4, 4, 3, 3]) + with pytest.raises(NotImplementedError): + index.join(other, how="left", return_indexers=True) + + +@pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/57065", +) +@pytest.mark.parametrize("how", ["inner", "outer"]) +def test_index_join_names(how): + idx1 = cudf.Index([10, 1, 2, 4, 2, 1], name="a") + idx2 = cudf.Index([-10, 2, 3, 1, 2], name="b") + pidx1 = idx1.to_pandas() + pidx2 = idx2.to_pandas() + + expected = pidx1.join(pidx2, how=how) + actual = idx1.join(idx2, how=how) + assert_join_results_equal(actual, expected, how=how) + + +@pytest.mark.parametrize( + "left_data", + [ + {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}, + {"lkey": ["foo", "bar", "baz", "foo"], "value": [5, 3, 2, 1]}, + { + "lkey": ["foo", "bar", "baz", "foo"], + "value": [5, 3, 2, 1], + "extra_left": [1, 2, 3, 4], + }, + ], +) +@pytest.mark.parametrize( + "right_data", + [ + {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]}, + {"rkey": ["foo", "bar", "baz", "foo"], "value": [8, 7, 6, 5]}, + { + "rkey": ["foo", "bar", "baz", "foo"], + "value": [8, 7, 6, 5], + "extra_right": [10, 2, 30, 4], + }, + ], +) +@pytest.mark.parametrize("sort", [True, False]) +def test_cross_join_overlapping(left_data, right_data, sort): + df1 = cudf.DataFrame(left_data) + df2 = cudf.DataFrame(right_data) + + pdf1 = df1.to_pandas() + pdf2 = df2.to_pandas() + expected = pdf1.join( + pdf2, how="cross", lsuffix="_x", rsuffix="_y", sort=sort + ) + result = df1.join(df2, how="cross", lsuffix="_x", rsuffix="_y", sort=sort) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/reshape/test_merge.py similarity index 53% rename from python/cudf/cudf/tests/test_joining.py rename to python/cudf/cudf/tests/reshape/test_merge.py index bb24111cfc3..f661eb4b587 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/reshape/test_merge.py @@ -1,19 +1,15 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. 
-from itertools import combinations, product, repeat import numpy as np import pandas as pd import pytest import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype from cudf.testing import assert_eq from cudf.testing._utils import ( - INTEGER_TYPES, NUMERIC_TYPES, - TIMEDELTA_TYPES, assert_exceptions_equal, expect_warning_if, ) @@ -35,28 +31,6 @@ def how(request): return request.param -rng = np.random.default_rng(seed=0) - - -@pytest.fixture( - params=[ - [[0, 0, 4, 5, 5], [0, 0, 2, 3, 5]], - [[0, 0, 1, 2, 3], [0, 1, 2, 2, 3]], - [rng.integers(0, 50, 100), rng.integers(0, 50, 100)], - [rng.random(50), rng.random(50)], - ] -) -def aa_bb(request): - return request.param - - -def pd_odd_joins(left, right, join_type): - if join_type == "leftanti": - return left[~left.index.isin(right.index)][left.columns] - elif join_type == "leftsemi": - return left[left.index.isin(right.index)][left.columns] - - def assert_join_results_equal(expect, got, how, **kwargs): if how == "right": got = got[expect.columns] @@ -86,193 +60,6 @@ def assert_join_results_equal(expect, got, how, **kwargs): raise ValueError(f"Not a join result: {type(expect).__name__}") -def test_dataframe_join_how(aa_bb, how): - aa, bb = aa_bb - df = cudf.DataFrame( - { - "a": aa, - "b": bb, - } - ) - - def work_pandas(df, how): - df1 = df.set_index("a") - df2 = df.set_index("b") - if how == "leftanti": - joined = pd_odd_joins(df1, df2, "leftanti") - elif how == "leftsemi": - joined = pd_odd_joins(df1, df2, "leftsemi") - else: - joined = df1.join(df2, how=how, sort=True) - return joined - - def work_gdf(df): - df1 = df.set_index("a") - df2 = df.set_index("b") - joined = df1.join(df2, how=how, sort=True) - return joined - - expect = work_pandas(df.to_pandas(), how) - got = work_gdf(df) - expecto = expect.copy() - goto = got.copy() - - expect = expect.astype(np.float64).fillna(np.nan)[expect.columns] - got = got.astype(np.float64).fillna(np.nan)[expect.columns] - - assert got.index.name is None - - assert list(expect.columns) == list(got.columns) - if how in {"left", "inner", "right", "leftanti", "leftsemi"}: - assert_eq(sorted(expect.index.values), sorted(got.index.values)) - if how != "outer": - # Newly introduced ambiguous ValueError thrown when - # an index and column have the same name. Rename the - # index so sorts work. - # TODO: What is the less hacky way? 
- expect.index.name = "bob" - got.index.name = "mary" - assert_join_results_equal(expect, got, how=how) - # if(how=='right'): - # _sorted_check_series(expect['a'], expect['b'], - # got['a'], got['b']) - # else: - # _sorted_check_series(expect['b'], expect['a'], got['b'], - # got['a']) - else: - magic = 0xDEADBEAF - for c in expecto.columns: - expect = expecto[c].fillna(-1) - got = goto[c].fillna(-1) - - direct_equal = np.all(expect.values == got.to_numpy()) - nanfilled_equal = np.all( - expect.fillna(magic).values == got.fillna(magic).to_numpy() - ) - msg = "direct_equal={}, nanfilled_equal={}".format( - direct_equal, nanfilled_equal - ) - assert direct_equal or nanfilled_equal, msg - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="bug in older version of pandas", -) -def test_dataframe_join_suffix(): - rng = np.random.default_rng(seed=0) - - df = cudf.DataFrame(rng.integers(0, 5, (5, 3)), columns=list("abc")) - - left = df.set_index("a") - right = df.set_index("c") - msg = ( - "there are overlapping columns but lsuffix and rsuffix are not defined" - ) - with pytest.raises(ValueError, match=msg): - left.join(right) - - got = left.join(right, lsuffix="_left", rsuffix="_right", sort=True) - expect = left.to_pandas().join( - right.to_pandas(), - lsuffix="_left", - rsuffix="_right", - sort=True, - ) - # TODO: Retain result index name - expect.index.name = None - assert_join_results_equal(expect, got, how="inner") - - -def test_dataframe_join_cats(): - lhs = cudf.DataFrame() - lhs["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) - lhs["b"] = bb = np.arange(len(lhs)) - lhs = lhs.set_index("a") - - rhs = cudf.DataFrame() - rhs["a"] = pd.Categorical(list("abcac"), categories=list("abc")) - rhs["c"] = cc = np.arange(len(rhs)) - rhs = rhs.set_index("a") - - got = lhs.join(rhs) - expect = lhs.to_pandas().join(rhs.to_pandas()) - - # Note: pandas make an object Index after joining - assert_join_results_equal(expect, got, how="inner") - - # Just do some rough checking here. 
- assert list(got.columns) == ["b", "c"] - assert len(got) > 0 - assert set(got.index.to_pandas()) & set("abc") - assert set(got["b"].to_numpy()) & set(bb) - assert set(got["c"].to_numpy()) & set(cc) - - -def test_dataframe_join_combine_cats(): - lhs = cudf.DataFrame({"join_index": ["a", "b", "c"], "data_x": [1, 2, 3]}) - rhs = cudf.DataFrame({"join_index": ["b", "c", "d"], "data_y": [2, 3, 4]}) - - lhs["join_index"] = lhs["join_index"].astype("category") - rhs["join_index"] = rhs["join_index"].astype("category") - - lhs = lhs.set_index("join_index") - rhs = rhs.set_index("join_index") - - lhs_pd = lhs.to_pandas() - rhs_pd = rhs.to_pandas() - - lhs_pd.index = lhs_pd.index.astype("object") - rhs_pd.index = rhs_pd.index.astype("object") - - expect = lhs_pd.join(rhs_pd, how="outer") - expect.index = expect.index.astype("category") - got = lhs.join(rhs, how="outer") - - assert_eq(expect.index.sort_values(), got.index.sort_values()) - - -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_dataframe_join_mismatch_cats(how): - pdf1 = pd.DataFrame( - { - "join_col": ["a", "b", "c", "d", "e"], - "data_col_left": [10, 20, 30, 40, 50], - } - ) - pdf2 = pd.DataFrame( - {"join_col": ["c", "e", "f"], "data_col_right": [6, 7, 8]} - ) - - pdf1["join_col"] = pdf1["join_col"].astype("category") - pdf2["join_col"] = pdf2["join_col"].astype("category") - - gdf1 = cudf.from_pandas(pdf1) - gdf2 = cudf.from_pandas(pdf2) - - gdf1 = gdf1.set_index("join_col") - gdf2 = gdf2.set_index("join_col") - - pdf1 = pdf1.set_index("join_col") - pdf2 = pdf2.set_index("join_col") - join_gdf = gdf1.join(gdf2, how=how, sort=True) - join_pdf = pdf1.join(pdf2, how=how) - - got = join_gdf.fillna(-1).to_pandas() - expect = join_pdf.fillna(-1) # note: cudf join doesn't mask NA - - # We yield a categorical here whereas pandas gives Object. 
- expect.index = expect.index.astype("category") - # cudf creates the columns in different order than pandas for right join - if how == "right": - got = got[["data_col_left", "data_col_right"]] - - expect.data_col_right = expect.data_col_right.astype(np.int64) - expect.data_col_left = expect.data_col_left.astype(np.int64) - - assert_join_results_equal(expect, got, how=how, check_categorical=False) - - @pytest.mark.parametrize("on", ["key1", ["key1", "key2"], None]) def test_dataframe_merge_on(on): rng = np.random.default_rng(seed=0) @@ -421,47 +208,21 @@ def test_dataframe_merge_order(): "pairs", [ ("", ""), - ("", "a"), - ("", "ab"), ("", "abc"), - ("", "b"), - ("", "bcd"), - ("", "cde"), ("a", "a"), - ("a", "ab"), - ("a", "abc"), - ("a", "b"), - ("a", "bcd"), - ("a", "cde"), - ("ab", "ab"), - ("ab", "abc"), - ("ab", "b"), - ("ab", "bcd"), - ("ab", "cde"), - ("abc", "abc"), - ("abc", "b"), - ("abc", "bcd"), - ("abc", "cde"), - ("b", "b"), - ("b", "bcd"), - ("b", "cde"), - ("bcd", "bcd"), - ("bcd", "cde"), - ("cde", "cde"), ], ) -@pytest.mark.parametrize("max", [5, 1000]) -@pytest.mark.parametrize("rows", [1, 5, 100]) -@pytest.mark.parametrize("how", ["left", "inner", "outer"]) -def test_dataframe_pairs_of_triples(pairs, max, rows, how): +def test_dataframe_pairs_of_triples(pairs, how): + if how in {"leftsemi", "leftanti"}: + pytest.skip(f"{how} not implemented in pandas") rng = np.random.default_rng(seed=0) pdf_left = pd.DataFrame() pdf_right = pd.DataFrame() for left_column in pairs[0]: - pdf_left[left_column] = rng.integers(0, max, rows) + pdf_left[left_column] = rng.integers(0, 10, 10) for right_column in pairs[1]: - pdf_right[right_column] = rng.integers(0, max, rows) + pdf_right[right_column] = rng.integers(0, 10, 10) gdf_left = cudf.from_pandas(pdf_left) gdf_right = cudf.from_pandas(pdf_right) if not set(pdf_left.columns).intersection(pdf_right.columns): @@ -517,10 +278,12 @@ def test_safe_merging_with_left_empty(): assert len(pdf_result) == len(gdf_result) -@pytest.mark.parametrize("how", ["left", "inner", "outer"]) @pytest.mark.parametrize("left_empty", [True, False]) @pytest.mark.parametrize("right_empty", [True, False]) def test_empty_joins(how, left_empty, right_empty): + if how in {"leftsemi", "leftanti"}: + pytest.skip(f"{how} not implemented in pandas") + pdf = pd.DataFrame({"x": [1, 2, 3]}) if left_empty: @@ -676,24 +439,23 @@ def test_merge_left_right_index_left_right_on_kwargs2(kwargs): assert gd_merge.empty +@pytest.mark.parametrize("how", ["inner", "outer", "left"]) @pytest.mark.parametrize( - "hows", [{"how": "inner"}, {"how": "left"}, {"how": "outer"}] -) -@pytest.mark.parametrize( - "ons", + "on", [ - {"on": "a"}, - {"on": ["a", "b"]}, - {"on": ["b", "a"]}, - {"on": ["a", "aa", "b"]}, - {"on": ["b", "a", "aa"]}, + "a", + ["a", "b"], + ["b", "a"], + ["a", "aa", "b"], + ["b", "a", "aa"], ], ) -def test_merge_sort(ons, hows): - kwargs = {} - kwargs.update(hows) - kwargs.update(ons) - kwargs["sort"] = True +def test_merge_sort(on, how): + kwargs = { + "sort": True, + "how": how, + "on": on, + } a = [4, 6, 9, 5, 2, 4, 1, 8, 1] b = [9, 8, 7, 8, 3, 9, 7, 9, 2] aa = [8, 9, 2, 9, 3, 1, 2, 3, 4] @@ -761,27 +523,6 @@ def test_merge_sort_on_indexes(kwargs): assert gd_merge["a"].is_monotonic_increasing -@pytest.mark.parametrize( - "dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_join_datetimes_index(dtype): - datetimes = pd.Series(pd.date_range("20010101", "20010102", freq="12h")) - pdf_lhs = pd.DataFrame(index=[1, 0, 1, 2, 0, 0, 
1]) - pdf_rhs = pd.DataFrame({"d": datetimes}) - gdf_lhs = cudf.from_pandas(pdf_lhs) - gdf_rhs = cudf.from_pandas(pdf_rhs) - - gdf_rhs["d"] = gdf_rhs["d"].astype(dtype) - - pdf = pdf_lhs.join(pdf_rhs, sort=True) - gdf = gdf_lhs.join(gdf_rhs, sort=True) - - assert gdf["d"].dtype == cudf.dtype(dtype) - - assert_join_results_equal(pdf, gdf, how="inner", check_dtype=False) - - def test_join_with_different_names(): left = pd.DataFrame({"a": [0, 1, 2.0, 3, 4, 5, 9]}) right = pd.DataFrame({"b": [12, 5, 3, 9.0, 5], "c": [1, 2, 3, 4, 5.0]}) @@ -812,67 +553,6 @@ def test_join_empty_table_dtype(): assert_eq(pd_merge["a"].dtype, gd_merge["a"].dtype) -@pytest.mark.parametrize("how", ["outer", "inner", "left", "right"]) -@pytest.mark.parametrize( - "column_a", - [ - ( - pd.Series([None, 1, 2, 3, 4, 5, 6, 7], dtype=np.float64), - pd.Series([8, 9, 10, 11, 12, None, 14, 15], dtype=np.float64), - ) - ], -) -@pytest.mark.parametrize( - "column_b", - [ - ( - pd.Series([0, 1, 0, None, 1, 0, 0, 0], dtype=np.float64), - pd.Series([None, 1, 2, 1, 2, 2, 0, 0], dtype=np.float64), - ) - ], -) -@pytest.mark.parametrize( - "column_c", - [ - ( - pd.Series(["dog", "cat", "fish", "bug"] * 2), - pd.Series(["bird", "cat", "mouse", "snake"] * 2), - ), - ( - pd.Series(["dog", "cat", "fish", "bug"] * 2).astype("category"), - pd.Series(["bird", "cat", "mouse", "snake"] * 2).astype( - "category" - ), - ), - ], -) -def test_join_multi(how, column_a, column_b, column_c): - index = ["b", "c"] - df1 = pd.DataFrame() - df1["a1"] = column_a[0] - df1["b"] = column_b[0] - df1["c"] = column_c[0] - df1 = df1.set_index(index) - gdf1 = cudf.from_pandas(df1) - - df2 = pd.DataFrame() - df2["a2"] = column_a[1] - df2["b"] = column_b[1] - df2["c"] = column_c[1] - df2 = df2.set_index(index) - gdf2 = cudf.from_pandas(df2) - - gdf_result = gdf1.join(gdf2, how=how, sort=True) - pdf_result = df1.join(df2, how=how, sort=True) - - # Make sure columns are in the same order - columns = pdf_result.columns.values - gdf_result = gdf_result[columns] - pdf_result = pdf_result[columns] - - assert_join_results_equal(pdf_result, gdf_result, how="inner") - - @pytest.mark.parametrize( "kwargs", [ @@ -962,18 +642,25 @@ def test_merge_multi(kwargs): assert_join_results_equal(expect, got, how="left") -@pytest.mark.parametrize("dtype_l", INTEGER_TYPES) -@pytest.mark.parametrize("dtype_r", INTEGER_TYPES) -def test_typecast_on_join_int_to_int(dtype_l, dtype_r): +@pytest.fixture +def integer_types_as_str2(integer_types_as_str): + return integer_types_as_str + + +def test_typecast_on_join_int_to_int( + integer_types_as_str, integer_types_as_str2 +): other_data = ["a", "b", "c"] - join_data_l = cudf.Series([1, 2, 3], dtype=dtype_l) - join_data_r = cudf.Series([1, 2, 4], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3], dtype=integer_types_as_str) + join_data_r = cudf.Series([1, 2, 4], dtype=integer_types_as_str2) gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - exp_dtype = find_common_type((np.dtype(dtype_l), np.dtype(dtype_r))) + exp_dtype = find_common_type( + (np.dtype(integer_types_as_str), np.dtype(integer_types_as_str2)) + ) exp_join_data = [1, 2] exp_other_data = ["a", "b"] @@ -992,20 +679,29 @@ def test_typecast_on_join_int_to_int(dtype_l, dtype_r): assert_join_results_equal(expect, got, how="inner") -@pytest.mark.parametrize("dtype_l", ["float32", "float64"]) -@pytest.mark.parametrize("dtype_r", ["float32", "float64"]) -def test_typecast_on_join_float_to_float(dtype_l, 
dtype_r): +@pytest.fixture +def float_types_as_str2(float_types_as_str): + return float_types_as_str + + +def test_typecast_on_join_float_to_float( + float_types_as_str, float_types_as_str2 +): other_data = ["a", "b", "c", "d", "e", "f"] - join_data_l = cudf.Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) - join_data_r = cudf.Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) + join_data_l = cudf.Series([1, 2, 3, 0.9, 4.5, 6], dtype=float_types_as_str) + join_data_r = cudf.Series( + [1, 2, 3, 0.9, 4.5, 7], dtype=float_types_as_str2 + ) gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - exp_dtype = find_common_type((np.dtype(dtype_l), np.dtype(dtype_r))) + exp_dtype = find_common_type( + (np.dtype(float_types_as_str), np.dtype(float_types_as_str2)) + ) - if dtype_l != dtype_r: + if float_types_as_str != float_types_as_str2: exp_join_data = [1, 2, 3, 4.5] exp_other_data = ["a", "b", "c", "e"] else: @@ -1027,24 +723,37 @@ def test_typecast_on_join_float_to_float(dtype_l, dtype_r): assert_join_results_equal(expect, got, how="inner") -@pytest.mark.parametrize("dtype_l", NUMERIC_TYPES) -@pytest.mark.parametrize("dtype_r", NUMERIC_TYPES) -def test_typecast_on_join_mixed_int_float(dtype_l, dtype_r): +@pytest.fixture +def numeric_types_as_str2(numeric_types_as_str): + return numeric_types_as_str + + +def test_typecast_on_join_mixed_int_float( + numeric_types_as_str, numeric_types_as_str2 +): if ( - ("int" in dtype_l or "long" in dtype_l) - and ("int" in dtype_r or "long" in dtype_r) - ) or ("float" in dtype_l and "float" in dtype_r): + ("int" in numeric_types_as_str or "long" in numeric_types_as_str) + and ("int" in numeric_types_as_str2 or "long" in numeric_types_as_str2) + ) or ( + "float" in numeric_types_as_str and "float" in numeric_types_as_str2 + ): pytest.skip("like types not tested in this function") other_data = ["a", "b", "c", "d", "e", "f"] - join_data_l = cudf.Series([1, 2, 3, 0.9, 4.5, 6], dtype=dtype_l) - join_data_r = cudf.Series([1, 2, 3, 0.9, 4.5, 7], dtype=dtype_r) + join_data_l = cudf.Series( + [1, 2, 3, 0.9, 4.5, 6], dtype=numeric_types_as_str + ) + join_data_r = cudf.Series( + [1, 2, 3, 0.9, 4.5, 7], dtype=numeric_types_as_str2 + ) gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - exp_dtype = find_common_type((np.dtype(dtype_l), np.dtype(dtype_r))) + exp_dtype = find_common_type( + (np.dtype(numeric_types_as_str), np.dtype(numeric_types_as_str2)) + ) exp_join_data = [1, 2, 3] exp_other_data = ["a", "b", "c"] @@ -1275,27 +984,28 @@ def test_mixed_decimal_typecast(dtype_l, dtype_r): gdf_l.merge(gdf_r, on="join_col", how="inner") -@pytest.mark.parametrize( - "dtype_l", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -@pytest.mark.parametrize( - "dtype_r", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_on_join_dt_to_dt(dtype_l, dtype_r): +@pytest.fixture +def datetime_types_as_str2(datetime_types_as_str): + return datetime_types_as_str + + +def test_typecast_on_join_dt_to_dt( + datetime_types_as_str, datetime_types_as_str2 +): other_data = ["a", "b", "c", "d", "e"] join_data_l = cudf.Series( ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01", "2019-08-15"] - ).astype(dtype_l) + ).astype(datetime_types_as_str) join_data_r = cudf.Series( ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01", "2019-08-16"] - ).astype(dtype_r) + 
).astype(datetime_types_as_str2) gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - exp_dtype = max(np.dtype(dtype_l), np.dtype(dtype_r)) + exp_dtype = max( + np.dtype(datetime_types_as_str), np.dtype(datetime_types_as_str2) + ) exp_join_data = ["1991-11-20", "1999-12-31", "2004-12-04", "2015-01-01"] exp_other_data = ["a", "b", "c", "d"] @@ -1552,725 +1262,6 @@ def test_categorical_typecast_outer_one_cat(dtype): assert result["key"].dtype == left["key"].dtype.categories.dtype -@pytest.mark.parametrize( - ("lhs", "rhs"), - [ - (["a", "b"], ["a"]), - (["a"], ["a", "b"]), - (["a", "b"], ["b"]), - (["b"], ["a", "b"]), - (["a"], ["a"]), - ], -) -@pytest.mark.parametrize("how", ["left", "right", "outer", "inner"]) -@pytest.mark.parametrize("level", ["a", "b", 0, 1]) -def test_index_join(lhs, rhs, how, level): - l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) - r_pdf = pd.DataFrame({"a": [1, 5, 4, 0], "b": [3, 9, 8, 4]}) - l_df = cudf.from_pandas(l_pdf) - r_df = cudf.from_pandas(r_pdf) - p_lhs = l_pdf.set_index(lhs).index - p_rhs = r_pdf.set_index(rhs).index - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - - expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) - got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) - - assert_join_results_equal(expected, got, how=how) - - -def test_index_join_corner_cases(): - l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) - r_pdf = pd.DataFrame( - {"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]} - ) - l_df = cudf.from_pandas(l_pdf) - r_df = cudf.from_pandas(r_pdf) - - # Join when column name doesn't match with level - lhs = ["a", "b"] - # level and rhs don't match - rhs = ["c"] - level = "b" - how = "outer" - p_lhs = l_pdf.set_index(lhs).index - p_rhs = r_pdf.set_index(rhs).index - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) - got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) - - assert_join_results_equal(expected, got, how=how) - - # sort is supported only in case of two non-MultiIndex join - # Join when column name doesn't match with level - lhs = ["a"] - # level and rhs don't match - rhs = ["a"] - level = "b" - how = "left" - p_lhs = l_pdf.set_index(lhs).index - p_rhs = r_pdf.set_index(rhs).index - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - expected = p_lhs.join(p_rhs, how=how, sort=True) - got = g_lhs.join(g_rhs, how=how, sort=True) - - assert_join_results_equal(expected, got, how=how) - - # Pandas Index.join on categorical column returns generic column - # but cudf will be returning a categorical column itself. 
- lhs = ["a", "b"] - rhs = ["a"] - level = "a" - how = "inner" - l_df["a"] = l_df["a"].astype("category") - r_df["a"] = r_df["a"].astype("category") - p_lhs = l_pdf.set_index(lhs).index - p_rhs = r_pdf.set_index(rhs).index - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False) - got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False) - - got["a"] = got["a"].astype(expected["a"].dtype) - - assert_join_results_equal(expected, got, how=how) - - -def test_index_join_exception_cases(): - l_df = cudf.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]}) - r_df = cudf.DataFrame( - {"a": [1, 5, 4, 0], "b": [3, 9, 8, 4], "c": [2, 3, 6, 0]} - ) - - # Join between two MultiIndex - lhs = ["a", "b"] - rhs = ["a", "c"] - level = "a" - how = "outer" - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - - with pytest.raises(TypeError): - g_lhs.join(g_rhs, level=level, how=how) - - # Improper level value, level should be an int or scalar value - level = ["a"] - rhs = ["a"] - g_lhs = l_df.set_index(lhs).index - g_rhs = r_df.set_index(rhs).index - with pytest.raises(ValueError): - g_lhs.join(g_rhs, level=level, how=how) - - -def test_typecast_on_join_indexes(): - join_data_l = cudf.Series([1, 2, 3, 4, 5], dtype="int8") - join_data_r = cudf.Series([1, 2, 3, 4, 6], dtype="int32") - other_data = ["a", "b", "c", "d", "e"] - - gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - - gdf_l = gdf_l.set_index("join_col") - gdf_r = gdf_r.set_index("join_col") - - exp_join_data = [1, 2, 3, 4] - exp_other_data = ["a", "b", "c", "d"] - - expect = cudf.DataFrame( - { - "join_col": exp_join_data, - "B_x": exp_other_data, - "B_y": exp_other_data, - } - ) - expect = expect.set_index("join_col") - - got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - - assert_join_results_equal(expect, got, how="inner") - - -def test_typecast_on_join_multiindices(): - join_data_l_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int8") - join_data_l_1 = cudf.Series([2, 3, 4.1, 5.9, 6], dtype="float32") - join_data_l_2 = cudf.Series([7, 8, 9, 0, 1], dtype="float32") - - join_data_r_0 = cudf.Series([1, 2, 3, 4, 5], dtype="int32") - join_data_r_1 = cudf.Series([2, 3, 4, 5, 6], dtype="int32") - join_data_r_2 = cudf.Series([7, 8, 9, 0, 0], dtype="float64") - - other_data = ["a", "b", "c", "d", "e"] - - gdf_l = cudf.DataFrame( - { - "join_col_0": join_data_l_0, - "join_col_1": join_data_l_1, - "join_col_2": join_data_l_2, - "B": other_data, - } - ) - gdf_r = cudf.DataFrame( - { - "join_col_0": join_data_r_0, - "join_col_1": join_data_r_1, - "join_col_2": join_data_r_2, - "B": other_data, - } - ) - - gdf_l = gdf_l.set_index(["join_col_0", "join_col_1", "join_col_2"]) - gdf_r = gdf_r.set_index(["join_col_0", "join_col_1", "join_col_2"]) - - exp_join_data_0 = cudf.Series([1, 2], dtype="int32") - exp_join_data_1 = cudf.Series([2, 3], dtype="float64") - exp_join_data_2 = cudf.Series([7, 8], dtype="float64") - exp_other_data = cudf.Series(["a", "b"]) - - expect = cudf.DataFrame( - { - "join_col_0": exp_join_data_0, - "join_col_1": exp_join_data_1, - "join_col_2": exp_join_data_2, - "B_x": exp_other_data, - "B_y": exp_other_data, - } - ) - expect = expect.set_index(["join_col_0", "join_col_1", "join_col_2"]) - got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - - assert_join_results_equal(expect, got, how="inner") - - -def 
test_typecast_on_join_indexes_matching_categorical(): - join_data_l = cudf.Series(["a", "b", "c", "d", "e"], dtype="category") - join_data_r = cudf.Series(["a", "b", "c", "d", "e"], dtype="str") - other_data = [1, 2, 3, 4, 5] - - gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data}) - gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data}) - - gdf_l = gdf_l.set_index("join_col") - gdf_r = gdf_r.set_index("join_col") - - exp_join_data = ["a", "b", "c", "d", "e"] - exp_other_data = [1, 2, 3, 4, 5] - - expect = cudf.DataFrame( - { - "join_col": exp_join_data, - "B_x": exp_other_data, - "B_y": exp_other_data, - } - ) - expect = expect.set_index("join_col") - got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y") - - assert_join_results_equal(expect, got, how="inner") - - -@pytest.mark.parametrize( - "lhs", - [ - lambda: cudf.Series([1, 2, 3], name="a"), - lambda: cudf.DataFrame({"a": [2, 3, 4], "c": [4, 5, 6]}), - ], -) -@pytest.mark.parametrize( - "rhs", - [ - lambda: cudf.Series([1, 2, 3], name="b"), - lambda: cudf.DataFrame({"b": [2, 3, 4], "c": [4, 5, 6]}), - ], -) -@pytest.mark.parametrize( - "how", ["left", "inner", "outer", "leftanti", "leftsemi"] -) -@pytest.mark.parametrize( - "kwargs", - [ - {"left_on": "a", "right_on": "b"}, - {"left_index": True, "right_on": "b"}, - {"left_on": "a", "right_index": True}, - {"left_index": True, "right_index": True}, - ], -) -def test_series_dataframe_mixed_merging(lhs, rhs, how, kwargs): - if how in ("leftsemi", "leftanti") and ( - kwargs.get("left_index") or kwargs.get("right_index") - ): - pytest.skip("Index joins not compatible with leftsemi and leftanti") - - lhs = lhs() - rhs = rhs() - check_lhs = lhs.copy() - check_rhs = rhs.copy() - if isinstance(lhs, cudf.Series): - check_lhs = lhs.to_frame() - if isinstance(rhs, cudf.Series): - check_rhs = rhs.to_frame() - - expect = cudf.merge(check_lhs, check_rhs, how=how, **kwargs) - got = cudf.merge(lhs, rhs, how=how, **kwargs) - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.xfail(reason="Cannot sort values of list dtype") -@pytest.mark.parametrize( - "how", ["left", "inner", "right", "leftanti", "leftsemi"] -) -def test_merge_with_lists(how): - pd_left = pd.DataFrame( - { - "a": [1, 2, 3, 4, 5, 6], - "b": [[1, 2, 3], [4, 5], None, [6], [7, 8, None], []], - "c": ["a", "b", "c", "d", "e", "f"], - } - ) - pd_right = pd.DataFrame( - { - "a": [4, 3, 2, 1, 0, -1], - "d": [[[1, 2], None], [], [[3, 4]], None, [[5], [6, 7]], [[8]]], - } - ) - - gd_left = cudf.from_pandas(pd_left) - gd_right = cudf.from_pandas(pd_right) - - expect = pd_left.merge(pd_right, on="a") - got = gd_left.merge(gd_right, on="a") - - assert_join_results_equal(expect, got, how=how) - - -def test_join_renamed_index(): - df = cudf.DataFrame( - {0: [1, 2, 3, 4, 5], 1: [1, 2, 3, 4, 5], "c": [1, 2, 3, 4, 5]} - ).set_index([0, 1]) - df.index.names = ["a", "b"] # doesn't actually change df._index._data - - expect = df.to_pandas().merge( - df.to_pandas(), left_index=True, right_index=True - ) - got = df.merge(df, left_index=True, right_index=True, how="inner") - assert_join_results_equal(expect, got, how="inner") - - -@pytest.mark.parametrize( - "lhs_col, lhs_idx, rhs_col, rhs_idx, on", - [ - (["A", "B"], "L0", ["B", "C"], "L0", ["B"]), - (["A", "B"], "L0", ["B", "C"], "L0", ["L0"]), - (["A", "B"], "L0", ["B", "C"], "L0", ["B", "L0"]), - (["A", "B"], "L0", ["C", "L0"], "A", ["A"]), - (["A", "B"], "L0", ["C", "L0"], "A", ["L0"]), - (["A", "B"], "L0", ["C", "L0"], "A", ["A", "L0"]), - ], -) 
-@pytest.mark.parametrize( - "how", ["left", "inner", "right", "outer", "leftanti", "leftsemi"] -) -def test_join_merge_with_on(lhs_col, lhs_idx, rhs_col, rhs_idx, on, how): - lhs_data = {col_name: [4, 5, 6] for col_name in lhs_col} - lhs_index = cudf.Index([0, 1, 2], name=lhs_idx) - - rhs_data = {col_name: [4, 5, 6] for col_name in rhs_col} - rhs_index = cudf.Index([2, 3, 4], name=rhs_idx) - - gd_left = cudf.DataFrame(lhs_data, lhs_index) - gd_right = cudf.DataFrame(rhs_data, rhs_index) - pd_left = gd_left.to_pandas() - pd_right = gd_right.to_pandas() - - expect = pd_left.merge(pd_right, on=on).sort_index(axis=1, ascending=False) - got = gd_left.merge(gd_right, on=on).sort_index(axis=1, ascending=False) - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.parametrize( - "on", - ["A", "L0"], -) -@pytest.mark.parametrize( - "how", ["left", "inner", "right", "outer", "leftanti", "leftsemi"] -) -def test_join_merge_invalid_keys(on, how): - gd_left = cudf.DataFrame( - {"A": [1, 2, 3], "B": [4, 5, 6]}, index=cudf.Index([0, 1, 2], name="C") - ) - gd_right = cudf.DataFrame( - {"D": [2, 3, 4], "E": [7, 8, 0]}, index=cudf.Index([0, 2, 4], name="F") - ) - pd_left = gd_left.to_pandas() - pd_right = gd_right.to_pandas() - - with pytest.raises(KeyError): - pd_left.merge(pd_right, on=on) - gd_left.merge(gd_right, on=on) - - -@pytest.mark.parametrize( - "str_data", - [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]], -) -@pytest.mark.parametrize("num_keys", [1, 2, 3]) -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_string_join_key(str_data, num_keys, how): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_keys): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - if len(other_data) == 0: - pdf["a"] = pdf["a"].astype("str") - pdf2 = pdf.copy() - gdf2 = gdf.copy() - - expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how) - got = gdf.merge(gdf2, on=list(range(num_keys)), how=how) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] # reorder columns - - if how == "right": - got = got[expect.columns] # reorder columns - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.parametrize( - "str_data_nulls", - [ - ["a", "b", "c"], - ["a", "b", "f", "g"], - ["f", "g", "h", "i", "j"], - ["f", "g", "h"], - [None, None, None, None, None], - [], - ], -) -def test_string_join_key_nulls(str_data_nulls): - str_data = ["a", "b", "c", "d", "e"] - other_data = [1, 2, 3, 4, 5] - - other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - pdf["key"] = pd.Series(str_data, dtype="str") - gdf["key"] = cudf.Series(str_data, dtype="str") - pdf["vals"] = other_data - gdf["vals"] = other_data - - pdf2 = pd.DataFrame() - gdf2 = cudf.DataFrame() - pdf2["key"] = pd.Series(str_data_nulls, dtype="str") - gdf2["key"] = cudf.Series(str_data_nulls, dtype="str") - pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64") - - expect = pdf.merge(pdf2, on="key", how="left") - got = gdf.merge(gdf2, on="key", how="left") - got["vals_y"] = got["vals_y"].fillna(-1) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64") 
- - assert_join_results_equal(expect, got, how="left") - - -@pytest.mark.parametrize( - "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] -) -@pytest.mark.parametrize("num_cols", [1, 2, 3]) -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_string_join_non_key(str_data, num_cols, how): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_cols): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - if len(other_data) == 0: - pdf["a"] = pdf["a"].astype("str") - - pdf2 = pdf.copy() - gdf2 = gdf.copy() - - expect = pdf.merge(pdf2, on=["a"], how=how) - got = gdf.merge(gdf2, on=["a"], how=how) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - if how == "right": - got = got[expect.columns] # reorder columns - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.parametrize( - "str_data_nulls", - [ - ["a", "b", "c"], - ["a", "b", "f", "g"], - ["f", "g", "h", "i", "j"], - ["f", "g", "h"], - [None, None, None, None, None], - [], - ], -) -def test_string_join_non_key_nulls(str_data_nulls): - str_data = ["a", "b", "c", "d", "e"] - other_data = [1, 2, 3, 4, 5] - - other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - pdf["vals"] = pd.Series(str_data, dtype="str") - gdf["vals"] = cudf.Series(str_data, dtype="str") - pdf["key"] = other_data - gdf["key"] = other_data - - pdf2 = pd.DataFrame() - gdf2 = cudf.DataFrame() - pdf2["vals"] = pd.Series(str_data_nulls, dtype="str") - gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str") - pdf2["key"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64") - - expect = pdf.merge(pdf2, on="key", how="left") - got = gdf.merge(gdf2, on="key", how="left") - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - assert_join_results_equal(expect, got, how="left") - - -def test_string_join_values_nulls(): - left_dict = [ - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "LEFT NO MATCH 1", "a": -1.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "LEFT NO MATCH 2", "a": -2.0}, - {"b": "MATCH 3", "a": 3.0}, - {"b": "MATCH 3", "a": 3.0}, - ] - - right_dict = [ - {"b": "RIGHT NO MATCH 1", "c": -1.0}, - {"b": "MATCH 3", "c": 3.0}, - {"b": "MATCH 2", "c": 2.0}, - {"b": "RIGHT NO MATCH 2", "c": -2.0}, - {"b": "RIGHT NO MATCH 3", "c": -3.0}, - {"b": "MATCH 1", "c": 1.0}, - ] - - left_pdf = pd.DataFrame(left_dict) - right_pdf = pd.DataFrame(right_dict) - - left_gdf = cudf.DataFrame.from_pandas(left_pdf) - right_gdf = cudf.DataFrame.from_pandas(right_pdf) - - expect = left_pdf.merge(right_pdf, how="left", on="b") - got = left_gdf.merge(right_gdf, how="left", on="b") - - expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True) - got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True) - - assert_join_results_equal(expect, got, how="left") - - -@pytest.mark.parametrize( - "left_on,right_on", - [ - *product(["a", "b", "c"], ["a", "b"]), - *zip(combinations(["a", "b", "c"], 2), repeat(["a", "b"])), - ], -) -def test_merge_mixed_index_columns(left_on, 
right_on): - left = pd.DataFrame({"a": [1, 2, 1, 2], "b": [2, 3, 3, 4]}).set_index("a") - right = pd.DataFrame({"a": [1, 2, 1, 3], "b": [2, 30, 3, 4]}).set_index( - "a" - ) - - left["c"] = 10 - - expect = left.merge(right, left_on=left_on, right_on=right_on, how="outer") - cleft = cudf.from_pandas(left) - cright = cudf.from_pandas(right) - got = cleft.merge(cright, left_on=left_on, right_on=right_on, how="outer") - assert_join_results_equal(expect, got, how="outer") - - -def test_merge_multiindex_columns(): - lhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}) - lhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")]) - rhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}) - rhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "z")]) - expect = lhs.merge(rhs, on=[("a", "x")], how="inner") - - lhs = cudf.from_pandas(lhs) - rhs = cudf.from_pandas(rhs) - got = lhs.merge(rhs, on=[("a", "x")], how="inner") - - assert_join_results_equal(expect, got, how="inner") - - -def test_join_multiindex_empty(): - lhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}, index=["a", "b", "c"]) - lhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")]) - rhs = pd.DataFrame(index=["a", "c", "d"]) - g_lhs = cudf.from_pandas(lhs) - g_rhs = cudf.from_pandas(rhs) - assert_exceptions_equal( - lfunc=lhs.join, - rfunc=g_lhs.join, - lfunc_args_and_kwargs=([rhs], {"how": "inner"}), - rfunc_args_and_kwargs=([g_rhs], {"how": "inner"}), - check_exception_type=False, - ) - - -def test_join_on_index_with_duplicate_names(): - # although index levels with duplicate names are poorly supported - # overall, we *should* be able to join on them: - lhs = pd.DataFrame({"a": [1, 2, 3]}) - rhs = pd.DataFrame({"b": [1, 2, 3]}) - lhs.index = pd.MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1)], names=["x", "x"] - ) - rhs.index = pd.MultiIndex.from_tuples( - [(1, 1), (1, 3), (2, 1)], names=["x", "x"] - ) - expect = lhs.join(rhs, how="inner") - - lhs = cudf.from_pandas(lhs) - rhs = cudf.from_pandas(rhs) - got = lhs.join(rhs, how="inner") - - assert_join_results_equal(expect, got, how="inner") - - -def test_join_redundant_params(): - lhs = cudf.DataFrame( - {"a": [1, 2, 3], "c": [2, 3, 4]}, index=cudf.Index([0, 1, 2], name="c") - ) - rhs = cudf.DataFrame( - {"b": [1, 2, 3]}, index=cudf.Index([0, 1, 2], name="a") - ) - with pytest.raises(ValueError): - lhs.merge(rhs, on="a", left_index=True) - with pytest.raises(ValueError): - lhs.merge(rhs, left_on="a", left_index=True, right_index=True) - with pytest.raises(ValueError): - lhs.merge(rhs, right_on="a", left_index=True, right_index=True) - with pytest.raises(ValueError): - lhs.merge(rhs, left_on="c", right_on="b") - - -def test_join_multiindex_index(): - # test joining a MultiIndex with an Index with overlapping name - lhs = ( - cudf.DataFrame({"a": [2, 3, 1], "b": [3, 4, 2]}) - .set_index(["a", "b"]) - .index - ) - rhs = cudf.DataFrame({"a": [1, 4, 3]}).set_index("a").index - expect = lhs.to_pandas().join(rhs.to_pandas(), how="inner") - got = lhs.join(rhs, how="inner") - assert_join_results_equal(expect, got, how="inner") - - -def test_dataframe_join_on(): - """Verify that specifying the on parameter gives a NotImplementedError.""" - df = cudf.DataFrame({"a": [1, 2, 3]}) - with pytest.raises(NotImplementedError): - df.join(df, on="a") - - -def test_index_join_return_indexers_notimplemented(): - index = cudf.RangeIndex(start=0, stop=20, step=2) - other = cudf.Index([4, 4, 3, 3]) - with pytest.raises(NotImplementedError): - index.join(other, how="left", 
return_indexers=True) - - -@pytest.mark.parametrize("how", ["inner", "outer"]) -def test_index_join_names(request, how): - idx1 = cudf.Index([10, 1, 2, 4, 2, 1], name="a") - idx2 = cudf.Index([-10, 2, 3, 1, 2], name="b") - request.applymarker( - pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/57065", - ) - ) - pidx1 = idx1.to_pandas() - pidx2 = idx2.to_pandas() - - expected = pidx1.join(pidx2, how=how) - actual = idx1.join(idx2, how=how) - assert_join_results_equal(actual, expected, how=how) - - -@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) -def test_join_datetime_timedelta_error(dtype): - df1 = cudf.DataFrame({"a": cudf.Series([10, 20, 30], dtype=dtype)}) - df2 = df1.astype("int") - - with pytest.raises(TypeError): - df1.merge(df2) - - -@pytest.mark.parametrize("dtype1", TIMEDELTA_TYPES) -@pytest.mark.parametrize("dtype2", TIMEDELTA_TYPES) -def test_merge_timedelta_types(dtype1, dtype2): - df1 = cudf.DataFrame({"a": cudf.Series([10, 20, 30], dtype=dtype1)}) - df2 = cudf.DataFrame({"a": cudf.Series([20, 500, 33240], dtype=dtype2)}) - - pdf1 = df1.to_pandas() - pdf2 = df2.to_pandas() - actual = df1.merge(df2) - expected = pdf1.merge(pdf2) - - # Pandas is materializing the index, which is unnecessary - # hence the special handling. - assert_eq( - actual, - expected, - check_index_type=False - if isinstance(actual.index, cudf.RangeIndex) - and isinstance(expected.index, pd.Index) - else True, - check_dtype=len(actual) > 0, - ) - - def test_merge_index_on_opposite_how_column_reset_index(): df = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=[1, 3, 5, 7, 9]) ser = pd.Series([1, 2], index=pd.Index([1, 2], name="a"), name="b") @@ -2319,59 +1310,41 @@ def test_merge_left_on_right_index_sort(): assert_eq(result, expected) -@pytest.mark.parametrize( - "left_data", - [ - {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}, - {"lkey": ["foo", "bar", "baz", "foo"], "value": [5, 3, 2, 1]}, - { - "lkey": ["foo", "bar", "baz", "foo"], - "value": [5, 3, 2, 1], - "extra_left": [1, 2, 3, 4], - }, - ], -) -@pytest.mark.parametrize( - "right_data", - [ - {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]}, - {"rkey": ["foo", "bar", "baz", "foo"], "value": [8, 7, 6, 5]}, - { - "rkey": ["foo", "bar", "baz", "foo"], - "value": [8, 7, 6, 5], - "extra_right": [10, 2, 30, 4], - }, - ], -) -@pytest.mark.parametrize("sort", [True, False]) -def test_cross_join_overlapping(left_data, right_data, sort): - df1 = cudf.DataFrame(left_data) - df2 = cudf.DataFrame(right_data) +def test_merge_renamed_index(): + df = cudf.DataFrame( + {0: [1, 2, 3, 4, 5], 1: [1, 2, 3, 4, 5], "c": [1, 2, 3, 4, 5]} + ).set_index([0, 1]) + df.index.names = ["a", "b"] # doesn't actually change df._index._data - pdf1 = df1.to_pandas() - pdf2 = df2.to_pandas() - expected = pdf1.join( - pdf2, how="cross", lsuffix="_x", rsuffix="_y", sort=sort + expect = df.to_pandas().merge( + df.to_pandas(), left_index=True, right_index=True ) - result = df1.join(df2, how="cross", lsuffix="_x", rsuffix="_y", sort=sort) - assert_eq(result, expected) + got = df.merge(df, left_index=True, right_index=True, how="inner") + assert_join_results_equal(expect, got, how="inner") -@pytest.mark.parametrize( - "suffixes", - [ - ("_left", "_right"), - ("", "_right"), - ("_left", ""), - ], -) -def test_cross_merge(suffixes): - df1 = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pdf1 = df1.to_pandas() +def test_merge_redundant_params(): + lhs = cudf.DataFrame( + {"a": [1, 2, 3], "c": [2, 3, 4]}, 
index=cudf.Index([0, 1, 2], name="c") + ) + rhs = cudf.DataFrame( + {"b": [1, 2, 3]}, index=cudf.Index([0, 1, 2], name="a") + ) + with pytest.raises(ValueError): + lhs.merge(rhs, on="a", left_index=True) + with pytest.raises(ValueError): + lhs.merge(rhs, left_on="a", left_index=True, right_index=True) + with pytest.raises(ValueError): + lhs.merge(rhs, right_on="a", left_index=True, right_index=True) + with pytest.raises(ValueError): + lhs.merge(rhs, left_on="c", right_on="b") - df2 = cudf.DataFrame({"a": [11, 12, 13], "d": [40, 50, 60]}) - pdf2 = df2.to_pandas() - expected = pdf1.merge(pdf2, how="cross", suffixes=suffixes) - result = df1.merge(df2, how="cross", suffixes=suffixes) - assert_eq(result, expected) +def test_merge_datetime_timedelta_error(temporal_types_as_str): + df1 = cudf.DataFrame( + {"a": cudf.Series([10, 20, 30], dtype=temporal_types_as_str)} + ) + df2 = df1.astype("int") + + with pytest.raises(TypeError): + df1.merge(df2) From 28613267714446c29a28183522fea86db24cae1c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 6 Aug 2025 19:16:24 -0700 Subject: [PATCH 079/366] Compile `libcudf_kafka` and `cudf_kafka` with C++20 (#19543) Continuation of #19065, hopefully the last one. Discovered that the kafka components are not compiled using C++20, unlike the rest of libcudf/cuDF. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19543 --- cpp/libcudf_kafka/CMakeLists.txt | 11 ++++++----- cpp/libcudf_kafka/tests/CMakeLists.txt | 4 +++- python/cudf_kafka/CMakeLists.txt | 7 +++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 06e03bb84d4..84fe208b4a0 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -29,6 +29,10 @@ project( # Set a default build type if none was specified rapids_cmake_build_type(Release) +# Set C++ standard globally to ensure all targets use C++20 +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + # For now, disable CMake's automatic module scanning for C++ files. There is an sccache bug in the # version RAPIDS uses in CI that causes it to handle the resulting -M* flags incorrectly with # gcc>=14. We can remove this once we upgrade to a newer sccache version. @@ -83,11 +87,8 @@ if(TARGET conda_env) endif() set_target_properties( - cudf_kafka - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" # set target compile options - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON + cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH + "\$ORIGIN" # set target compile options ) add_library(cudf_kafka::cudf_kafka ALIAS cudf_kafka) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index b819cb6fc3b..05913aae110 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -28,6 +28,8 @@ function(ConfigureTest test_name) ${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON ) target_link_libraries( ${test_name} PRIVATE GTest::gmock GTest::gmock_main GTest::gtest_main cudf_kafka diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index dc352e0772a..07d436a0761 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -22,6 +22,13 @@ project( LANGUAGES CXX ) +# Set C++ standard to match other cudf Python packages +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Disable CMake's automatic module scanning for C++ files due to sccache bug with gcc>=14 +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + find_package(cudf_kafka "${RAPIDS_VERSION}" REQUIRED) if(NOT cudf_kafka_FOUND) From d1a00b19c7ea7c72940140415ce2bb0cb40ee0eb Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 7 Aug 2025 09:46:57 -0400 Subject: [PATCH 080/366] Additional gtests error checks for string/timestamp convert libcudf APIs (#19562) Converts generic `cudf::logic_error` exceptions to more appropriate `std::invalid_argument` exceptions and adds additional gtests for those cases. Also update the doxygen as appropriate. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19562 --- .../cudf/strings/convert/convert_datetime.hpp | 18 +++++++------ cpp/src/strings/convert/convert_datetime.cu | 25 +++++++++++-------- cpp/tests/strings/datetime_tests.cpp | 16 +++++++----- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index 04eba83925d..874cb9aa69e 100644 --- a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ namespace strings { * @brief Returns a new timestamp column converting a strings column into * timestamps using the provided format pattern. * - * The format pattern can include the following specifiers: "%Y,%y,%m,%d,%H,%I,%p,%M,%S,%f,%z" + * The format pattern can include the following specifiers: * * | Specifier | Description | * | :-------: | ----------- | @@ -94,7 +94,7 @@ std::unique_ptr to_timestamps( * @brief Verifies the given strings column can be parsed to timestamps using the provided format * pattern. * - * The format pattern can include the following specifiers: "%Y,%y,%m,%d,%H,%I,%p,%M,%S,%f,%z" + * The format pattern can include the following specifiers: * * | Specifier | Description | * | :-------: | ----------- | @@ -125,6 +125,9 @@ std::unique_ptr to_timestamps( * This will return a column of type BOOL8 where a `true` row indicates the corresponding * input string can be parsed correctly with the given format. 
* + * @throw std::invalid_argument if the `format` string is empty + * @throw std::invalid_argument if a specifier is not supported + * * @param input Strings instance for this operation * @param format String specifying the timestamp format in strings * @param stream CUDA stream used for device memory operations and kernel launches @@ -141,7 +144,7 @@ std::unique_ptr is_timestamp( * @brief Returns a new strings column converting a timestamp column into * strings using the provided format pattern. * - * The format pattern can include the following specifiers: "%Y,%y,%m,%d,%H,%I,%p,%M,%S,%f,%z,%Z" + * The format pattern can include the following specifiers: * * | Specifier | Description | * | :-------: | ----------- | @@ -230,9 +233,10 @@ std::unique_ptr is_timestamp( * } * @endcode * - * @throw cudf::logic_error if `timestamps` column parameter is not a timestamp type. - * @throw cudf::logic_error if the `format` string is empty - * @throw cudf::logic_error if `names.size()` is an invalid size. Must be 0 or 40 strings. + * @throw std::invalid_argument if `timestamps` column parameter is not a timestamp type. + * @throw std::invalid_argument if the `format` string is empty + * @throw std::invalid_argument if `names.size()` is an invalid size. Must be 0 or 40 strings. + * @throw std::invalid_argument if a specifier is not supported * * @param timestamps Timestamp values to convert * @param format The string specifying output format. diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index d3564d62efe..965b71e4a08 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -135,7 +135,7 @@ struct format_compiler { items.push_back(format_item::new_literal(ch)); continue; } - CUDF_EXPECTS(length > 0, "Unfinished specifier in timestamp format"); + CUDF_EXPECTS(length > 0, "Unfinished specifier in timestamp format", std::invalid_argument); ch = *str++; length--; @@ -145,7 +145,9 @@ struct format_compiler { continue; } if (ch >= '0' && ch <= '9') { - CUDF_EXPECTS(*str == 'f', "precision not supported for specifier: " + std::string(1, *str)); + CUDF_EXPECTS(*str == 'f', + "precision not supported for specifier: " + std::string(1, *str), + std::invalid_argument); specifiers[*str] = static_cast(ch - '0'); ch = *str++; length--; @@ -153,7 +155,8 @@ struct format_compiler { // check if the specifier found is supported CUDF_EXPECTS(specifiers.find(ch) != specifiers.end(), - "invalid format specifier: " + std::string(1, ch)); + "invalid format specifier: " + std::string(1, ch), + std::invalid_argument); // create the format item for this specifier items.push_back(format_item::new_specifier(ch, specifiers[ch])); @@ -428,7 +431,7 @@ struct dispatch_to_timestamps_fn { rmm::cuda_stream_view) const requires(not cudf::is_timestamp()) { - CUDF_FAIL("Only timestamps type are expected"); + CUDF_FAIL("Only timestamps type are expected", std::invalid_argument); } }; @@ -441,10 +444,9 @@ std::unique_ptr to_timestamps(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - if (input.is_empty()) - return make_empty_column(timestamp_type); // make_timestamp_column(timestamp_type, 0); + if (input.is_empty()) { return make_empty_column(timestamp_type); } - CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty."); + CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty.", std::invalid_argument); auto d_strings = 
column_device_view::create(input.parent(), stream); @@ -682,7 +684,7 @@ std::unique_ptr is_timestamp(strings_column_view const& input, size_type strings_count = input.size(); if (strings_count == 0) return make_empty_column(type_id::BOOL8); - CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty."); + CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty.", std::invalid_argument); auto d_strings = column_device_view::create(input.parent(), stream); @@ -1123,7 +1125,7 @@ struct dispatch_from_timestamps_fn { strings_children operator()(Args&&...) const requires(not cudf::is_timestamp()) { - CUDF_FAIL("Only timestamps type are expected"); + CUDF_FAIL("Only timestamps type are expected", std::invalid_argument); } }; @@ -1138,9 +1140,10 @@ std::unique_ptr from_timestamps(column_view const& timestamps, { if (timestamps.is_empty()) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty."); + CUDF_EXPECTS(!format.empty(), "Format parameter must not be empty.", std::invalid_argument); CUDF_EXPECTS(names.is_empty() || names.size() == format_names_size, - "Invalid size for format names."); + "Invalid size for format names.", + std::invalid_argument); auto const d_names = column_device_view::create(names.parent(), stream); diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp index 6a5f95a318e..5192e0e5269 100644 --- a/cpp/tests/strings/datetime_tests.cpp +++ b/cpp/tests/strings/datetime_tests.cpp @@ -625,18 +625,22 @@ TEST_F(StringsDatetimeTest, Errors) cudf::logic_error); EXPECT_THROW( cudf::strings::to_timestamps(view, cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}, ""), - cudf::logic_error); + std::invalid_argument); EXPECT_THROW( cudf::strings::to_timestamps(view, cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}, "%2Y"), - cudf::logic_error); + std::invalid_argument); EXPECT_THROW( cudf::strings::to_timestamps(view, cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}, "%g"), - cudf::logic_error); + std::invalid_argument); cudf::test::fixed_width_column_wrapper invalid_timestamps{1530705600}; - EXPECT_THROW(cudf::strings::from_timestamps(invalid_timestamps), cudf::logic_error); + EXPECT_THROW(cudf::strings::from_timestamps(invalid_timestamps), std::invalid_argument); cudf::test::fixed_width_column_wrapper timestamps{ 1530705600}; - EXPECT_THROW(cudf::strings::from_timestamps(timestamps, ""), cudf::logic_error); - EXPECT_THROW(cudf::strings::from_timestamps(timestamps, "%A %B", view), cudf::logic_error); + EXPECT_THROW(cudf::strings::from_timestamps(timestamps, ""), std::invalid_argument); + EXPECT_THROW(cudf::strings::from_timestamps(timestamps, "%B", view), std::invalid_argument); + + EXPECT_THROW(cudf::strings::is_timestamp(view, "%D"), std::invalid_argument); + EXPECT_THROW(cudf::strings::is_timestamp(view, "%p %"), std::invalid_argument); + EXPECT_THROW(cudf::strings::from_timestamps(timestamps, "%Y:%H", view), std::invalid_argument); } From 0456b16ae8e86d759c17b2c9b503fd4453ca7d8d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 7 Aug 2025 11:14:22 -0400 Subject: [PATCH 081/366] Replace sprintf with std::format in libcudf parquet tests (#19364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes some build failures from the nightly tests https://github.com/rapidsai/cudf/actions/runs/16258662170/job/45899486553 ``` │ │ $SRC_DIR/cpp/tests/io/parquet_common.cpp:507:26: error: '%09d' 
directive writing between 9 and 11 bytes into a region of size 10 [-Werror=format-overflow=] │ │ 507 | sprintf(buf.data(), "%09d", (i % 2 == 0) ? i : (num_ordered_rows - i)); ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19364 --- .../io/experimental/hybrid_scan_common.cpp | 11 ++-- cpp/tests/io/parquet_common.cpp | 24 ++++----- cpp/tests/io/parquet_v2_test.cpp | 54 +++++++------------ 3 files changed, 33 insertions(+), 56 deletions(-) diff --git a/cpp/tests/io/experimental/hybrid_scan_common.cpp b/cpp/tests/io/experimental/hybrid_scan_common.cpp index 1224d9d4988..c17541b3321 100644 --- a/cpp/tests/io/experimental/hybrid_scan_common.cpp +++ b/cpp/tests/io/experimental/hybrid_scan_common.cpp @@ -25,6 +25,9 @@ #include #include +#include +#include + cudf::host_span fetch_footer_bytes(cudf::host_span buffer) { using namespace cudf::io::parquet; @@ -87,11 +90,7 @@ cudf::test::strings_column_wrapper constant_strings(cudf::size_type value) { CUDF_EXPECTS(value >= 0 && value <= 9999, "String value must be between 0000 and 9999"); - auto elements = - thrust::make_transform_iterator(thrust::make_constant_iterator(value), [](auto i) { - std::array buf{}; - snprintf(buf.data(), buf.size(), "%04d", i); - return std::string(buf.data()); - }); + auto elements = thrust::make_transform_iterator(thrust::make_constant_iterator(value), + [](auto i) { return std::format("{:04d}", i); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index aa0f5fafe4e..f0be469e5ed 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -18,6 +18,9 @@ #include +#include +#include + // Global environment for temporary files cudf::test::TempDirTestEnvironment* const temp_env = static_cast( @@ -478,11 +481,8 @@ template std::enable_if_t, cudf::test::strings_column_wrapper> ascending() { - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf{}; - snprintf(buf.data(), buf.size(), "%09d", i); - return std::string(buf.data()); - }); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:09d}", i); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } @@ -490,11 +490,8 @@ template std::enable_if_t, cudf::test::strings_column_wrapper> descending() { - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf{}; - snprintf(buf.data(), buf.size(), "%09d", static_cast(num_ordered_rows - i)); - return std::string(buf.data()); - }); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:09d}", static_cast(num_ordered_rows - i)); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } @@ -502,11 +499,8 @@ template std::enable_if_t, cudf::test::strings_column_wrapper> unordered() { - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf{}; - snprintf(buf.data(), buf.size(), "%09d", (i % 2 == 0) ? i : (num_ordered_rows - i)); - return std::string(buf.data()); - }); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:09d}", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp index b165b6bfa82..c30495c4e70 100644 --- a/cpp/tests/io/parquet_v2_test.cpp +++ b/cpp/tests/io/parquet_v2_test.cpp @@ -24,6 +24,8 @@ #include #include +#include +#include using cudf::test::iterators::no_nulls; @@ -694,12 +696,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%012d", i); - return std::string(buf.data()); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + auto str1_elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:012d}", i); }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); auto col1_data = random_values(num_rows); auto col2_data = random_values(num_rows); @@ -716,12 +715,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) auto col6 = cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end()); // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%d", i); - return std::string(buf.data()); - }); - auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + auto str2_elements = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return std::format("{}", i); }); + auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); auto const expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; @@ -788,12 +784,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) is_v2 ? cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%012d", i); - return std::string(buf.data()); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + auto str1_elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:012d}", i); }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); auto col1_data = random_values(num_rows); auto col2_data = random_values(num_rows); @@ -820,11 +813,8 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end(), valids); // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%d", i); - return std::string(buf.data()); - }); + auto str2_elements = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return std::format("{}", i); }); auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); auto expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; @@ -898,12 +888,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) is_v2 ? 
cudf::io::parquet::PageType::DATA_PAGE_V2 : cudf::io::parquet::PageType::DATA_PAGE; // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%012d", i); - return std::string(buf.data()); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + auto str1_elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return std::format("{:012d}", i); }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); auto col1_data = random_values(num_rows); auto col2_data = random_values(num_rows); @@ -915,12 +902,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - std::array buf; - snprintf(buf.data(), buf.size(), "%d", i); - return std::string(buf.data()); - }); - auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + auto str2_elements = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return std::format("{}", i); }); + auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); auto expected = table_view{{col0, col1, col2, col3}}; From 37ea851987264ac8f20855e07f3cd7ba6c78cb31 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 7 Aug 2025 15:55:18 -0400 Subject: [PATCH 082/366] Update rapids_config to handle user defined branch name (#19623) rapids_config will use a user defined branch over `RAPIDS_BRANCH` contents Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/19623 --- cmake/RAPIDS.cmake | 2 +- cmake/rapids_config.cmake | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/RAPIDS.cmake b/cmake/RAPIDS.cmake index 40de7cefcd2..ddef819498d 100644 --- a/cmake/RAPIDS.cmake +++ b/cmake/RAPIDS.cmake @@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) # Allow users to control which version is used -if(NOT rapids-cmake-branch OR NOT rapids-cmake-version) +if(NOT (rapids-cmake-branch OR rapids-cmake-version)) message( FATAL_ERROR "The CMake variable `rapids-cmake-branch` or `rapids-cmake-version` must be defined" ) diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake index b706c926e7a..b2c54a3f27d 100644 --- a/cmake/rapids_config.cmake +++ b/cmake/rapids_config.cmake @@ -35,6 +35,10 @@ if(NOT _rapids_branch) ) endif() -set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}") -set(rapids-cmake-branch "${_rapids_branch}") +if(NOT rapids-cmake-version) + set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}") +endif() +if(NOT rapids-cmake-branch) + set(rapids-cmake-branch "${_rapids_branch}") +endif() include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake") From 2ae747440d5e3f4227041e35a3fd2f03c299478b Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 7 Aug 2025 13:48:01 -0700 Subject: [PATCH 083/366] Add nvtx ranges to public APIs of the experimental parquet reader (#19618) Contributes to #19469 This PR moves the NVTX ranges from detail to public APIs of the experimental Parquet reader. 
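For reference, the change is mechanical: each public entry point now opens an
NVTX range and forwards to the implementation. A representative example,
excerpted from this diff:

```
[[nodiscard]] FileMetaData hybrid_scan_reader::parquet_metadata() const
{
  CUDF_FUNC_RANGE();  // NVTX range named after the enclosing function

  return _impl->parquet_metadata();
}
```

The matching `CUDF_FUNC_RANGE()` calls are removed from the
`hybrid_scan_reader_impl` methods below, so each public call is ranged exactly
once.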
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19618 --- .../io/parquet/experimental/hybrid_scan.cpp | 28 +++++++++++++++++++ .../experimental/hybrid_scan_helpers.cpp | 3 ++ .../parquet/experimental/hybrid_scan_impl.cpp | 19 ------------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/parquet/experimental/hybrid_scan.cpp b/cpp/src/io/parquet/experimental/hybrid_scan.cpp index f14ff508561..692c77a6fa1 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan.cpp @@ -16,6 +16,7 @@ #include "hybrid_scan_impl.hpp" +#include #include #include @@ -33,22 +34,30 @@ hybrid_scan_reader::~hybrid_scan_reader() = default; [[nodiscard]] text::byte_range_info hybrid_scan_reader::page_index_byte_range() const { + CUDF_FUNC_RANGE(); + return _impl->page_index_byte_range(); } [[nodiscard]] FileMetaData hybrid_scan_reader::parquet_metadata() const { + CUDF_FUNC_RANGE(); + return _impl->parquet_metadata(); } void hybrid_scan_reader::setup_page_index(cudf::host_span page_index_bytes) const { + CUDF_FUNC_RANGE(); + return _impl->setup_page_index(page_index_bytes); } std::vector hybrid_scan_reader::all_row_groups( parquet_reader_options const& options) const { + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(options.get_row_groups().size() <= 1, "Encountered invalid size of row group indices in parquet reader options"); @@ -61,6 +70,8 @@ std::vector hybrid_scan_reader::all_row_groups( size_type hybrid_scan_reader::total_rows_in_row_groups( cudf::host_span row_group_indices) const { + CUDF_FUNC_RANGE(); + if (row_group_indices.empty()) { return 0; } auto const input_row_group_indices = @@ -73,6 +84,8 @@ std::vector hybrid_scan_reader::filter_row_groups_with_stats( parquet_reader_options const& options, rmm::cuda_stream_view stream) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -84,6 +97,7 @@ std::pair, std::vector hybrid_scan_reader::secondary_filters_byte_ranges( cudf::host_span row_group_indices, parquet_reader_options const& options) const { + CUDF_FUNC_RANGE(); // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -97,6 +111,8 @@ std::vector hybrid_scan_reader::filter_row_groups_with_dictiona parquet_reader_options const& options, rmm::cuda_stream_view stream) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -113,6 +129,8 @@ std::vector hybrid_scan_reader::filter_row_groups_with_bloom_fi parquet_reader_options const& options, rmm::cuda_stream_view stream) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -129,6 +147,8 @@ std::unique_ptr hybrid_scan_reader::build_row_mask_with_page_index rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = 
std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -140,6 +160,8 @@ std::unique_ptr hybrid_scan_reader::build_row_mask_with_page_index hybrid_scan_reader::filter_column_chunks_byte_ranges( cudf::host_span row_group_indices, parquet_reader_options const& options) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -155,6 +177,8 @@ table_with_metadata hybrid_scan_reader::materialize_filter_columns( parquet_reader_options const& options, rmm::cuda_stream_view stream) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -171,6 +195,8 @@ table_with_metadata hybrid_scan_reader::materialize_filter_columns( hybrid_scan_reader::payload_column_chunks_byte_ranges( cudf::host_span row_group_indices, parquet_reader_options const& options) const { + CUDF_FUNC_RANGE(); + auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; @@ -185,6 +211,8 @@ table_with_metadata hybrid_scan_reader::materialize_payload_columns( parquet_reader_options const& options, rmm::cuda_stream_view stream) const { + CUDF_FUNC_RANGE(); + // Temporary vector with row group indices from the first source auto const input_row_group_indices = std::vector>{{row_group_indices.begin(), row_group_indices.end()}}; diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp index 3932e9bc15c..af4ce719786 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_helpers.cpp @@ -20,6 +20,7 @@ #include "io/parquet/reader_impl_helpers.hpp" #include "io/utilities/row_selection.hpp" +#include #include #include @@ -72,6 +73,8 @@ namespace { metadata::metadata(cudf::host_span footer_bytes) { + CUDF_FUNC_RANGE(); + CompactProtocolReader cp(footer_bytes.data(), footer_bytes.size()); cp.read(this); CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); diff --git a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp index 354eb2adde1..5c15d7b6f08 100644 --- a/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp +++ b/cpp/src/io/parquet/experimental/hybrid_scan_impl.cpp @@ -20,7 +20,6 @@ #include "hybrid_scan_helpers.hpp" #include "io/parquet/reader_impl_chunking_utils.cuh" -#include #include #include #include @@ -169,8 +168,6 @@ std::vector> hybrid_scan_reader_impl::filter_row_groups_w parquet_reader_options const& options, rmm::cuda_stream_view stream) { - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); @@ -195,8 +192,6 @@ hybrid_scan_reader_impl::secondary_filters_byte_ranges( cudf::host_span const> row_group_indices, parquet_reader_options const& options) { - CUDF_FUNC_RANGE(); - CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Filter expression must not be empty"); @@ -230,8 +225,6 @@ hybrid_scan_reader_impl::filter_row_groups_with_dictionary_pages( parquet_reader_options const& options, rmm::cuda_stream_view stream) { - CUDF_FUNC_RANGE(); - 
CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); @@ -302,8 +295,6 @@ std::vector> hybrid_scan_reader_impl::filter_row_groups_w CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); - CUDF_FUNC_RANGE(); - select_columns(read_columns_mode::FILTER_COLUMNS, options); table_metadata metadata; @@ -331,8 +322,6 @@ std::unique_ptr hybrid_scan_reader_impl::build_row_mask_with_page_ CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); - CUDF_FUNC_RANGE(); - select_columns(read_columns_mode::FILTER_COLUMNS, options); table_metadata metadata; @@ -355,8 +344,6 @@ std::pair, std::vector> hybrid_scan_reader_impl::get_input_column_chunk_byte_ranges( cudf::host_span const> row_group_indices) const { - CUDF_FUNC_RANGE(); - // Descriptors for all the chunks that make up the selected columns auto const num_input_columns = _input_columns.size(); auto const num_row_groups = @@ -438,8 +425,6 @@ table_with_metadata hybrid_scan_reader_impl::materialize_filter_columns( CUDF_EXPECTS(not row_group_indices.empty(), "Empty input row group indices encountered"); CUDF_EXPECTS(options.get_filter().has_value(), "Encountered empty converted filter expression"); - CUDF_FUNC_RANGE(); - reset_internal_state(); table_metadata metadata; @@ -477,8 +462,6 @@ table_with_metadata hybrid_scan_reader_impl::materialize_payload_columns( CUDF_EXPECTS(row_mask.null_count() == 0, "Row mask must not have any nulls when materializing payload column"); - CUDF_FUNC_RANGE(); - reset_internal_state(); initialize_options(row_group_indices, options, stream); @@ -728,8 +711,6 @@ table_with_metadata hybrid_scan_reader_impl::finalize_output( void hybrid_scan_reader_impl::set_pass_page_mask( cudf::host_span const> data_page_mask) { - CUDF_FUNC_RANGE(); - auto const& pass = _pass_itm_data; auto const& chunks = pass->chunks; From a415e0abc8f9264dc7f222b894c686ef77d4b40f Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 8 Aug 2025 11:09:12 -0400 Subject: [PATCH 084/366] Add fast path for Parquet reading with predicate pushdown via AST filters (#19605) Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19605 --- python/cudf/benchmarks/API/bench_io.py | 37 ++++++ python/cudf/cudf/io/parquet.py | 175 +++++++++++++++++++++++-- 2 files changed, 202 insertions(+), 10 deletions(-) create mode 100644 python/cudf/benchmarks/API/bench_io.py diff --git a/python/cudf/benchmarks/API/bench_io.py b/python/cudf/benchmarks/API/bench_io.py new file mode 100644 index 00000000000..8a05c61ee47 --- /dev/null +++ b/python/cudf/benchmarks/API/bench_io.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
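+#
+# These benchmarks drive ``cudf.read_parquet`` with DNF-style ``filters``,
+# which this change teaches the cudf engine to translate into a libcudf
+# AST for predicate pushdown (see the ``python/cudf/cudf/io/parquet.py``
+# hunks below).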
+ +"""Benchmarks for IO operations.""" + +import os +from tempfile import TemporaryDirectory + +import pytest +from config import NUM_ROWS + +import cudf + + +@pytest.mark.parametrize("num_rows", NUM_ROWS) +def bench_read_parquet_with_filters(benchmark, num_rows): + df = cudf.DataFrame( + { + "x": cudf.Series(range(num_rows), dtype="int32"), + "y": cudf.Series(range(num_rows, 2 * num_rows), dtype="float64"), + "z": cudf.Series( + ["a", "b", "c", "d"] * (num_rows // 4), dtype="str" + ), + } + ) + + with TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "filtered.parquet") + df.to_parquet(path) + + threshold = num_rows // 2 + + filters = [ + [("x", ">", threshold), ("z", "in", ["a", "b"])], + [("y", "<", threshold), ("z", "not in", ["c"])], + ] + + benchmark(cudf.read_parquet, path, filters=filters) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 7bcb80ffbdc..25a38a4709e 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1,6 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. from __future__ import annotations +import datetime import io import itertools import math @@ -18,6 +19,7 @@ import pandas as pd import pylibcudf as plc +from pylibcudf import expressions as plc_expr from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock @@ -534,6 +536,140 @@ def write_to_dataset( return metadata +_BIN_OPS = { + "==": plc_expr.ASTOperator.EQUAL, + "!=": plc_expr.ASTOperator.NOT_EQUAL, + "<": plc_expr.ASTOperator.LESS, + "<=": plc_expr.ASTOperator.LESS_EQUAL, + ">": plc_expr.ASTOperator.GREATER, + ">=": plc_expr.ASTOperator.GREATER_EQUAL, +} + + +def _raise_for_unsupported_scalar_types(val: Any): + if not ( + isinstance(val, (int, float)) + or isinstance(val, (datetime.date, datetime.timedelta)) + or isinstance(val, str) + ): + return val + raise NotImplementedError( + "Only numeric, string, or timestamp/duration scalars are accepted" + ) + + +def make_literal(v: Any) -> plc_expr.Literal: + return plc_expr.Literal( + plc.Scalar.from_py(_raise_for_unsupported_scalar_types(v)) + ) + + +def make_expr(col: str, op: str, val: Any) -> plc_expr.Expression: + col_ref = plc_expr.ColumnNameReference(col) + + match op: + case op if op in _BIN_OPS: + return plc_expr.Operation( + _BIN_OPS[op], + col_ref, + make_literal(val), + ) + + case "in": + equal_ops = [ + plc_expr.Operation( + plc_expr.ASTOperator.EQUAL, col_ref, make_literal(v) + ) + for v in val + ] + return reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_OR), + equal_ops, + ) + + case "not in": + not_equal_ops = [ + plc_expr.Operation( + plc_expr.ASTOperator.NOT_EQUAL, col_ref, make_literal(v) + ) + for v in val + ] + return reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_AND), + not_equal_ops, + ) + + case "is": + if val is None: + return plc_expr.Operation( + plc_expr.ASTOperator.IS_NULL, col_ref + ) + raise NotImplementedError("Only `is None` supported") + + case "is not": + if val is None: + return plc_expr.Operation( + plc_expr.ASTOperator.NOT, + plc_expr.Operation(plc_expr.ASTOperator.IS_NULL, col_ref), + ) + raise NotImplementedError("Only `is not None` supported") + + case _: + raise NotImplementedError(f"Unsupported op: {op}") + + +def translate_filters_to_ast( + filters: list[list[tuple[str, str, Any]]], +) -> plc_expr.Expression: + """ + Convert a filter expression in Disjunctive Normal Form (DNF) to a pylibcudf AST. 
+ + Parameters + ---------- + filters : list[list[tuple[str, str, Any]]] + A filter expression in DNF. + DNF is represented as a list of OR-ed clauses, where each clause is a list of + AND-ed predicates. Each predicate is a tuple of the form: + (column_name, operator, value) + + Returns + ------- + plc.expressions.Expression + A pylibcudf AST expression representing the filter. + + Notes + ----- + This function supports the following operators: + - Binary comparisons: ==, !=, <, <=, >, >= + - Membership tests: "in", "not in" + - Null checks: "is None", "is not None" + + The translation is performed by recursively reducing: + - Each clause's predicates into a conjunction (AND) + - All clauses into a disjunction (OR) of those conjunctions + """ + + clauses = [ + reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_AND), + (make_expr(*pred) for pred in clause), + ) + for clause in filters + ] + + if not clauses: + raise ValueError("Empty filter expression") + + return ( + reduce( + partial(plc_expr.Operation, plc_expr.ASTOperator.LOGICAL_OR), + clauses, + ) + if len(clauses) > 1 + else clauses[0] + ) + + def _parse_metadata(meta) -> tuple[bool, Any, None | np.dtype]: file_is_range_index = False file_index_cols = None @@ -824,13 +960,26 @@ def read_parquet( # Normalize and validate filters filters = _normalize_filters(filters) + # Attempt to translate filters to a libcudf AST + ast_filter = None + if ( + engine == "cudf" + and filters is not None + and categorical_partitions is None + and dataset_kwargs is None + ): + try: + ast_filter = translate_filters_to_ast(filters) + except NotImplementedError: + pass + # Use pyarrow dataset to detect/process directory-partitioned # data and apply filters. Note that we can only support partitioned # data and filtering if the input is a single directory or list of # paths. partition_keys = [] partition_categories = {} - if fs and paths: + if ast_filter is None and fs and paths: ( paths, row_groups, @@ -848,7 +997,7 @@ def read_parquet( # Prepare remote-IO options prefetch_options = kwargs.pop("prefetch_options", {}) - if not ioutils._is_local_filesystem(fs): + if ast_filter is None and not ioutils._is_local_filesystem(fs): # The default prefetch method depends on the # `row_groups` argument. In most cases we will use # method="all" by default, because it is fastest @@ -912,7 +1061,7 @@ def read_parquet( # will be dropped almost immediately after IO. However, # we do NEED these columns for accurate filtering. projected_columns = None - if columns and filters: + if ast_filter is None and columns and filters: projected_columns = columns columns = sorted( set(v[0] for v in itertools.chain.from_iterable(filters)) @@ -933,10 +1082,13 @@ def read_parquet( nrows=nrows, skip_rows=skip_rows, allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, + filters=ast_filter, **kwargs, ) # Apply filters row-wise (if any are defined), and return - df = _apply_post_filters(df, filters) + if ast_filter is None: + df = _apply_post_filters(df, filters) + if projected_columns: # Elements of `projected_columns` may now be in the index. 
# We must filter these names from our projection @@ -1169,6 +1321,7 @@ def _read_parquet( nrows: int | None = None, skip_rows: int | None = None, allow_mismatched_pq_schemas: bool = False, + filters: plc_expr.Expression | None = None, *args, **kwargs, ) -> DataFrame: @@ -1176,7 +1329,7 @@ def _read_parquet( # cudf and pyarrow to read parquet data if engine == "cudf": if set(kwargs.keys()).difference( - set(("_chunk_read_limit", "_pass_read_limit")) + set(("_chunk_read_limit", "_pass_read_limit", "filters")) ): raise ValueError( "cudf engine doesn't support the " @@ -1192,10 +1345,11 @@ def _read_parquet( if skip_rows is None: skip_rows = 0 if get_option("io.parquet.low_memory"): - # Note: If this function ever takes accepts filters - # allow_range_index needs to be False when a filter is passed - # (see read_parquet) - allow_range_index = columns is not None and len(columns) != 0 + # If filters are used, we can't rely on a RangeIndex + # (row count may be reduced) + allow_range_index = ( + filters is None and columns is not None and len(columns) != 0 + ) options = ( plc.io.parquet.ParquetReaderOptions.builder( @@ -1213,6 +1367,8 @@ def _read_parquet( options.set_skip_rows(skip_rows) if columns is not None: options.set_columns(columns) + if filters is not None: + options.set_filter(filters) reader = plc.io.parquet.ChunkedParquetReader( options, @@ -1261,7 +1417,6 @@ def _read_parquet( return df else: allow_range_index = True - filters = kwargs.get("filters", None) if columns is not None and len(columns) == 0 or filters: allow_range_index = False From 65e55a7b885b5d1660687ad6c1e143a48bb1fa69 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 8 Aug 2025 11:13:57 -0400 Subject: [PATCH 085/366] Fix cudf::sequence() to throw exception for invalid scalar inputs (#19612) Fix the `cudf::sequence()` logic to throw an exception if given scalar parameters set with `valid=false`. This mostly affected some benchmark data generation logic. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19612 --- cpp/benchmarks/common/generate_input.cu | 29 +++++++++---------------- cpp/include/cudf/filling.hpp | 14 +++++++----- cpp/src/filling/sequence.cu | 15 ++++++++----- cpp/tests/filling/sequence_tests.cpp | 21 ++++++++++++++---- 4 files changed, 45 insertions(+), 34 deletions(-) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index d8e868a4ae8..03fe04fe5d6 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -640,14 +640,7 @@ std::unique_ptr create_distinct_rows_column(data_profile const& pr auto valid_dist = random_value_fn( distribution_params{1.
- profile.get_null_probability().value_or(0)}); - cudf::data_type const dtype = [&]() { - if constexpr (cudf::is_fixed_point()) - return cudf::data_type{cudf::type_to_id(), 0}; - else - return cudf::data_type{cudf::type_to_id()}; - }(); - - auto init = cudf::make_default_constructed_scalar(dtype); + auto init = cudf::make_fixed_width_scalar(T{}); auto col = cudf::sequence(num_rows, *init); rmm::device_uvector null_mask(0, cudf::get_default_stream()); @@ -698,9 +691,8 @@ template <> std::unique_ptr create_distinct_rows_column( data_profile const& profile, thrust::minstd_rand& engine, cudf::size_type num_rows) { - auto col = create_random_column(profile, engine, num_rows); - auto int_col = cudf::sequence( - num_rows, *cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_id::INT32})); + auto col = create_random_column(profile, engine, num_rows); + auto int_col = cudf::sequence(num_rows, *cudf::make_fixed_width_scalar(0)); auto int2strcol = cudf::strings::from_integers(int_col->view()); auto concat_col = cudf::strings::concatenate(cudf::table_view({col->view(), int2strcol->view()})); return std::move(cudf::sample(cudf::table_view({concat_col->view()}), num_rows)->release()[0]); @@ -793,8 +785,7 @@ std::unique_ptr create_distinct_rows_column( auto const dist_params = profile.get_distribution_params(); auto col = create_random_column(profile, engine, num_rows); std::vector> children; - children.push_back(cudf::sequence( - num_rows, *cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_id::INT32}))); + children.push_back(cudf::sequence(num_rows, *cudf::make_fixed_width_scalar(0))); for (int lvl = dist_params.max_depth; lvl > 1; --lvl) { std::vector> parents; parents.push_back( @@ -891,12 +882,11 @@ std::unique_ptr create_distinct_rows_column( { auto const dist_params = profile.get_distribution_params(); auto col = create_random_column(profile, engine, num_rows); - auto child_column = cudf::sequence( - num_rows, *cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_id::INT32})); + auto zero = cudf::make_fixed_width_scalar(0); + auto child_column = cudf::sequence(num_rows, *zero); for (int lvl = dist_params.max_depth; lvl > 0; --lvl) { - auto offsets_column = cudf::sequence( - num_rows + 1, *cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_id::INT32})); - auto list_column = cudf::make_lists_column( + auto offsets_column = cudf::sequence(num_rows + 1, *zero); + auto list_column = cudf::make_lists_column( num_rows, std::move(offsets_column), std::move(child_column), 0, rmm::device_buffer{}); std::swap(child_column, list_column); } @@ -1021,7 +1011,8 @@ std::unique_ptr create_sequence_table(std::vector co auto columns = std::vector>(dtype_ids.size()); std::transform(dtype_ids.begin(), dtype_ids.end(), columns.begin(), [&](auto dtype) mutable { auto init = cudf::make_default_constructed_scalar(cudf::data_type{dtype}); - auto col = cudf::sequence(num_rows.count, *init); + init->set_valid_async(true); + auto col = cudf::sequence(num_rows.count, *init); auto [mask, count] = create_random_null_mask(num_rows.count, null_probability, seed_dist(seed_engine)); col->set_null_mask(std::move(mask), count); diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 15a21b44f3b..8650dc9b0d9 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -166,9 +166,10 @@ std::unique_ptr repeat( * step = 2 * return = [0, 2, 4] * ``` - * @throws cudf::logic_error if @p init and @p step are not the same type. - * @throws cudf::logic_error if scalar types are not numeric. - * @throws cudf::logic_error if @p size is < 0. + * + * @throws std::invalid_argument if @p init and @p step are not the same type. + * @throws std::invalid_argument if scalar types are not numeric or invalid + * @throws std::invalid_argument if @p size is < 0. * * @param size Size of the output column * @param init First value in the sequence @@ -196,8 +197,9 @@ std::unique_ptr sequence( * init = 0 * return = [0, 1, 2] * ``` - * @throws cudf::logic_error if @p init is not numeric. - * @throws cudf::logic_error if @p size is < 0. + * + * @throws std::invalid_argument if @p init is not numeric or invalid + * @throws std::invalid_argument if @p size is < 0 * * @param size Size of the output column * @param init First value in the sequence diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index 2793f381002..76aca186094 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -117,7 +118,7 @@ struct sequence_functor { std::unique_ptr operator()(Args&&...) requires(not cudf::is_numeric() or cudf::is_boolean()) { - CUDF_FAIL("Unsupported sequence scalar type"); + CUDF_FAIL("Unsupported sequence scalar type", cudf::data_type_error); } }; @@ -132,8 +133,11 @@ std::unique_ptr sequence(size_type size, CUDF_EXPECTS(cudf::have_same_types(init, step), "init and step must be of the same type.", cudf::data_type_error); - CUDF_EXPECTS(size >= 0, "size must be >= 0"); - CUDF_EXPECTS(is_numeric(init.type()), "Input scalar types must be numeric"); + CUDF_EXPECTS(size >= 0, "size must be >= 0", std::invalid_argument); + CUDF_EXPECTS( + is_numeric(init.type()), "Input scalar types must be numeric", std::invalid_argument); + CUDF_EXPECTS(init.is_valid(stream), "init must be a valid scalar", std::invalid_argument); + CUDF_EXPECTS(step.is_valid(stream), "step must be a valid scalar", std::invalid_argument); return type_dispatcher(init.type(), sequence_functor{}, size, init, step, stream, mr); } @@ -143,8 +147,9 @@ std::unique_ptr sequence(size_type size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS(size >= 0, "size must be >= 0"); - CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric"); + CUDF_EXPECTS(size >= 0, "size must be >= 0", std::invalid_argument); + CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric", cudf::data_type_error); + CUDF_EXPECTS(init.is_valid(stream), "init must be a valid scalar", std::invalid_argument); return type_dispatcher(init.type(), sequence_functor{}, size, init, stream, mr); } diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index 53782c90c26..cf49d9f6c04 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -84,15 +84,15 @@ TEST_F(SequenceTestFixture, BadTypes) { cudf::string_scalar string_init("zero"); cudf::string_scalar string_step("???"); - EXPECT_THROW(cudf::sequence(10, string_init, string_step), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, string_init, string_step), std::invalid_argument); cudf::numeric_scalar bool_init(true); cudf::numeric_scalar bool_step(false); - EXPECT_THROW(cudf::sequence(10, bool_init, bool_step), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, bool_init, bool_step), cudf::data_type_error); cudf::timestamp_scalar ts_init(cudf::duration_s{10}, true); cudf::timestamp_scalar ts_step(cudf::duration_s{10}, true); - EXPECT_THROW(cudf::sequence(10, ts_init, ts_step), cudf::logic_error); + EXPECT_THROW(cudf::sequence(10, ts_init, ts_step), std::invalid_argument); } TEST_F(SequenceTestFixture, MismatchedInputs) @@ -110,6 +110,19 @@ TEST_F(SequenceTestFixture, MismatchedInputs) EXPECT_THROW(cudf::sequence(10, init3, step3), cudf::data_type_error); } +TEST_F(SequenceTestFixture, InvalidInput) +{ + cudf::numeric_scalar init(0, false); + EXPECT_THROW(cudf::sequence(10, init), std::invalid_argument); + cudf::numeric_scalar step1(1); + EXPECT_THROW(cudf::sequence(10, init, step1), std::invalid_argument); + + cudf::numeric_scalar zero(0); + cudf::numeric_scalar step(1, false); + EXPECT_THROW(cudf::sequence(10, zero, step), std::invalid_argument); + EXPECT_THROW(cudf::sequence(10, init, step), std::invalid_argument); +} + TYPED_TEST(SequenceTypedTestFixture, DefaultStep) { using T = TypeParam; From 3ff177cfd7d7772dbaf38b3f3edbd9083f717e18 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 8 Aug 2025 12:47:47 -0400 Subject: [PATCH 086/366] Add PDS-DS queries 2 through 10 to cudf-polars benchmarks (#19488) Contributes to #19125 by adding PDS-DS queries 2 through 10. And to https://github.com/rapidsai/cudf/issues/19533 by removing the CPU collect calls in Q6 and Q9. 
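A minimal sketch of the lazy-query pattern used throughout (hypothetical
table path and aggregation, not one of the actual queries): each
`polars_impl` builds and returns a `pl.LazyFrame`, leaving collection to
the benchmark runner rather than calling `.collect()` inside the query
body.

```python
import polars as pl


def polars_impl(run_config) -> pl.LazyFrame:
    # Build the query lazily; the benchmark runner decides when and on
    # which engine to collect, so no eager CPU collect happens here.
    store_sales = pl.scan_parquet("store_sales.parquet")  # hypothetical path
    return store_sales.group_by("ss_store_sk").agg(
        pl.col("ss_net_profit").sum().alias("profit")
    )
```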
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19488 --- .../benchmarks/pdsds_queries/q10.py | 225 ++++++++ .../benchmarks/pdsds_queries/q2.py | 244 ++++++++ .../benchmarks/pdsds_queries/q3.py | 65 +++ .../benchmarks/pdsds_queries/q4.py | 359 ++++++++++++ .../benchmarks/pdsds_queries/q5.py | 462 +++++++++++++++ .../benchmarks/pdsds_queries/q6.py | 92 +++ .../benchmarks/pdsds_queries/q7.py | 79 +++ .../benchmarks/pdsds_queries/q8.py | 524 ++++++++++++++++++ .../benchmarks/pdsds_queries/q9.py | 137 +++++ 9 files changed, 2187 insertions(+) create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q10.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q2.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q3.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q4.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q5.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q6.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q7.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q8.py create mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q9.py diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q10.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q10.py new file mode 100644 index 00000000000..f4818511009 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q10.py @@ -0,0 +1,225 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 10.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 10.""" + return """ + SELECT cd_gender, + cd_marital_status, + cd_education_status, + Count(*) cnt1, + cd_purchase_estimate, + Count(*) cnt2, + cd_credit_rating, + Count(*) cnt3, + cd_dep_count, + Count(*) cnt4, + cd_dep_employed_count, + Count(*) cnt5, + cd_dep_college_count, + Count(*) cnt6 + FROM customer c, + customer_address ca, + customer_demographics + WHERE c.c_current_addr_sk = ca.ca_address_sk + AND ca_county IN ( 'Lycoming County', 'Sheridan County', + 'Kandiyohi County', + 'Pike County', + 'Greene County' ) + AND cd_demo_sk = c.c_current_cdemo_sk + AND EXISTS (SELECT * + FROM store_sales, + date_dim + WHERE c.c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 2002 + AND d_moy BETWEEN 4 AND 4 + 3) + AND ( EXISTS (SELECT * + FROM web_sales, + date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 2002 + AND d_moy BETWEEN 4 AND 4 + 3) + OR EXISTS (SELECT * + FROM catalog_sales, + date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 2002 + AND d_moy BETWEEN 4 AND 4 + 3) ) + GROUP BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + ORDER BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + LIMIT 100; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 10.""" + # Load required tables + customer = get_data(run_config.dataset_path, "customer", run_config.suffix) + customer_address = get_data( + run_config.dataset_path, "customer_address", run_config.suffix + ) + customer_demographics = get_data( + run_config.dataset_path, "customer_demographics", run_config.suffix + ) + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix) + catalog_sales = get_data( + run_config.dataset_path, "catalog_sales", run_config.suffix + ) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + + # Target counties and date range + target_counties = [ + "Lycoming County", + "Sheridan County", + "Kandiyohi County", + "Pike County", + "Greene County", + ] + + # Get customers with store sales in the target period (EXISTS condition 1) + store_customers = ( + store_sales.join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk") + .filter( + (pl.col("d_year") == 2002) + & (pl.col("d_moy").is_between(4, 7, closed="both")) + ) + .select("ss_customer_sk") + .unique() + ) + + # Get customers with web sales in the target period (EXISTS condition 2a) + web_customers = ( + web_sales.join(date_dim, left_on="ws_sold_date_sk", right_on="d_date_sk") + .filter( + (pl.col("d_year") == 2002) + & (pl.col("d_moy").is_between(4, 7, closed="both")) + ) + .select(pl.col("ws_bill_customer_sk").alias("customer_sk")) + .unique() + ) + + # Get customers with catalog sales in the target period (EXISTS condition 2b) + catalog_customers = ( + 
catalog_sales.join(date_dim, left_on="cs_sold_date_sk", right_on="d_date_sk") + .filter( + (pl.col("d_year") == 2002) + & (pl.col("d_moy").is_between(4, 7, closed="both")) + ) + .select(pl.col("cs_ship_customer_sk").alias("customer_sk")) + .unique() + ) + + # Combine web and catalog customers (OR condition) + web_or_catalog_customers = pl.concat([web_customers, catalog_customers]).unique() + + # Main query: join customer tables and apply filters + return ( + customer.join( + customer_address, left_on="c_current_addr_sk", right_on="ca_address_sk" + ) + .join( + customer_demographics, left_on="c_current_cdemo_sk", right_on="cd_demo_sk" + ) + .filter(pl.col("ca_county").is_in(target_counties)) + # Apply EXISTS conditions through joins + .join( + store_customers, + left_on="c_customer_sk", + right_on="ss_customer_sk", + how="inner", + ) + .join( + web_or_catalog_customers, + left_on="c_customer_sk", + right_on="customer_sk", + how="inner", + ) + .group_by( + [ + "cd_gender", + "cd_marital_status", + "cd_education_status", + "cd_purchase_estimate", + "cd_credit_rating", + "cd_dep_count", + "cd_dep_employed_count", + "cd_dep_college_count", + ] + ) + .agg( + [ + # Cast -> Int64 to match DuckDB + # TODO: We should plan to make these optional + pl.len().alias("cnt1").cast(pl.Int64), + pl.len().alias("cnt2").cast(pl.Int64), + pl.len().alias("cnt3").cast(pl.Int64), + pl.len().alias("cnt4").cast(pl.Int64), + pl.len().alias("cnt5").cast(pl.Int64), + pl.len().alias("cnt6").cast(pl.Int64), + ] + ) + .sort( + [ + "cd_gender", + "cd_marital_status", + "cd_education_status", + "cd_purchase_estimate", + "cd_credit_rating", + "cd_dep_count", + "cd_dep_employed_count", + "cd_dep_college_count", + ], + nulls_last=True, + ) + .limit(100) + .select( + [ + "cd_gender", + "cd_marital_status", + "cd_education_status", + "cnt1", + "cd_purchase_estimate", + "cnt2", + "cd_credit_rating", + "cnt3", + "cd_dep_count", + "cnt4", + "cd_dep_employed_count", + "cnt5", + "cd_dep_college_count", + "cnt6", + ] + ) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q2.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q2.py new file mode 100644 index 00000000000..76fe4c05571 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q2.py @@ -0,0 +1,244 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 2.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 2.""" + return """ + WITH wscs + AS (SELECT sold_date_sk, + sales_price + FROM (SELECT ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + FROM web_sales) + UNION ALL + (SELECT cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + FROM catalog_sales)), + wswscs + AS (SELECT d_week_seq, + Sum(CASE + WHEN ( d_day_name = 'Sunday' ) THEN sales_price + ELSE NULL + END) sun_sales, + Sum(CASE + WHEN ( d_day_name = 'Monday' ) THEN sales_price + ELSE NULL + END) mon_sales, + Sum(CASE + WHEN ( d_day_name = 'Tuesday' ) THEN sales_price + ELSE NULL + END) tue_sales, + Sum(CASE + WHEN ( d_day_name = 'Wednesday' ) THEN sales_price + ELSE NULL + END) wed_sales, + Sum(CASE + WHEN ( d_day_name = 'Thursday' ) THEN sales_price + ELSE NULL + END) thu_sales, + Sum(CASE + WHEN ( d_day_name = 'Friday' ) THEN sales_price + ELSE NULL + END) fri_sales, + Sum(CASE + WHEN ( d_day_name = 'Saturday' ) THEN sales_price + ELSE NULL + END) sat_sales + FROM wscs, + date_dim + WHERE d_date_sk = sold_date_sk + GROUP BY d_week_seq) + SELECT d_week_seq1, + Round(sun_sales1 / sun_sales2, 2), + Round(mon_sales1 / mon_sales2, 2), + Round(tue_sales1 / tue_sales2, 2), + Round(wed_sales1 / wed_sales2, 2), + Round(thu_sales1 / thu_sales2, 2), + Round(fri_sales1 / fri_sales2, 2), + Round(sat_sales1 / sat_sales2, 2) + FROM (SELECT wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wswscs, + date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq + AND d_year = 1998) y, + (SELECT wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wswscs, + date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq + AND d_year = 1998 + 1) z + WHERE d_week_seq1 = d_week_seq2 - 53 + ORDER BY d_week_seq1; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 2.""" + # Load required tables + web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix) + catalog_sales = get_data( + run_config.dataset_path, "catalog_sales", run_config.suffix + ) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + # Step 1: Create wscs CTE equivalent (union of web and catalog sales) + wscs = pl.concat( + [ + web_sales.select( + [ + pl.col("ws_sold_date_sk").alias("sold_date_sk"), + pl.col("ws_ext_sales_price").alias("sales_price"), + ] + ), + catalog_sales.select( + [ + pl.col("cs_sold_date_sk").alias("sold_date_sk"), + pl.col("cs_ext_sales_price").alias("sales_price"), + ] + ), + ] + ) + # Step 2: Create wswscs CTE equivalent (aggregate by week and day of week) + # First join with date_dim to get day names + wscs_with_dates = wscs.join(date_dim, left_on="sold_date_sk", right_on="d_date_sk") + # Create separate aggregations for each day to better control null handling + days = ( + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + ) + day_cols = ( + "sun_sales", + "mon_sales", + 
"tue_sales", + "wed_sales", + "thu_sales", + "fri_sales", + "sat_sales", + ) + # Start with all week sequences + all_weeks = wscs_with_dates.select("d_week_seq").unique() + wswscs = all_weeks + + wswscs = ( + wscs_with_dates.with_columns( + [ + pl.when(pl.col("d_day_name") == day) + .then(pl.col("sales_price")) + .otherwise(None) + .alias(name) + for day, name in zip(days, day_cols, strict=True) + ] + ) + .group_by("d_week_seq") + .agg( + *(pl.col(name).sum().alias(name) for name in day_cols), + *(pl.col(name).count().alias(f"{name}_count") for name in day_cols), + ) + .with_columns( + [ + pl.when(pl.col(f"{name}_count") > 0) + .then(pl.col(name)) + .otherwise(None) + .alias(name) + for name in day_cols + ] + ) + .select(["d_week_seq", *day_cols]) + ) + + # Step 3: Create year 1998 data (y subquery equivalent) + y_1998 = ( + wswscs.join(date_dim, left_on="d_week_seq", right_on="d_week_seq") + .filter(pl.col("d_year") == 1998) + .select( + [ + pl.col("d_week_seq").alias("d_week_seq1"), + pl.col("sun_sales").alias("sun_sales1"), + pl.col("mon_sales").alias("mon_sales1"), + pl.col("tue_sales").alias("tue_sales1"), + pl.col("wed_sales").alias("wed_sales1"), + pl.col("thu_sales").alias("thu_sales1"), + pl.col("fri_sales").alias("fri_sales1"), + pl.col("sat_sales").alias("sat_sales1"), + ] + ) + ) + # Step 4: Create year 1999 data (z subquery equivalent) + z_1999 = ( + wswscs.join(date_dim, left_on="d_week_seq", right_on="d_week_seq") + .filter(pl.col("d_year") == 1999) + .select( + [ + pl.col("d_week_seq").alias("d_week_seq2"), + pl.col("sun_sales").alias("sun_sales2"), + pl.col("mon_sales").alias("mon_sales2"), + pl.col("tue_sales").alias("tue_sales2"), + pl.col("wed_sales").alias("wed_sales2"), + pl.col("thu_sales").alias("thu_sales2"), + pl.col("fri_sales").alias("fri_sales2"), + pl.col("sat_sales").alias("sat_sales2"), + ] + ) + ) + # Step 5: Join the two years and calculate ratios + return ( + y_1998.join(z_1999, left_on="d_week_seq1", right_on=pl.col("d_week_seq2") - 53) + .select( + [ + pl.col("d_week_seq1"), + (pl.col("sun_sales1") / pl.col("sun_sales2")) + .round(2) + .alias("round((sun_sales1 / sun_sales2), 2)"), + (pl.col("mon_sales1") / pl.col("mon_sales2")) + .round(2) + .alias("round((mon_sales1 / mon_sales2), 2)"), + (pl.col("tue_sales1") / pl.col("tue_sales2")) + .round(2) + .alias("round((tue_sales1 / tue_sales2), 2)"), + (pl.col("wed_sales1") / pl.col("wed_sales2")) + .round(2) + .alias("round((wed_sales1 / wed_sales2), 2)"), + (pl.col("thu_sales1") / pl.col("thu_sales2")) + .round(2) + .alias("round((thu_sales1 / thu_sales2), 2)"), + (pl.col("fri_sales1") / pl.col("fri_sales2")) + .round(2) + .alias("round((fri_sales1 / fri_sales2), 2)"), + (pl.col("sat_sales1") / pl.col("sat_sales2")) + .round(2) + .alias("round((sat_sales1 / sat_sales2), 2)"), + ] + ) + .sort("d_week_seq1") + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q3.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q3.py new file mode 100644 index 00000000000..effb62d09ff --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q3.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 3.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 3.""" + return """ + SELECT dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + Sum(ss_ext_discount_amt) sum_agg + FROM date_dim dt, + store_sales, + item + WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 427 + AND dt.d_moy = 11 + GROUP BY dt.d_year, + item.i_brand, + item.i_brand_id + ORDER BY dt.d_year, + sum_agg DESC, + brand_id + LIMIT 100; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 3.""" + # Load required tables + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + item = get_data(run_config.dataset_path, "item", run_config.suffix) + # Execute the query following the SQL logic + return ( + date_dim.join(store_sales, left_on="d_date_sk", right_on="ss_sold_date_sk") + .join(item, left_on="ss_item_sk", right_on="i_item_sk") + .filter((pl.col("i_manufact_id") == 427) & (pl.col("d_moy") == 11)) + .group_by(["d_year", "i_brand", "i_brand_id"]) + .agg([pl.col("ss_ext_discount_amt").sum().alias("sum_agg")]) + .select( + [ + pl.col("d_year"), + pl.col("i_brand_id").alias("brand_id"), + pl.col("i_brand").alias("brand"), + pl.col("sum_agg"), + ] + ) + .sort(["d_year", "sum_agg", "brand_id"], descending=[False, True, False]) + .limit(100) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q4.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q4.py new file mode 100644 index 00000000000..357eb260c77 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q4.py @@ -0,0 +1,359 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 4.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 4.""" + return """ + WITH year_total + AS (SELECT c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag + , + c_birth_country + customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + Sum(( ( ss_ext_list_price - ss_ext_wholesale_cost + - ss_ext_discount_amt + ) + + + ss_ext_sales_price ) / 2) year_total, + 's' sale_type + FROM customer, + store_sales, + date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag + customer_preferred_cust_flag, + c_birth_country customer_birth_country + , + c_login + customer_login, + c_email_address customer_email_address + , + d_year dyear + , + Sum(( ( ( cs_ext_list_price + - cs_ext_wholesale_cost + - cs_ext_discount_amt + ) + + cs_ext_sales_price ) / 2 )) year_total, + 'c' sale_type + FROM customer, + catalog_sales, + date_dim + WHERE c_customer_sk = cs_bill_customer_sk + AND cs_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag + customer_preferred_cust_flag, + c_birth_country customer_birth_country + , + c_login + customer_login, + c_email_address customer_email_address + , + d_year dyear + , + Sum(( ( ( ws_ext_list_price + - ws_ext_wholesale_cost + - ws_ext_discount_amt + ) + + ws_ext_sales_price ) / 2 )) year_total, + 'w' sale_type + FROM customer, + web_sales, + date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year) + SELECT t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag + FROM year_total t_s_firstyear, + year_total t_s_secyear, + year_total t_c_firstyear, + year_total t_c_secyear, + year_total t_w_firstyear, + year_total t_w_secyear + WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_c_secyear.customer_id + AND t_s_firstyear.customer_id = t_c_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_c_firstyear.sale_type = 'c' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_c_secyear.sale_type = 'c' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_c_firstyear.dyear = 2001 + AND t_c_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + 
AND t_s_firstyear.year_total > 0 + AND t_c_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE + WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / + t_c_firstyear.year_total + ELSE NULL + END > CASE + WHEN t_s_firstyear.year_total > 0 THEN + t_s_secyear.year_total / + t_s_firstyear.year_total + ELSE NULL + END + AND CASE + WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / + t_c_firstyear.year_total + ELSE NULL + END > CASE + WHEN t_w_firstyear.year_total > 0 THEN + t_w_secyear.year_total / + t_w_firstyear.year_total + ELSE NULL + END + ORDER BY t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag + LIMIT 100; + """ + + +def build_sales_subquery(  # noqa: D103 + sales_df: pl.LazyFrame, + date_df: pl.LazyFrame, + customer_df: pl.LazyFrame, + sold_date_key: str, + customer_key: str, + col_prefix: str, + *, + year_filter: bool = False, + include_customer_info: bool = False, +) -> pl.LazyFrame: + profit_expr = ( + ( + pl.col(f"{col_prefix}ext_list_price") + - pl.col(f"{col_prefix}ext_wholesale_cost") + - pl.col(f"{col_prefix}ext_discount_amt") + ) + + pl.col(f"{col_prefix}ext_sales_price") + ) / 2 + + df = ( + sales_df.join(date_df, left_on=sold_date_key, right_on="d_date_sk") + .join(customer_df, left_on=customer_key, right_on="c_customer_sk") + .group_by( + [ + "c_customer_id", + "c_first_name", + "c_last_name", + "c_preferred_cust_flag", + "c_birth_country", + "c_login", + "c_email_address", + "d_year", + ] + ) + .agg(profit_expr.sum().alias("year_total")) + ) + + if year_filter: + df = df.filter(pl.col("year_total") > 0) + + if include_customer_info: + return df.select( + [ + pl.col("c_customer_id").alias("customer_id"), + pl.col("c_first_name").alias("customer_first_name"), + pl.col("c_last_name").alias("customer_last_name"), + pl.col("c_preferred_cust_flag").alias("customer_preferred_cust_flag"), + pl.col("year_total"), + ] + ) + else: + return df.select( + [pl.col("c_customer_id").alias("customer_id"), pl.col("year_total")] + ) + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 4.""" + # Load required tables + customer = get_data(run_config.dataset_path, "customer", run_config.suffix) + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + catalog_sales = get_data( + run_config.dataset_path, "catalog_sales", run_config.suffix + ) + web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + date_2001 = date_dim.filter(pl.col("d_year") == 2001) + date_2002 = date_dim.filter(pl.col("d_year") == 2002) + + # Store sales - first year (2001) + t_s_firstyear = build_sales_subquery( + store_sales, + date_2001, + customer, + sold_date_key="ss_sold_date_sk", + customer_key="ss_customer_sk", + col_prefix="ss_", + year_filter=True, + include_customer_info=True, + ) + + # Store sales - second year (2002) + t_s_secyear = build_sales_subquery( + store_sales, + date_2002, + customer, + sold_date_key="ss_sold_date_sk", + customer_key="ss_customer_sk", + col_prefix="ss_", + year_filter=False, + include_customer_info=True, + ) + + # Catalog sales - first year (2001) + t_c_firstyear = build_sales_subquery( + catalog_sales, + date_2001, + customer, + sold_date_key="cs_sold_date_sk", + customer_key="cs_bill_customer_sk", + col_prefix="cs_", + year_filter=True, + include_customer_info=False, + ) + + # Catalog sales - second year (2002)
t_c_secyear = build_sales_subquery( + catalog_sales, + date_2002, + customer, + sold_date_key="cs_sold_date_sk", + customer_key="cs_bill_customer_sk", + col_prefix="cs_", + year_filter=False, + include_customer_info=False, + ) + + # Web sales - first year (2001) + t_w_firstyear = build_sales_subquery( + web_sales, + date_2001, + customer, + sold_date_key="ws_sold_date_sk", + customer_key="ws_bill_customer_sk", + col_prefix="ws_", + year_filter=True, + include_customer_info=False, + ) + + # Web sales - second year (2002) + t_w_secyear = build_sales_subquery( + web_sales, + date_2002, + customer, + sold_date_key="ws_sold_date_sk", + customer_key="ws_bill_customer_sk", + col_prefix="ws_", + year_filter=False, + include_customer_info=False, + ) + + # Perform the joins and filtering + sort_cols = [ + "customer_id", + "customer_first_name", + "customer_last_name", + "customer_preferred_cust_flag", + ] + return ( + t_s_secyear.join(t_s_firstyear, on="customer_id", suffix="_sf", how="inner") + .join(t_c_firstyear, on="customer_id", suffix="_cf", how="inner") + .join(t_c_secyear, on="customer_id", suffix="_cs", how="inner") + .join(t_w_firstyear, on="customer_id", suffix="_wf", how="inner") + .join(t_w_secyear, on="customer_id", suffix="_ws", how="inner") + .filter( + # All first year totals must be > 0 + (pl.col("year_total_sf") > 0) + & (pl.col("year_total_cf") > 0) + & (pl.col("year_total_wf") > 0) + & + # Catalog growth rate > Store growth rate + ( + pl.when(pl.col("year_total_cf") > 0) + .then(pl.col("year_total_cs") / pl.col("year_total_cf")) + .otherwise(None) + > pl.when(pl.col("year_total_sf") > 0) + .then(pl.col("year_total") / pl.col("year_total_sf")) + .otherwise(None) + ) + & + # Catalog growth rate > Web growth rate + ( + pl.when(pl.col("year_total_cf") > 0) + .then(pl.col("year_total_cs") / pl.col("year_total_cf")) + .otherwise(None) + > pl.when(pl.col("year_total_wf") > 0) + .then(pl.col("year_total_ws") / pl.col("year_total_wf")) + .otherwise(None) + ) + ) + .select(sort_cols) + .sort(sort_cols) + .limit(100) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q5.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q5.py new file mode 100644 index 00000000000..f03d9e04b9f --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q5.py @@ -0,0 +1,462 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 5.""" + +from __future__ import annotations + +from datetime import date, timedelta +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 5.""" + return """ + WITH ssr AS + ( + SELECT s_store_id, + Sum(sales_price) AS sales, + Sum(profit) AS profit, + Sum(return_amt) AS returns1, + Sum(net_loss) AS profit_loss + FROM ( + SELECT ss_store_sk AS store_sk, + ss_sold_date_sk AS date_sk, + ss_ext_sales_price AS sales_price, + ss_net_profit AS profit, + Cast(0 AS DECIMAL(7,2)) AS return_amt, + Cast(0 AS DECIMAL(7,2)) AS net_loss + FROM store_sales + UNION ALL + SELECT sr_store_sk AS store_sk, + sr_returned_date_sk AS date_sk, + Cast(0 AS DECIMAL(7,2)) AS sales_price, + Cast(0 AS DECIMAL(7,2)) AS profit, + sr_return_amt AS return_amt, + sr_net_loss AS net_loss + FROM store_returns ) salesreturns, + date_dim, + store + WHERE date_sk = d_date_sk + AND d_date BETWEEN Cast('2002-08-22' AS DATE) AND ( + Cast('2002-08-22' AS DATE) + INTERVAL '14' day) + AND store_sk = s_store_sk + GROUP BY s_store_id) , csr AS + ( + SELECT cp_catalog_page_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS returns1, + sum(net_loss) AS profit_loss + FROM ( + SELECT cs_catalog_page_sk AS page_sk, + cs_sold_date_sk AS date_sk, + cs_ext_sales_price AS sales_price, + cs_net_profit AS profit, + cast(0 AS decimal(7,2)) AS return_amt, + cast(0 AS decimal(7,2)) AS net_loss + FROM catalog_sales + UNION ALL + SELECT cr_catalog_page_sk AS page_sk, + cr_returned_date_sk AS date_sk, + cast(0 AS decimal(7,2)) AS sales_price, + cast(0 AS decimal(7,2)) AS profit, + cr_return_amount AS return_amt, + cr_net_loss AS net_loss + FROM catalog_returns ) salesreturns, + date_dim, + catalog_page + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2002-08-22' AS date) AND ( + cast('2002-08-22' AS date) + INTERVAL '14' day) + AND page_sk = cp_catalog_page_sk + GROUP BY cp_catalog_page_id) , wsr AS + ( + SELECT web_site_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS returns1, + sum(net_loss) AS profit_loss + FROM ( + SELECT ws_web_site_sk AS wsr_web_site_sk, + ws_sold_date_sk AS date_sk, + ws_ext_sales_price AS sales_price, + ws_net_profit AS profit, + cast(0 AS decimal(7,2)) AS return_amt, + cast(0 AS decimal(7,2)) AS net_loss + FROM web_sales + UNION ALL + SELECT ws_web_site_sk AS wsr_web_site_sk, + wr_returned_date_sk AS date_sk, + cast(0 AS decimal(7,2)) AS sales_price, + cast(0 AS decimal(7,2)) AS profit, + wr_return_amt AS return_amt, + wr_net_loss AS net_loss + FROM web_returns + LEFT OUTER JOIN web_sales + ON ( + wr_item_sk = ws_item_sk + AND wr_order_number = ws_order_number) ) salesreturns, + date_dim, + web_site + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2002-08-22' AS date) AND ( + cast('2002-08-22' AS date) + INTERVAL '14' day) + AND wsr_web_site_sk = web_site_sk + GROUP BY web_site_id) + SELECT + channel , + id , + sum(sales) AS sales , + sum(returns1) AS returns1 , + sum(profit) AS profit + FROM ( + SELECT 'store channel' AS channel , + 'store' + || s_store_id AS id , + sales , + returns1 , + (profit - profit_loss) AS profit + FROM ssr + UNION ALL + SELECT 'catalog channel' AS channel , + 'catalog_page' + || cp_catalog_page_id AS id , + sales , + returns1 , + (profit - profit_loss) AS profit + 
FROM csr + UNION ALL + SELECT 'web channel' AS channel , + 'web_site' + || web_site_id AS id , + sales , + returns1 , + (profit - profit_loss) AS profit + FROM wsr ) x + GROUP BY rollup (channel, id) + ORDER BY channel , + id + LIMIT 100; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 5.""" + # Load required tables + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + store_returns = get_data( + run_config.dataset_path, "store_returns", run_config.suffix + ) + catalog_sales = get_data( + run_config.dataset_path, "catalog_sales", run_config.suffix + ) + catalog_returns = get_data( + run_config.dataset_path, "catalog_returns", run_config.suffix + ) + web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix) + web_returns = get_data(run_config.dataset_path, "web_returns", run_config.suffix) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + store = get_data(run_config.dataset_path, "store", run_config.suffix) + catalog_page = get_data(run_config.dataset_path, "catalog_page", run_config.suffix) + web_site = get_data(run_config.dataset_path, "web_site", run_config.suffix) + + # Date range filter - use actual date values + start_date = date(2002, 8, 22) + end_date = start_date + timedelta(days=14) + + # Step 1: Create ssr CTE (Store Sales and Returns) + # Filter sales and returns by date first, then transform + store_sales_data = ( + store_sales.join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk") + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .select( + [ + pl.col("ss_store_sk").alias("store_sk"), + pl.col("ss_sold_date_sk").alias("date_sk"), + pl.col("ss_ext_sales_price").alias("sales_price"), + pl.col("ss_net_profit").alias("profit"), + pl.lit(0.0).alias("return_amt"), + pl.lit(0.0).alias("net_loss"), + ] + ) + ) + store_returns_data = ( + store_returns.join( + date_dim, left_on="sr_returned_date_sk", right_on="d_date_sk" + ) + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .select( + [ + pl.col("sr_store_sk").alias("store_sk"), + pl.col("sr_returned_date_sk").alias("date_sk"), + pl.lit(0.0).alias("sales_price"), + pl.lit(0.0).alias("profit"), + pl.col("sr_return_amt").alias("return_amt"), + pl.col("sr_net_loss").alias("net_loss"), + ] + ) + ) + store_salesreturns = pl.concat([store_sales_data, store_returns_data]) + ssr = ( + store_salesreturns.join(store, left_on="store_sk", right_on="s_store_sk") + .group_by("s_store_id") + .agg( + [ + pl.col("sales_price").sum().alias("sales"), + pl.col("sales_price").count().alias("sales_count"), + pl.col("profit").sum().alias("profit"), + pl.col("profit").count().alias("profit_count"), + pl.col("return_amt").sum().alias("returns1"), + pl.col("return_amt").count().alias("returns1_count"), + pl.col("net_loss").sum().alias("profit_loss"), + pl.col("net_loss").count().alias("profit_loss_count"), + ] + ) + .with_columns( + [ + pl.when(pl.col("sales_count") > 0) + .then(pl.col("sales")) + .otherwise(None) + .alias("sales"), + pl.when(pl.col("profit_count") > 0) + .then(pl.col("profit")) + .otherwise(None) + .alias("profit"), + pl.when(pl.col("returns1_count") > 0) + .then(pl.col("returns1")) + .otherwise(None) + .alias("returns1"), + pl.when(pl.col("profit_loss_count") > 0) + .then(pl.col("profit_loss")) + .otherwise(None) + .alias("profit_loss"), + ] + ) + .drop(["sales_count", "profit_count", "returns1_count", "profit_loss_count"]) + ) + + # Step 2: Create csr CTE (Catalog 
Sales and Returns) + # Filter sales and returns by date first, then transform + catalog_sales_data = ( + catalog_sales.join(date_dim, left_on="cs_sold_date_sk", right_on="d_date_sk") + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .select( + [ + pl.col("cs_catalog_page_sk").alias("page_sk"), + pl.col("cs_sold_date_sk").alias("date_sk"), + pl.col("cs_ext_sales_price").alias("sales_price"), + pl.col("cs_net_profit").alias("profit"), + pl.lit(0.0).alias("return_amt"), + pl.lit(0.0).alias("net_loss"), + ] + ) + ) + catalog_returns_data = ( + catalog_returns.join( + date_dim, left_on="cr_returned_date_sk", right_on="d_date_sk" + ) + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .select( + [ + pl.col("cr_catalog_page_sk").alias("page_sk"), + pl.col("cr_returned_date_sk").alias("date_sk"), + pl.lit(0.0).alias("sales_price"), + pl.lit(0.0).alias("profit"), + pl.col("cr_return_amount").alias("return_amt"), + pl.col("cr_net_loss").alias("net_loss"), + ] + ) + ) + catalog_salesreturns = pl.concat([catalog_sales_data, catalog_returns_data]) + csr = ( + catalog_salesreturns.join( + catalog_page, left_on="page_sk", right_on="cp_catalog_page_sk" + ) + .group_by("cp_catalog_page_id") + .agg( + [ + pl.col("sales_price").sum().alias("sales"), + pl.col("sales_price").count().alias("sales_count"), + pl.col("profit").sum().alias("profit"), + pl.col("profit").count().alias("profit_count"), + pl.col("return_amt").sum().alias("returns1"), + pl.col("return_amt").count().alias("returns1_count"), + pl.col("net_loss").sum().alias("profit_loss"), + pl.col("net_loss").count().alias("profit_loss_count"), + ] + ) + .with_columns( + [ + pl.when(pl.col("sales_count") > 0) + .then(pl.col("sales")) + .otherwise(None) + .alias("sales"), + pl.when(pl.col("profit_count") > 0) + .then(pl.col("profit")) + .otherwise(None) + .alias("profit"), + pl.when(pl.col("returns1_count") > 0) + .then(pl.col("returns1")) + .otherwise(None) + .alias("returns1"), + pl.when(pl.col("profit_loss_count") > 0) + .then(pl.col("profit_loss")) + .otherwise(None) + .alias("profit_loss"), + ] + ) + .drop(["sales_count", "profit_count", "returns1_count", "profit_loss_count"]) + ) + + # Step 3: Create wsr CTE (Web Sales and Returns) + # Filter sales and returns by date first, then transform + web_sales_data = ( + web_sales.join(date_dim, left_on="ws_sold_date_sk", right_on="d_date_sk") + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .select( + [ + pl.col("ws_web_site_sk").alias("wsr_web_site_sk"), + pl.col("ws_sold_date_sk").alias("date_sk"), + pl.col("ws_ext_sales_price").alias("sales_price"), + pl.col("ws_net_profit").alias("profit"), + pl.lit(0.0).alias("return_amt"), + pl.lit(0.0).alias("net_loss"), + ] + ) + ) + # For web returns, we need the LEFT OUTER JOIN with web_sales, then filter by date + web_returns_data = ( + web_returns.join(date_dim, left_on="wr_returned_date_sk", right_on="d_date_sk") + .filter(pl.col("d_date").is_between(start_date, end_date, closed="both")) + .join( + web_sales.select(["ws_item_sk", "ws_order_number", "ws_web_site_sk"]), + left_on=["wr_item_sk", "wr_order_number"], + right_on=["ws_item_sk", "ws_order_number"], + how="left", + ) + .select( + [ + pl.col("ws_web_site_sk").alias("wsr_web_site_sk"), + pl.col("wr_returned_date_sk").alias("date_sk"), + pl.lit(0.0).alias("sales_price"), + pl.lit(0.0).alias("profit"), + pl.col("wr_return_amt").alias("return_amt"), + pl.col("wr_net_loss").alias("net_loss"), + ] + ) + ) + web_salesreturns = 
pl.concat([web_sales_data, web_returns_data]) + wsr = ( + web_salesreturns.join( + web_site, left_on="wsr_web_site_sk", right_on="web_site_sk" + ) + .group_by("web_site_id") + .agg( + [ + pl.col("sales_price").sum().alias("sales"), + pl.col("sales_price").count().alias("sales_count"), + pl.col("profit").sum().alias("profit"), + pl.col("profit").count().alias("profit_count"), + pl.col("return_amt").sum().alias("returns1"), + pl.col("return_amt").count().alias("returns1_count"), + pl.col("net_loss").sum().alias("profit_loss"), + pl.col("net_loss").count().alias("profit_loss_count"), + ] + ) + .with_columns( + [ + pl.when(pl.col("sales_count") > 0) + .then(pl.col("sales")) + .otherwise(None) + .alias("sales"), + pl.when(pl.col("profit_count") > 0) + .then(pl.col("profit")) + .otherwise(None) + .alias("profit"), + pl.when(pl.col("returns1_count") > 0) + .then(pl.col("returns1")) + .otherwise(None) + .alias("returns1"), + pl.when(pl.col("profit_loss_count") > 0) + .then(pl.col("profit_loss")) + .otherwise(None) + .alias("profit_loss"), + ] + ) + .drop(["sales_count", "profit_count", "returns1_count", "profit_loss_count"]) + ) + + # Step 4: Create the union of all channels + store_channel = ssr.select( + [ + pl.lit("store channel").alias("channel"), + (pl.lit("store") + pl.col("s_store_id").cast(pl.Utf8)).alias("id"), + pl.col("sales"), + pl.col("returns1"), + (pl.col("profit") - pl.col("profit_loss")).alias("profit"), + ] + ) + catalog_channel = csr.select( + [ + pl.lit("catalog channel").alias("channel"), + (pl.lit("catalog_page") + pl.col("cp_catalog_page_id").cast(pl.Utf8)).alias( + "id" + ), + pl.col("sales"), + pl.col("returns1"), + (pl.col("profit") - pl.col("profit_loss")).alias("profit"), + ] + ) + web_channel = wsr.select( + [ + pl.lit("web channel").alias("channel"), + (pl.lit("web_site") + pl.col("web_site_id").cast(pl.Utf8)).alias("id"), + pl.col("sales"), + pl.col("returns1"), + (pl.col("profit") - pl.col("profit_loss")).alias("profit"), + ] + ) + all_channels = pl.concat([store_channel, catalog_channel, web_channel]) + + # Step 5: Group by channel and id (filter out NULL rollup rows) + return ( + all_channels.group_by(["channel", "id"]) + .agg( + [ + pl.col("sales").sum().alias("sales"), + pl.col("sales").count().alias("sales_count"), + pl.col("returns1").sum().alias("returns1"), + pl.col("returns1").count().alias("returns1_count"), + pl.col("profit").sum().alias("profit"), + pl.col("profit").count().alias("profit_count"), + ] + ) + .with_columns( + [ + pl.when(pl.col("sales_count") > 0) + .then(pl.col("sales")) + .otherwise(None) + .alias("sales"), + pl.when(pl.col("returns1_count") > 0) + .then(pl.col("returns1")) + .otherwise(None) + .alias("returns1"), + pl.when(pl.col("profit_count") > 0) + .then(pl.col("profit")) + .otherwise(None) + .alias("profit"), + ] + ) + .drop(["sales_count", "returns1_count", "profit_count"]) + .filter(pl.col("channel").is_not_null() & pl.col("id").is_not_null()) + .sort(["channel", "id"]) + .limit(100) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q6.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q6.py new file mode 100644 index 00000000000..b597e7582a2 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q6.py @@ -0,0 +1,92 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 6.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 6.""" + return """ + SELECT a.ca_state state, + Count(*) cnt + FROM customer_address a, + customer c, + store_sales s, + date_dim d, + item i + WHERE a.ca_address_sk = c.c_current_addr_sk + AND c.c_customer_sk = s.ss_customer_sk + AND s.ss_sold_date_sk = d.d_date_sk + AND s.ss_item_sk = i.i_item_sk + AND d.d_month_seq = (SELECT DISTINCT ( d_month_seq ) + FROM date_dim + WHERE d_year = 1998 + AND d_moy = 7) + AND i.i_current_price > 1.2 * (SELECT Avg(j.i_current_price) + FROM item j + WHERE j.i_category = i.i_category) + GROUP BY a.ca_state + HAVING Count(*) >= 10 + --ORDER BY cnt + ORDER BY cnt, state + LIMIT 100; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 6.""" + # Load required tables + customer_address = get_data( + run_config.dataset_path, "customer_address", run_config.suffix + ) + customer = get_data(run_config.dataset_path, "customer", run_config.suffix) + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + item = get_data(run_config.dataset_path, "item", run_config.suffix) + + # Subquery 1: d_month_seq values for July 1998 + target_month_seq_table = ( + date_dim.filter((pl.col("d_year") == 1998) & (pl.col("d_moy") == 7)) + .select("d_month_seq") + .unique() + ) + + # Subquery 2: Calculate average price per category + avg_price_per_category = item.group_by("i_category").agg( + pl.col("i_current_price").mean().alias("avg_price") + ) + + return ( + customer_address.join( + customer, left_on="ca_address_sk", right_on="c_current_addr_sk" + ) + .join(store_sales, left_on="c_customer_sk", right_on="ss_customer_sk") + .join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk") + .join(item, left_on="ss_item_sk", right_on="i_item_sk") + .join(avg_price_per_category, on="i_category") + .join(target_month_seq_table, on="d_month_seq", how="semi") + .filter(pl.col("i_current_price") > 1.2 * pl.col("avg_price")) + .group_by("ca_state") + .agg(pl.len().alias("cnt")) + .filter(pl.col("cnt") >= 10) + .sort(["cnt", "ca_state"], nulls_last=True) + .limit(100) + .select( + [ + pl.col("ca_state").alias("state"), + # Cast -> Int64 to match DuckDB + pl.col("cnt").cast(pl.Int64), + ] + ) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q7.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q7.py new file mode 100644 index 00000000000..7efef3cbb14 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q7.py @@ -0,0 +1,79 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 7.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 7.""" + return """ + SELECT i_item_id, + Avg(ss_quantity) agg1, + Avg(ss_list_price) agg2, + Avg(ss_coupon_amt) agg3, + Avg(ss_sales_price) agg4 + FROM store_sales, + customer_demographics, + date_dim, + item, + promotion + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND ss_cdemo_sk = cd_demo_sk + AND ss_promo_sk = p_promo_sk + AND cd_gender = 'F' + AND cd_marital_status = 'W' + AND cd_education_status = '2 yr Degree' + AND ( p_channel_email = 'N' + OR p_channel_event = 'N' ) + AND d_year = 1998 + GROUP BY i_item_id + ORDER BY i_item_id + LIMIT 100; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 7.""" + # Load required tables + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + customer_demographics = get_data( + run_config.dataset_path, "customer_demographics", run_config.suffix + ) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + item = get_data(run_config.dataset_path, "item", run_config.suffix) + promotion = get_data(run_config.dataset_path, "promotion", run_config.suffix) + + return ( + store_sales.join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk") + .join(item, left_on="ss_item_sk", right_on="i_item_sk") + .join(customer_demographics, left_on="ss_cdemo_sk", right_on="cd_demo_sk") + .join(promotion, left_on="ss_promo_sk", right_on="p_promo_sk") + .filter(pl.col("cd_gender") == "F") + .filter(pl.col("cd_marital_status") == "W") + .filter(pl.col("cd_education_status") == "2 yr Degree") + .filter((pl.col("p_channel_email") == "N") | (pl.col("p_channel_event") == "N")) + .filter(pl.col("d_year") == 1998) + .group_by("i_item_id") + .agg( + [ + pl.col("ss_quantity").mean().alias("agg1"), + pl.col("ss_list_price").mean().alias("agg2"), + pl.col("ss_coupon_amt").mean().alias("agg3"), + pl.col("ss_sales_price").mean().alias("agg4"), + ] + ) + .sort("i_item_id", nulls_last=True) + .limit(100) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q8.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q8.py new file mode 100644 index 00000000000..5a06a654ff8 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q8.py @@ -0,0 +1,524 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Query 8.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + +""" +warning!, one filter removed to prevent zero row results + +note: alternate zip code + '70069', # 93 preferred customers + '60069', # 87 preferred customers + '78877', # 87 preferred customers + '60169', # 87 preferred customers + '68252', # 86 preferred customers + '71087', # 84 preferred customers + '71711', # 84 preferred customers + '68877', # 84 preferred customers + '55709', # 82 preferred customers +""" + +TARGET_YEAR = 1996 +TARGET_QUARTER = 2 +TARGET_ZIPS = [ + "67436", + "26121", + "38443", + "63157", + "68856", + "19485", + "86425", + "26741", + "70991", + "60899", + "63573", + "47556", + "56193", + "93314", + "87827", + "62017", + "85067", + "95390", + "48091", + "10261", + "81845", + "41790", + "42853", + "24675", + "12840", + "60065", + "84430", + "57451", + "24021", + "91735", + "75335", + "71935", + "34482", + "56943", + "70695", + "52147", + "56251", + "28411", + "86653", + "23005", + "22478", + "29031", + "34398", + "15365", + "42460", + "33337", + "59433", + "73943", + "72477", + "74081", + "74430", + "64605", + "39006", + "11226", + "49057", + "97308", + "42663", + "18187", + "19768", + "43454", + "32147", + "76637", + "51975", + "11181", + "45630", + "33129", + "45995", + "64386", + "55522", + "26697", + "20963", + "35154", + "64587", + "49752", + "66386", + "30586", + "59286", + "13177", + "66646", + "84195", + "74316", + "36853", + "32927", + "12469", + "11904", + "36269", + "17724", + "55346", + "12595", + "53988", + "65439", + "28015", + "63268", + "73590", + "29216", + "82575", + "69267", + "13805", + "91678", + "79460", + "94152", + "14961", + "15419", + "48277", + "62588", + "55493", + "28360", + "14152", + "55225", + "18007", + "53705", + "56573", + "80245", + "71769", + "57348", + "36845", + "13039", + "17270", + "22363", + "83474", + "25294", + "43269", + "77666", + "15488", + "99146", + "64441", + "43338", + "38736", + "62754", + "48556", + "86057", + "23090", + "38114", + "66061", + "18910", + "84385", + "23600", + "19975", + "27883", + "65719", + "19933", + "32085", + "49731", + "40473", + "27190", + "46192", + "23949", + "44738", + "12436", + "64794", + "68741", + "15333", + "24282", + "49085", + "31844", + "71156", + "48441", + "17100", + "98207", + "44982", + "20277", + "71496", + "96299", + "37583", + "22206", + "89174", + "30589", + "61924", + "53079", + "10976", + "13104", + "42794", + "54772", + "15809", + "56434", + "39975", + "13874", + "30753", + "77598", + "78229", + "59478", + "12345", + "55547", + "57422", + "42600", + "79444", + "29074", + "29752", + "21676", + "32096", + "43044", + "39383", + "37296", + "36295", + "63077", + "16572", + "31275", + "18701", + "40197", + "48242", + "27219", + "49865", + "84175", + "30446", + "25165", + "13807", + "72142", + "70499", + "70464", + "71429", + "18111", + "70857", + "29545", + "36425", + "52706", + "36194", + "42963", + "75068", + "47921", + "74763", + "90990", + "89456", + "62073", + "88397", + "73963", + "75885", + "62657", + "12530", + "81146", + "57434", + "25099", + "41429", + "98441", + "48713", + "52552", + "31667", + "14072", + "13903", + "44709", + "85429", + "58017", + "38295", + "44875", + "73541", + "30091", + "12707", + "23762", + "62258", + "33247", + "78722", + "77431", + 
"14510", + "35656", + "72428", + "92082", + "35267", + "43759", + "24354", + "90952", + "11512", + "21242", + "22579", + "56114", + "32339", + "52282", + "41791", + "24484", + "95020", + "28408", + "99710", + "11899", + "43344", + "72915", + "27644", + "62708", + "74479", + "17177", + "32619", + "12351", + "91339", + "31169", + "57081", + "53522", + "16712", + "34419", + "71779", + "44187", + "46206", + "96099", + "61910", + "53664", + "12295", + "31837", + "33096", + "10813", + "63048", + "31732", + "79118", + "73084", + "72783", + "84952", + "46965", + "77956", + "39815", + "32311", + "75329", + "48156", + "30826", + "49661", + "13736", + "92076", + "74865", + "88149", + "92397", + "52777", + "68453", + "32012", + "21222", + "52721", + "24626", + "18210", + "42177", + "91791", + "75251", + "82075", + "44372", + "45542", + "20609", + "60115", + "17362", + "22750", + "90434", + "31852", + "54071", + "33762", + "14705", + "40718", + "56433", + "30996", + "40657", + "49056", + "23585", + "66455", + "41021", + "74736", + "72151", + "37007", + "21729", + "60177", + "84558", + "59027", + "93855", + "60022", + "86443", + "19541", + "86886", + "30532", + "39062", + "48532", + "34713", + "52077", + "22564", + "64638", + "15273", + "31677", + "36138", + "62367", + "60261", + "80213", + "42818", + "25113", + "72378", + "69802", + "69096", + "55443", + "28820", + "13848", + "78258", + "37490", + "30556", + "77380", + "28447", + "44550", + "26791", + "70609", + "82182", + "33306", + "43224", + "22322", + "86959", + "68519", + "14308", + "46501", + "81131", + "34056", + "61991", + "19896", + "87804", + "65774", + "92564", +] + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 8.""" + return f""" + -- start query 8 in stream 0 using template query8.tpl + SELECT s_store_name, + Sum(ss_net_profit) + FROM store_sales, + date_dim, + store, + (SELECT ca_zip + FROM (SELECT Substr(ca_zip, 1, 5) ca_zip + FROM customer_address + WHERE Substr(ca_zip, 1, 5) IN ({", ".join(f"'{zip}'" for zip in TARGET_ZIPS)}) + INTERSECT + SELECT ca_zip + FROM (SELECT Substr(ca_zip, 1, 5) ca_zip, + Count(*) cnt + FROM customer_address, + customer + WHERE ca_address_sk = c_current_addr_sk + AND c_preferred_cust_flag = 'Y' + GROUP BY ca_zip + HAVING Count(*) > 10)A1)A2) V1 + WHERE ss_store_sk = s_store_sk + AND ss_sold_date_sk = d_date_sk + AND d_qoy = {TARGET_QUARTER} + AND d_year = {TARGET_YEAR} + AND ( Substr(s_zip, 1, 2) = Substr(V1.ca_zip, 1, 2) ) + GROUP BY s_store_name + ORDER BY s_store_name + LIMIT 100; + + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 8.""" + # Load required tables + store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix) + date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix) + store = get_data(run_config.dataset_path, "store", run_config.suffix) + customer_address = get_data( + run_config.dataset_path, "customer_address", run_config.suffix + ) + customer = get_data(run_config.dataset_path, "customer", run_config.suffix) + + # First subquery: get first 5 chars of zip codes from target list + target_zips_5char = ( + customer_address.select(pl.col("ca_zip").str.slice(0, 5).alias("ca_zip")) + .filter(pl.col("ca_zip").is_in(TARGET_ZIPS)) + .unique() + ) + + # Second subquery: preferred customers by zip with count > 10 + preferred_customer_zips = ( + customer_address.join( + customer, left_on="ca_address_sk", right_on="c_current_addr_sk" + ) + .filter(pl.col("c_preferred_cust_flag") == "Y") + 
.group_by(pl.col("ca_zip").str.slice(0, 5).alias("ca_zip")) + .agg(pl.len().alias("cnt")) + .filter(pl.col("cnt") > 10) + .select("ca_zip") + ) + + # INTERSECT: Get common zip codes between target list and preferred customer zips + intersect_zips = target_zips_5char.join( + preferred_customer_zips, on="ca_zip", how="inner" + ).select("ca_zip") + + # Main query: join store_sales with date_dim, store, and filter by zip codes + return ( + store_sales.join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk") + .join(store, left_on="ss_store_sk", right_on="s_store_sk") + .join( + intersect_zips, + left_on=pl.col("s_zip").str.slice(0, 2), + right_on=pl.col("ca_zip").str.slice(0, 2), + ) + .filter(pl.col("d_qoy") == TARGET_QUARTER) + .filter(pl.col("d_year") == TARGET_YEAR) + .group_by("s_store_name") + .agg(pl.col("ss_net_profit").sum().alias("sum")) + .sort("s_store_name", nulls_last=True) + .limit(100) + .select([pl.col("s_store_name"), pl.col("sum").alias("sum(ss_net_profit)")]) + ) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q9.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q9.py new file mode 100644 index 00000000000..6f4ae38dac1 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q9.py @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Query 9.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from cudf_polars.experimental.benchmarks.utils import get_data + +if TYPE_CHECKING: + from cudf_polars.experimental.benchmarks.utils import RunConfig + + +def duckdb_impl(run_config: RunConfig) -> str: + """Query 9.""" + return """ + -- start query 9 in stream 0 using template query9.tpl + SELECT CASE + WHEN (SELECT Count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) > 3672 THEN + (SELECT Avg(ss_ext_list_price) + FROM store_sales + WHERE + ss_quantity BETWEEN 1 AND 20) + ELSE (SELECT Avg(ss_net_profit) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) + END bucket1, + CASE + WHEN (SELECT Count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) > 3392 THEN + (SELECT Avg(ss_ext_list_price) + FROM store_sales + WHERE + ss_quantity BETWEEN 21 AND 40) + ELSE (SELECT Avg(ss_net_profit) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) + END bucket2, + CASE + WHEN (SELECT Count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) > 32784 THEN + (SELECT Avg(ss_ext_list_price) + FROM store_sales + WHERE + ss_quantity BETWEEN 41 AND 60) + ELSE (SELECT Avg(ss_net_profit) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) + END bucket3, + CASE + WHEN (SELECT Count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) > 26032 THEN + (SELECT Avg(ss_ext_list_price) + FROM store_sales + WHERE + ss_quantity BETWEEN 61 AND 80) + ELSE (SELECT Avg(ss_net_profit) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) + END bucket4, + CASE + WHEN (SELECT Count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) > 23982 THEN + (SELECT Avg(ss_ext_list_price) + FROM store_sales + WHERE + ss_quantity BETWEEN 81 AND 100) + ELSE (SELECT Avg(ss_net_profit) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) + END bucket5 + FROM reason + WHERE r_reason_sk = 1; + """ + + +def polars_impl(run_config: RunConfig) -> pl.LazyFrame: + """Query 9.""" + # Load required tables + store_sales = 
get_data(run_config.dataset_path, "store_sales", run_config.suffix) + reason = get_data(run_config.dataset_path, "reason", run_config.suffix) + + # Define bucket configurations: (min_qty, max_qty, count_threshold) + buckets = [ + (1, 20, 3672), + (21, 40, 3392), + (41, 60, 32784), + (61, 80, 26032), + (81, 100, 23982), + ] + + # Calculate each bucket summary + bucket_stats = [] + for i, (min_qty, max_qty, _) in enumerate(buckets, 1): + # Compute count, avg(ss_ext_list_price), avg(ss_net_profit) for each quantity range + stats = store_sales.filter( + pl.col("ss_quantity").is_between(min_qty, max_qty, closed="both") + ).select( + [ + pl.len().alias(f"count_{i}"), + pl.col("ss_ext_list_price").mean().alias(f"avg_price_{i}"), + pl.col("ss_net_profit").mean().alias(f"avg_profit_{i}"), + ] + ) + bucket_stats.append(stats) + + # Combine all bucket summaries into one row + combined_stats = pl.concat(bucket_stats, how="horizontal") + + # Select appropriate value per bucket based on count threshold + bucket_values = [] + for i, (_, _, threshold) in enumerate(buckets, 1): + bucket = ( + pl.when(pl.col(f"count_{i}") > threshold) + .then(pl.col(f"avg_price_{i}")) + .otherwise(pl.col(f"avg_profit_{i}")) + .alias(f"bucket{i}") + ) + bucket_values.append(bucket) + + # Create result DataFrame with one row (using reason table as in SQL) + return ( + reason.filter(pl.col("r_reason_sk") == 1) + .join(combined_stats, how="cross") + .select(bucket_values) + .limit(1) + ) From b7d8e9094da3379044a426e103eb5df47fa4b7f2 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 8 Aug 2025 16:43:28 -0400 Subject: [PATCH 087/366] Fix anchor naming conventions in dependencies.yaml (#19635) Replace hyphens with underscores, and remove any `-dep` suffix. Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19635 --- dependencies.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index f214bde574e..7b240f5bc84 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -488,7 +488,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &numba-cuda-dep numba-cuda>=0.14.0,<0.15.0a0 + - &numba_cuda numba-cuda>=0.14.0,<0.15.0a0 pyarrow_run: common: - output_types: [conda] @@ -653,14 +653,14 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba-dep numba>=0.59.1,<0.62.0a0 + - &numba numba>=0.59.1,<0.62.0a0 - nvtx>=0.2.1 - packaging - rich - typing_extensions>=4.0.0 - output_types: [conda] packages: - - *numba-cuda-dep + - *numba_cuda - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -682,10 +682,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - &numba-cuda-cu12-dep numba-cuda[cu12]>=0.14.0,<0.15.0a0 + - &numba_cuda_cu12 numba-cuda[cu12]>=0.14.0,<0.15.0a0 - matrix: # Fallback for no matrix packages: - - *numba-cuda-cu12-dep + - *numba_cuda_cu12 - output_types: [requirements, pyproject] matrices: - matrix: From 2cefac9e7ccc4cf9a35c64b210abade5bffa1599 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 8 Aug 2025 14:42:04 -0700 Subject: [PATCH 088/366] Propagate exceptions thrown in async IO operations (#19628) closes #19586 In some places in libcudf, `std::future::wait()` is called to wait on asynchronous IO operations.
The issue with this is that wait does not propagate any exceptions that may be thrown. This PR replaces the `wait()` calls with `get()` and adds readers and writers tests to make sure exceptions are re-thrown. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - David Wendt (https://github.com/davidwendt) - Amin Aramoon (https://github.com/aminaramoon) - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19628 --- cpp/src/io/orc/writer_impl.cu | 4 +- cpp/src/io/parquet/bloom_filter_reader.cu | 2 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 6 +- .../parquet/reader_impl_preprocess_utils.cu | 2 +- cpp/src/io/parquet/writer_impl.cu | 4 +- cpp/tests/io/io_test_utils.hpp | 119 ++++++++++++++++++ cpp/tests/io/json/json_test.cpp | 56 +++++++++ cpp/tests/io/orc_test.cpp | 54 ++++++++ cpp/tests/io/parquet_reader_test.cpp | 55 ++++++++ 9 files changed, 293 insertions(+), 9 deletions(-) create mode 100644 cpp/tests/io/io_test_utils.hpp diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 612f0b69737..64c8c729e4a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2715,8 +2715,8 @@ void writer::impl::write_orc_data_to_sink(encoded_data const& enc_data, stripe.footerLength = bytes_written; } } - for (auto const& task : write_tasks) { - task.wait(); + for (auto& task : write_tasks) { + task.get(); } } diff --git a/cpp/src/io/parquet/bloom_filter_reader.cu b/cpp/src/io/parquet/bloom_filter_reader.cu index d2d7fcac959..7abceb3ad4d 100644 --- a/cpp/src/io/parquet/bloom_filter_reader.cu +++ b/cpp/src/io/parquet/bloom_filter_reader.cu @@ -391,7 +391,7 @@ void read_bloom_filter_data(host_span const> sources // Read task sync function for (auto& task : read_tasks) { - task.wait(); + task.get(); } } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 2343ade4784..87fb1950017 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -345,10 +345,10 @@ void reader_impl::read_compressed_data() auto& chunks = pass.chunks; - auto const [has_compressed_data, read_chunks_tasks] = read_column_chunks(); - pass.has_compressed_data = has_compressed_data; + auto [has_compressed_data, read_chunks_tasks] = read_column_chunks(); + pass.has_compressed_data = has_compressed_data; - read_chunks_tasks.wait(); + read_chunks_tasks.get(); // Process dataset chunk pages into output columns auto const total_pages = _has_page_index ? 
count_page_headers_with_pgidx(chunks, _stream) diff --git a/cpp/src/io/parquet/reader_impl_preprocess_utils.cu b/cpp/src/io/parquet/reader_impl_preprocess_utils.cu index 04b36abb37f..6029d7ff538 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess_utils.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess_utils.cu @@ -238,7 +238,7 @@ void generate_depth_remappings( } auto sync_fn = [](decltype(read_tasks) read_tasks) { for (auto& task : read_tasks) { - task.wait(); + task.get(); } }; return std::async(std::launch::deferred, sync_fn, std::move(read_tasks)); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 41fdbe7e6fc..621eadb0e5b 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -2497,8 +2497,8 @@ void writer::impl::write_parquet_data_to_sink( if (i == 0) { row_group.file_offset = chunk_offset; } } } - for (auto const& task : write_tasks) { - task.wait(); + for (auto& task : write_tasks) { + task.get(); } } diff --git a/cpp/tests/io/io_test_utils.hpp b/cpp/tests/io/io_test_utils.hpp new file mode 100644 index 00000000000..a19eb597e50 --- /dev/null +++ b/cpp/tests/io/io_test_utils.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::test { + +/** + * @brief Custom exception for device read async testing + */ +class AsyncException : public std::exception {}; + +/** + * @brief Datasource that throws an exception in device_read_async for testing + */ +class ThrowingDeviceReadDatasource : public cudf::io::datasource { + private: + std::vector const& data_; + + public: + explicit ThrowingDeviceReadDatasource(std::vector const& data) : data_(data) {} + + std::unique_ptr host_read(size_t offset, size_t size) override + { + size = std::min(size, data_.size() - offset); + // Convert char data to bytes for the buffer + std::vector byte_data(size); + std::memcpy(byte_data.data(), data_.data() + offset, size); + return cudf::io::datasource::buffer::create(std::move(byte_data)); + } + + size_t host_read(size_t offset, size_t size, uint8_t* dst) override + { + auto const read_size = std::min(size, data_.size() - offset); + std::memcpy(dst, data_.data() + offset, read_size); + return read_size; + } + + [[nodiscard]] bool supports_device_read() const override { return true; } + + std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override + { + // For testing, just copy the data from the host buffer into a new buffer + size = std::min(size, data_.size() - offset); + rmm::device_buffer out_data(size, stream); + cudaMemcpyAsync( + out_data.data(), data_.data() + offset, size, cudaMemcpyHostToDevice, stream.value()); + cudaStreamSynchronize(stream.value()); + return cudf::io::datasource::buffer::create(std::move(out_data)); + } + + std::future device_read_async(size_t offset, + size_t size, + uint8_t* dst, + rmm::cuda_stream_view stream) override + { + // This datasource returns a future that throws a custom exception when accessed for testing + std::promise promise; + promise.set_exception(std::make_exception_ptr(AsyncException())); + return promise.get_future(); + } + + [[nodiscard]] size_t size() const override { return data_.size(); } +}; + +/** + * @brief Data sink that throws an exception in device_write_async for testing + */ +class ThrowingDeviceWriteDataSink : public cudf::io::data_sink { + private: + size_t buffer_size_ = 0; + + public: + void host_write(void const* data, size_t size) override { buffer_size_ += size; } + + [[nodiscard]] bool supports_device_write() const override { return true; } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + buffer_size_ += size; + } + + std::future device_write_async(void const* gpu_data, + size_t size, + rmm::cuda_stream_view stream) override + { + // This data sink returns a future that throws a custom exception when accessed for testing + std::promise promise; + promise.set_exception(std::make_exception_ptr(AsyncException())); + return promise.get_future(); + } + + void flush() override {} + + size_t bytes_written() override { return buffer_size_; } +}; + +} // namespace cudf::test diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 8bdda1dc4bf..53e4d3881e1 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "../io_test_utils.hpp" + #include #include #include @@ -3591,4 +3593,58 @@ TEST_F(JsonBatchedReaderTest, EmptyLastBatch) cudf::test::strings_column_wrapper{{"b", "b", "b", "b"}}); } +TEST_F(JsonReaderTest, DeviceReadAsyncThrows) +{ + // Create simple JSON data + std::string json_string = R"({"a": 1} +{"a": 2} +{"a": 3} +{"a": 4} +{"a": 5})"; + + // Convert to char vector + std::vector json_data(json_string.begin(), json_string.end()); + + // Create our throwing datasource + auto throwing_source = std::make_unique(json_data); + cudf::io::source_info source_info(throwing_source.get()); + + // Try to read the JSON data - this should either succeed or propagate AsyncException + // from device_read_async. + cudf::io::json_reader_options read_args = + cudf::io::json_reader_options::builder(source_info).lines(true); + try { + cudf::io::read_json(read_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} + +TEST_F(JsonReaderTest, DeviceWriteAsyncThrows) +{ + // Create a simple table to write + auto col0 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto table_to_write = cudf::table_view{{col0}}; + + auto throwing_sink = std::make_unique(); + + cudf::io::json_writer_options write_args = cudf::io::json_writer_options::builder( + cudf::io::sink_info{throwing_sink.get()}, table_to_write); + + // The write_json call should either succeed or throw AsyncException. + try { + cudf::io::write_json(write_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index b0b83f7f419..90b5ebbfd43 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -15,6 +15,7 @@ */ #include "compression_common.hpp" +#include "io_test_utils.hpp" #include #include @@ -2305,6 +2306,59 @@ TEST_F(OrcWriterTest, MultipleBlocksInStripeFooter) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } +TEST_F(OrcReaderTest, DeviceReadAsyncThrows) +{ + // Create a simple ORC file in memory + auto col0 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto table_to_write = table_view{{col0}}; + + std::vector out_buffer; + cudf::io::orc_writer_options write_args = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, table_to_write); + cudf::io::write_orc(write_args); + + // Create our throwing datasource + auto throwing_source = std::make_unique(out_buffer); + cudf::io::source_info source_info(throwing_source.get()); + + // Try to read the ORC file - this should either succeed or propagate AsyncException + // from device_read_async. 
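+  // (In this test the exception originates from the promise that
+  // ThrowingDeviceReadDatasource::device_read_async seeds with AsyncException;
+  // it only surfaces when the reader actually takes the device-read path.)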
+ cudf::io::orc_reader_options read_args = cudf::io::orc_reader_options::builder(source_info); + try { + cudf::io::read_orc(read_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} + +TEST_F(OrcReaderTest, DeviceWriteAsyncThrows) +{ + // Create a simple table to write + auto col0 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto table_to_write = table_view{{col0}}; + + auto throwing_sink = std::make_unique(); + + cudf::io::orc_writer_options write_args = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{throwing_sink.get()}, table_to_write); + + // The write_orc call should either succeed or throw AsyncException. + // Should only fail if a different exception is thrown. + try { + cudf::io::write_orc(write_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} + INSTANTIATE_TEST_CASE_P(Nvcomp, OrcCompressionTest, ::testing::Combine(::testing::Values("NVCOMP"), diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index a39c943f067..3213034b541 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -15,6 +15,7 @@ */ #include "compression_common.hpp" +#include "io_test_utils.hpp" #include "parquet_common.hpp" #include @@ -33,6 +34,7 @@ #include #include +#include using ParquetDecompressionTest = DecompressionTest; @@ -3165,3 +3167,56 @@ TEST_F(ParquetReaderTest, RowBoundsAndFilter) metadata.num_row_groups_after_stats_filter.value() == 3); // RGs: {},{0,1,4},{} } } + +TEST_F(ParquetReaderTest, DeviceReadAsyncThrows) +{ + // Create a simple parquet file in memory + auto col0 = cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}}; + auto table_to_write = table_view{{col0}}; + + std::vector out_buffer; + cudf::io::parquet_writer_options write_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, table_to_write); + cudf::io::write_parquet(write_args); + + // Create our throwing datasource + auto throwing_source = std::make_unique(out_buffer); + cudf::io::source_info source_info(throwing_source.get()); + + // Try to read the parquet file - this should either succeed or propagate AsyncException + // from device_read_async. 
+ cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(source_info); + try { + cudf::io::read_parquet(read_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} + +TEST_F(ParquetReaderTest, DeviceWriteAsyncThrows) +{ + // Create a simple table to write + auto col0 = cudf::test::fixed_width_column_wrapper<int>{{1, 2, 3, 4, 5}}; + auto table_to_write = table_view{{col0}}; + + auto throwing_sink = std::make_unique<cudf::test::ThrowingDeviceWriteDataSink>(); + + cudf::io::parquet_writer_options write_args = cudf::io::parquet_writer_options::builder( + cudf::io::sink_info{throwing_sink.get()}, table_to_write); + + // The write_parquet call should either succeed or throw AsyncException. + try { + cudf::io::write_parquet(write_args); + // Test passes if no exception is thrown + } catch (const cudf::test::AsyncException&) { + // Test passes if AsyncException is thrown (expected test exception) + } catch (const std::exception& e) { + // Test fails if any other exception is thrown + FAIL() << "Unexpected exception thrown: " << e.what(); + } +} From 94b245fb5f7e5cf4d0cda78486a0f85853ff1782 Mon Sep 17 00:00:00 2001 From: Gary Shen Date: Mon, 11 Aug 2025 15:27:55 +0800 Subject: [PATCH 089/366] Use cudaDeviceGetAttribute to get ComputeMode for CUDA13 (#19645) CUDA 13 no longer supports reading the compute mode from cudaDeviceProp.computeMode, so the code is changed to use cudaDeviceGetAttribute instead. The macro check CUDART_VERSION >= 13000 detects a CUDA 13 build, and the original cudaDeviceProp path is kept for CUDA 12 in the else branch. closes #19644 Authors: - Gary Shen (https://github.com/GaryShen2008) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19645 --- java/src/main/native/src/CudaJni.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp index 1fca42a18c4..c5359c821ae 100644 --- a/java/src/main/native/src/CudaJni.cpp +++ b/java/src/main/native/src/CudaJni.cpp @@ -199,9 +199,19 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv* env cudf::jni::auto_set_device(env); int device; CUDF_CUDA_TRY(cudaGetDevice(&device)); + +#if defined(CUDART_VERSION) && CUDART_VERSION >= 13000 + // CUDA 13.0+ removed computeMode from cudaDeviceProp + // Return computeMode from cudaDeviceGetAttribute + int compute_mode; + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&compute_mode, cudaDevAttrComputeMode, device)); + return compute_mode; +#else + // CUDA 12.x and earlier cudaDeviceProp device_prop; CUDF_CUDA_TRY(cudaGetDeviceProperties(&device_prop, device)); return device_prop.computeMode; +#endif } CATCH_STD(env, -2); } From 78c8a953ed7d21a37cdc8a7e8fe6048af8e38a90 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 11 Aug 2025 08:17:37 -0400 Subject: [PATCH 090/366] Simplify cudf::scalar usage in reduce utility (#19608) Fixes the `cudf::reduction::detail::reduce` internal utility to use the returned `cudf::scalar` instances directly in the CUB calls to simplify the logic. This should help solve the issues for building/running #19119 -- the device-scalar ctors are no longer required.
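For illustration, the simplified pattern constructs the output scalar up front and has CUB reduce directly into its storage (a condensed sketch of the code in the diff below, not a complete function):

    using ScalarType = cudf::scalar_type_t<OutputType>;
    auto result = std::make_unique<ScalarType>(initial_value, true, stream, mr);
    cub::DeviceReduce::Reduce(d_temp_storage.data(), temp_storage_bytes, d_in,
                              result->data(),  // write the reduction result into the scalar
                              num_items, binary_op, initial_value, stream.value());
    return result;  // no temporary rmm::device_scalar and no extra scalar ctor call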
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19608 --- .../cudf/reduction/detail/reduction.cuh | 58 +++++++++---------- cpp/src/copying/get_element.cu | 20 +++---- cpp/src/scalar/scalar_factories.cpp | 19 +++--- 3 files changed, 41 insertions(+), 56 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction.cuh b/cpp/include/cudf/reduction/detail/reduction.cuh index 32c618e3b60..6b15c8fb4c0 100644 --- a/cpp/include/cudf/reduction/detail/reduction.cuh +++ b/cpp/include/cudf/reduction/detail/reduction.cuh @@ -40,18 +40,17 @@ namespace detail { /** * @brief Compute the specified simple reduction over the input range of elements. * - * @param[in] d_in the begin iterator - * @param[in] num_items the number of items - * @param[in] op the reduction operator - * @param[in] init Optional initial value of the reduction - * @param[in] stream CUDA stream used for device memory operations and kernel launches - * @param[in] mr Device memory resource used to allocate the returned scalar's device - * memory - * @returns Output scalar in device memory - * * @tparam Op the reduction operator with device binary operator * @tparam InputIterator the input column iterator * @tparam OutputType the output type of reduction + * + * @param d_in the begin iterator + * @param num_items the number of items + * @param op the reduction operator + * @param init Optional initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Output scalar in device memory */ template reduce(InputIterator d_in, { auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = init.value_or(op.template get_identity()); - auto dev_result = rmm::device_scalar{initial_value, stream, mr}; + using ScalarType = cudf::scalar_type_t; + auto result = std::make_unique(initial_value, true, stream, mr); // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -74,7 +74,7 @@ std::unique_ptr reduce(InputIterator d_in, cub::DeviceReduce::Reduce(d_temp_storage.data(), temp_storage_bytes, d_in, - dev_result.data(), + result->data(), num_items, binary_op, initial_value, @@ -85,15 +85,12 @@ std::unique_ptr reduce(InputIterator d_in, cub::DeviceReduce::Reduce(d_temp_storage.data(), temp_storage_bytes, d_in, - dev_result.data(), + result->data(), num_items, binary_op, initial_value, stream.value()); - - // only for string_view, data is copied - auto s = new cudf::scalar_type_t(std::move(dev_result), true, stream, mr); - return std::unique_ptr(s); + return result; } template reduce(InputIterator d_in, initial_value, stream.value()); - using ScalarType = cudf::scalar_type_t; - auto s = new ScalarType(dev_result, true, stream, mr); // only for string_view, data is copied - return std::unique_ptr(s); + return std::make_unique(dev_result, true, stream, mr); } /** * @brief compute reduction by the compound operator (reduce and transform) * - * @param[in] d_in the begin iterator - * @param[in] num_items the number of items - * @param[in] op the reduction operator - * @param[in] valid_count Number of valid items - * @param[in] ddof Delta degrees of freedom used for standard deviation and variance - * @param[in] init Optional initial value of the 
reduction - * @param[in] stream CUDA stream used for device memory operations and kernel launches - * @param[in] mr Device memory resource used to allocate the returned scalar's device - * memory - * @returns Output scalar in device memory - * * The reduction operator must have `intermediate::compute_result()` method. * This method performs reduction using binary operator `Op::Op` and transforms the * result to `OutputType` using `compute_result()` transform method. @@ -177,6 +161,16 @@ std::unique_ptr reduce(InputIterator d_in, * @tparam Op the reduction operator with device binary operator * @tparam InputIterator the input column iterator * @tparam OutputType the output type of reduction + * + * @param d_in the begin iterator + * @param num_items the number of items + * @param op the reduction operator + * @param valid_count Number of valid items + * @param ddof Delta degrees of freedom used for standard deviation and variance + * @param init Optional initial value of the reduction + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Output scalar in device memory */ template reduce(InputIterator d_in, // compute the result value from intermediate value in device using ScalarType = cudf::scalar_type_t; - auto result = new ScalarType(OutputType{0}, true, stream, mr); + auto result = std::make_unique(OutputType{0}, true, stream, mr); thrust::for_each_n(rmm::exec_policy(stream), intermediate_result.data(), 1, [dres = result->data(), op, valid_count, ddof] __device__(auto i) { *dres = op.template compute_result(i, valid_count, ddof); }); - return std::unique_ptr(result); + return result; } } // namespace detail diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 80b0bd5242f..b492f774a46 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -108,9 +108,7 @@ struct get_element_functor { stream); if (!key_index_scalar.is_valid(stream)) { - auto null_result = make_default_constructed_scalar(dict_view.keys().type(), stream, mr); - null_result->set_valid_async(false, stream); - return null_result; + return make_default_constructed_scalar(dict_view.keys().type(), stream, mr); } // retrieve the key element using the key-index @@ -156,12 +154,12 @@ struct get_element_functor { auto device_col = column_device_view::create(input, stream); - cudf::detail::device_scalar<Type> temp_data(stream, mr); - cudf::detail::device_scalar<bool> temp_valid(stream, mr); + auto result = std::make_unique<fixed_point_scalar<Type>>( + Type{}, numeric::scale_type{input.type().scale()}, false, stream, mr); device_single_thread( - [buffer = temp_data.data(), - validity = temp_valid.data(), + [buffer = result->data(), + validity = result->validity_data(), d_col = *device_col, index] __device__() mutable { *buffer = d_col.element<Type>(index); @@ -169,11 +167,7 @@ struct get_element_functor { }, stream); - return std::make_unique<fixed_point_scalar<Type>>(std::move(temp_data), - numeric::scale_type{input.type().scale()}, - temp_valid.value(stream), - stream, - mr); + return result; } template >* p = nullptr> diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 9f242bdffe0..b31c47c6cff 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,25 +27,22 @@ namespace cudf { namespace { struct scalar_construction_helper { template <typename T, - typename ScalarType = scalar_type_t<T>, std::enable_if_t<is_fixed_width<T>() and not is_fixed_point<T>()>* = nullptr> std::unique_ptr<scalar> operator()(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - using Type = device_storage_type_t<T>; - auto s = new ScalarType(Type{}, false, stream, mr); - return std::unique_ptr<scalar>(s); + using Type = device_storage_type_t<T>; + using ScalarType = scalar_type_t<T>; + return std::make_unique<ScalarType>(Type{}, false, stream, mr); } - template <typename T, - typename ScalarType = scalar_type_t<T>, - std::enable_if_t<is_fixed_point<T>()>* = nullptr> + template <typename T, std::enable_if_t<is_fixed_point<T>()>* = nullptr> std::unique_ptr<scalar> operator()(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - using Type = device_storage_type_t<T>; - auto s = new ScalarType(Type{}, numeric::scale_type{0}, false, stream, mr); - return std::unique_ptr<scalar>(s); + using Type = device_storage_type_t<T>; + using ScalarType = scalar_type_t<T>; + return std::make_unique<ScalarType>(Type{}, numeric::scale_type{0}, false, stream, mr); } template <typename T, std::enable_if_t<not is_fixed_width<T>()>* = nullptr> From 314dcbc72feda715830815a74a0ad3f91ae493e1 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 11 Aug 2025 11:25:40 -0500 Subject: [PATCH 091/366] Make `DataFrame.dtypes` not fallback to CPU always (#19627) Fixes: #19620 This PR returns the corresponding pandas dtypes for `CategoricalDtype` and `IntervalDtype` only in pandas compatibility mode so that correct dtypes are present with `cudf.pandas` enabled. This change introduces 9 low-priority failures that are now xfailed.
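A sketch of the intended user-visible behavior under `cudf.pandas` (illustrative only; any session details beyond the diff below are assumptions):

    # python -m cudf.pandas script.py, or %load_ext cudf.pandas in IPython
    import pandas as pd
    df = pd.DataFrame({"a": pd.Series(["x", "y"], dtype="category")})
    # df.dtypes is now computed on the GPU; cudf CategoricalDtype/IntervalDtype
    # entries are converted with .to_pandas() instead of falling back to CPU
    df.dtypes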
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19627 --- python/cudf/cudf/core/dataframe.py | 23 ++++++++++- python/cudf/cudf/pandas/_wrappers/pandas.py | 19 +++++++++- .../cudf/pandas/scripts/conftest-patch.py | 38 +++++++++++-------- 3 files changed, 63 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ff0fe0e0564..9833768a15a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -70,6 +70,7 @@ Decimal64Dtype, Decimal128Dtype, IntervalDtype, + ListDtype, StructDtype, ) from cudf.core.groupby.groupby import DataFrameGroupBy, groupby_doc_template @@ -1252,7 +1253,27 @@ def dtypes(self) -> pd.Series: string object dtype: object """ - return pd.Series(dict(self._dtypes), dtype="object") + result_dict = dict(self._dtypes) + if cudf.get_option("mode.pandas_compatible"): + for key, value in result_dict.items(): + if isinstance( + value, + ( + ListDtype, + StructDtype, + Decimal32Dtype, + Decimal64Dtype, + Decimal128Dtype, + ), + ): + raise TypeError( + f"Column '{key}' has {type(value).__name__}, which is not supported in pandas." + ) + + result = pd.Series( + result_dict, index=self._data.to_pandas_index, dtype="object" + ) + return result @property def ndim(self) -> int: diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 6f35412282d..492334c9416 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -266,6 +266,23 @@ def ignore_ipython_canary_check(self, **kwargs): ) +def _DataFrame_dtypes_apply_func(value): + if isinstance(value, (cudf.CategoricalDtype, cudf.IntervalDtype)): + return value.to_pandas() + return value + + +def _DataFrame__dtypes(self): + result = _fast_slow_function_call( + lambda self: self.dtypes, + self, + )[0] + result = _maybe_wrap_result( + result._fsproxy_slow.apply(_DataFrame_dtypes_apply_func), None + ) + return result + + DataFrame = make_final_proxy_type( "DataFrame", cudf.DataFrame, @@ -280,7 +297,7 @@ def ignore_ipython_canary_check(self, **kwargs): "_constructor_sliced": _FastSlowAttribute("_constructor_sliced"), "_accessors": set(), "_ipython_canary_method_should_not_exist_": ignore_ipython_canary_check, - "dtypes": _FastSlowAttribute("dtypes", private=True), + "dtypes": property(_DataFrame__dtypes), }, ) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index c48e4058c02..c9795a643a9 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -88,21 +88,6 @@ def pytest_unconfigure(config): # TODO: Pass these tests with cudf.pandas enabled. 
NODEIDS_THAT_FAIL_WITH_CUDF_PANDAS = { - "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[False]", - "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[None]", - "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[True]", - "tests/arrays/test_datetimelike.py::test_searchsorted_datetimelike_with_listlike_invalid_dtype[arg0-values1]", - "tests/extension/test_categorical.py::TestCategorical::test_searchsorted[True]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[backfill-expected1]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[nearest-expected2]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[pad-expected0]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_error", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[backfill]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[nearest]", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[pad]", - "tests/indexes/ranges/test_indexing.py::TestGetIndexer::test_get_indexer_limit", - "tests/indexes/timedeltas/test_searchsorted.py::TestSearchSorted::test_searchsorted_invalid_argument_dtype[arg0]", - "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_setitem_frame_with_inverted_slice", "tests/api/test_api.py::test_pandas_array_alias", "tests/apply/test_frame_apply.py::test_agg_transform[axis='columns']", "tests/apply/test_frame_apply.py::test_agg_transform[axis='index']", @@ -760,6 +745,9 @@ def pytest_unconfigure(config): "tests/arrays/boolean/test_reduction.py::test_reductions_return_types[count-True]", "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_memory_usage", "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_min_max_ordered[array]", + "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[False]", + "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[None]", + "tests/arrays/categorical/test_analytics.py::TestCategoricalAnalytics::test_searchsorted[True]", "tests/arrays/categorical/test_api.py::TestCategoricalAPI::test_rename_categories", "tests/arrays/categorical/test_api.py::TestCategoricalAPI::test_set_categories", "tests/arrays/categorical/test_api.py::TestPrivateCategoricalAPI::test_codes_immutable", @@ -1347,6 +1335,7 @@ def pytest_unconfigure(config): "tests/arrays/test_datetimelike.py::TestPeriodArray::test_array_interface[YE]", "tests/arrays/test_datetimelike.py::TestTimedeltaArray::test_array_interface", "tests/arrays/test_datetimelike.py::TestTimedeltaArray::test_searchsorted_castable_strings[pyarrow_numpy-series]", + "tests/arrays/test_datetimelike.py::test_searchsorted_datetimelike_with_listlike_invalid_dtype[arg0-values1]", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_array_interface", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_astype_copies[datetime64[ns]-datetime64[ns]]", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_astype_to_same", @@ -3764,6 +3753,7 @@ def pytest_unconfigure(config): "tests/extension/test_categorical.py::TestCategorical::test_reduce_series_boolean[any-False]", 
"tests/extension/test_categorical.py::TestCategorical::test_reduce_series_boolean[any-True]", "tests/extension/test_categorical.py::TestCategorical::test_reindex_non_na_fill_value", + "tests/extension/test_categorical.py::TestCategorical::test_searchsorted[True]", "tests/extension/test_categorical.py::TestCategorical::test_series_constructor", "tests/extension/test_categorical.py::TestCategorical::test_series_constructor_scalar_na_with_index", "tests/extension/test_categorical.py::TestCategorical::test_setitem_frame_2d_values", @@ -8180,11 +8170,18 @@ def pytest_unconfigure(config): "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[nan-int]", "tests/indexes/numeric/test_indexing.py::TestContains::test_contains_float64_nans", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_invalid", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[backfill-expected1]", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[nearest-expected2]", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[pad-expected0]", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_error", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_numeric_index_boolean_target[get_indexer-float64]", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_numeric_index_boolean_target[get_indexer-int64]", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_numeric_index_boolean_target[get_indexer-range]", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_numeric_index_boolean_target[get_indexer-uint64]", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_numeric_vs_bool", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[backfill]", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[nearest]", + "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[pad]", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_loc_masked_na_and_nan", "tests/indexes/numeric/test_numeric.py::TestFloatNumericIndex::test_equals_numeric", "tests/indexes/numeric/test_numeric.py::TestFloatNumericIndex::test_type_coercion_fail[int16]", @@ -8223,6 +8220,7 @@ def pytest_unconfigure(config): "tests/indexes/ranges/test_constructors.py::TestRangeIndexConstructors::test_constructor[args5-kwargs5-0-0-1-foo]", "tests/indexes/ranges/test_constructors.py::TestRangeIndexConstructors::test_constructor[args6-kwargs6-0-0-1-None]", "tests/indexes/ranges/test_constructors.py::TestRangeIndexConstructors::test_constructor[args6-kwargs6-0-0-1-foo]", + "tests/indexes/ranges/test_indexing.py::TestGetIndexer::test_get_indexer_limit", "tests/indexes/ranges/test_join.py::TestJoin::test_join_self[inner]", "tests/indexes/ranges/test_join.py::TestJoin::test_join_self[left]", "tests/indexes/ranges/test_join.py::TestJoin::test_join_self[outer]", @@ -8751,6 +8749,7 @@ def pytest_unconfigure(config): "tests/indexes/timedeltas/test_formats.py::TestTimedeltaIndexRendering::test_representation[__repr__]", "tests/indexes/timedeltas/test_formats.py::TestTimedeltaIndexRendering::test_representation[__str__]", "tests/indexes/timedeltas/test_indexing.py::TestContains::test_contains", + 
"tests/indexes/timedeltas/test_searchsorted.py::TestSearchSorted::test_searchsorted_invalid_argument_dtype[arg0]", "tests/indexes/timedeltas/test_setops.py::TestTimedeltaIndex::test_intersection_equal[False]", "tests/indexes/timedeltas/test_setops.py::TestTimedeltaIndex::test_intersection_equal[None]", "tests/indexes/timedeltas/test_setops.py::TestTimedeltaIndex::test_intersection_non_monotonic[None-rng2-expected2]", @@ -10010,6 +10009,7 @@ def pytest_unconfigure(config): "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_setitem_consistency[val2]", "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_setitem_consistency_dt64_to_float", "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_setitem_empty_append_expands_rows_mixed_dtype", + "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_setitem_frame_with_inverted_slice", "tests/indexing/test_loc.py::TestLocBaseIndependent::test_loc_uint64_disallow_negative", "tests/indexing/test_loc.py::TestLocBaseIndependent::test_setitem_new_key_tz[loc]", "tests/indexing/test_loc.py::TestLocBaseIndependent::test_setitem_new_key_tz[setitem]", @@ -14991,6 +14991,14 @@ def pytest_unconfigure(config): "tests/window/test_rolling_functions.py::test_rolling_max_resample[None]", "tests/window/test_rolling_functions.py::test_rolling_median_resample", "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type0]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type2]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type3]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type4]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type5]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type6]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type7]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type8]", + "tests/window/test_rolling_functions.py::test_rolling_min_max_numeric_types[data_type9]", "tests/window/test_rolling_functions.py::test_rolling_min_resample[10]", "tests/window/test_rolling_functions.py::test_rolling_min_resample[1]", "tests/window/test_rolling_functions.py::test_rolling_min_resample[2]", From 3caed58d4d532f01c922deabf464ad6c934d9d49 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:55:32 -0700 Subject: [PATCH 092/366] Move test_replace.py to new cudf classic directory structure (#19629) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19629 --- .../cudf/tests/dataframe/methods/test_clip.py | 66 + .../tests/dataframe/methods/test_fillna.py | 76 + .../tests/dataframe/methods/test_replace.py | 172 ++ .../tests/dataframe/methods/test_where.py | 81 + .../cudf/tests/series/methods/test_clip.py | 46 + .../cudf/tests/series/methods/test_fillna.py | 496 +++++- .../cudf/tests/series/methods/test_replace.py | 527 ++++++ .../cudf/tests/series/methods/test_where.py | 47 + python/cudf/cudf/tests/test_replace.py | 1459 ----------------- 9 files changed, 1510 insertions(+), 1460 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_clip.py 
create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_fillna.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_replace.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_where.py create mode 100644 python/cudf/cudf/tests/series/methods/test_clip.py create mode 100644 python/cudf/cudf/tests/series/methods/test_replace.py delete mode 100644 python/cudf/cudf/tests/test_replace.py diff --git a/python/cudf/cudf/tests/dataframe/methods/test_clip.py b/python/cudf/cudf/tests/dataframe/methods/test_clip.py new file mode 100644 index 00000000000..3387368278e --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_clip.py @@ -0,0 +1,66 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + ("lower", "upper"), + [ + ([2, 7.4], [4, 7.9]), + ([2, 7.4], None), + ( + None, + [4, 7.9], + ), + ], +) +def test_dataframe_clip(lower, upper, inplace): + pdf = pd.DataFrame( + {"a": [1, 2, 3, 4, 5], "b": [7.1, 7.24, 7.5, 7.8, 8.11]} + ) + gdf = cudf.from_pandas(pdf) + + got = gdf.clip(lower=lower, upper=upper, inplace=inplace) + expect = pdf.clip(lower=lower, upper=upper, axis=1) + + if inplace is True: + assert_eq(expect, gdf) + else: + assert_eq(expect, got) + + +@pytest.mark.parametrize( + ("lower", "upper"), + [("b", "d"), ("b", None), (None, "c"), (None, None)], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_dataframe_category_clip(lower, upper, inplace): + data = ["a", "b", "c", "d", "e"] + pdf = pd.DataFrame({"a": data}) + gdf = cudf.from_pandas(pdf) + gdf["a"] = gdf["a"].astype("category") + + expect = pdf.clip(lower=lower, upper=upper) + got = gdf.clip(lower=lower, upper=upper, inplace=inplace) + + if inplace is True: + assert_eq(expect, gdf.astype("str")) + else: + assert_eq(expect, got.astype("str")) + + +@pytest.mark.parametrize( + ("lower", "upper"), + [([2, 7.4], [4, 7.9, "d"]), ([2, 7.4, "a"], [4, 7.9, "d"])], +) +def test_dataframe_exceptions_for_clip(lower, upper): + gdf = cudf.DataFrame( + {"a": [1, 2, 3, 4, 5], "b": [7.1, 7.24, 7.5, 7.8, 8.11]} + ) + + with pytest.raises(ValueError): + gdf.clip(lower=lower, upper=upper) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_fillna.py b/python/cudf/cudf/tests/dataframe/methods/test_fillna.py new file mode 100644 index 00000000000..8a121cad33d --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_fillna.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame({"a": [1, 2, None], "b": [None, None, 5]}), + pd.DataFrame( + {"a": [1, 2, None], "b": [None, None, 5]}, index=["a", "p", "z"] + ), + pd.DataFrame({"a": [1, 2, 3]}), + ], +) +@pytest.mark.parametrize( + "value", + [ + 10, + pd.Series([10, 20, 30]), + pd.Series([3, 4, 5]), + pd.Series([10, 20, 30], index=["z", "a", "p"]), + {"a": 5, "b": pd.Series([3, 4, 5])}, + {"a": 5001}, + {"b": pd.Series([11, 22, 33], index=["a", "p", "z"])}, + {"a": 5, "b": pd.Series([3, 4, 5], index=["a", "p", "z"])}, + {"c": 100}, + np.nan, + ], +) +def test_fillna_dataframe(pdf, value, inplace): + if inplace: + pdf = pdf.copy(deep=True) + gdf = cudf.from_pandas(pdf) + + fill_value_pd = value + if isinstance(fill_value_pd, (pd.Series, pd.DataFrame)): + fill_value_cudf = cudf.from_pandas(fill_value_pd) + elif isinstance(fill_value_pd, dict): + fill_value_cudf = {} + for key in fill_value_pd: + temp_val = fill_value_pd[key] + if isinstance(temp_val, pd.Series): + temp_val = cudf.from_pandas(temp_val) + fill_value_cudf[key] = temp_val + else: + fill_value_cudf = value + + expect = pdf.fillna(fill_value_pd, inplace=inplace) + got = gdf.fillna(fill_value_cudf, inplace=inplace) + + if inplace: + got = gdf + expect = pdf + + assert_eq(expect, got) + + +def test_fillna_columns_multiindex(): + columns = pd.MultiIndex.from_tuples([("a", "b"), ("d", "e")]) + pdf = pd.DataFrame( + {"0": [1, 2, None, 3, None], "1": [None, None, None, None, 4]} + ) + pdf.columns = columns + gdf = cudf.from_pandas(pdf) + + expected = pdf.fillna(10) + actual = gdf.fillna(10) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_replace.py b/python/cudf/cudf/tests/dataframe/methods/test_replace.py new file mode 100644 index 00000000000..9f6b9007de7 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_replace.py @@ -0,0 +1,172 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning introduced in pandas-2.2.0", +) +@pytest.mark.parametrize( + "data, dtype", + [ + ( + { + "a": [0, 1, None, 2, 3], + "b": [3, 2, 2, 3, None], + "c": ["abc", "def", ".", None, None], + }, + None, + ), + ( + { + "a": ["one", "two", None, "three"], + "b": ["one", None, "two", "three"], + }, + "category", + ), + ( + { + "col one": [None, 10, 11, None, 1000, 500, 600], + "col two": ["abc", "def", "ghi", None, "pp", None, "a"], + "a": [0.324, 0.234, 324.342, 23.32, 9.9, None, None], + }, + None, + ), + ], +) +@pytest.mark.parametrize( + "to_replace,value", + [ + (0, 4), + ([0, 1], [4, 5]), + ([0, 1], 4), + ({"a": 0, "b": 0}, {"a": 4, "b": 5}), + ({"a": 0}, {"a": 4}), + ("abc", "---"), + ([".", "gh"], "hi"), + ([".", "def"], ["_", None]), + ({"c": 0}, {"a": 4, "b": 5}), + ({"a": 2}, {"c": "a"}), + ("two", "three"), + ([1, 2], pd.Series([10, 11])), + (pd.Series([10, 11], index=[3, 2]), None), + ( + pd.Series(["a+", "+c", "p", "---"], index=["abc", "gh", "l", "z"]), + None, + ), + ( + pd.Series([10, 11], index=[3, 2]), + {"a": [-10, -30], "l": [-111, -222]}, + ), + (pd.Series([10, 11], index=[3, 2]), 555), + ( + pd.Series([10, 11], index=["a", "b"]), + pd.Series([555, 1111], index=["a", "b"]), + ), + ({"a": "2", "b": "3", "zzz": "hi"}, None), + ({"a": 2, "b": 3, "zzz": "hi"}, 324353), + ( + {"a": 2, "b": 3, "zzz": "hi"}, + pd.Series([5, 6, 10], index=["a", "b", "col one"]), + ), + ], +) +def test_dataframe_replace(data, dtype, to_replace, value): + gdf = cudf.DataFrame(data, dtype=dtype) + pdf = gdf.to_pandas() + + pd_value = value + if isinstance(value, pd.Series): + gd_value = cudf.from_pandas(value) + else: + gd_value = value + + pd_to_replace = to_replace + if isinstance(to_replace, pd.Series): + gd_to_replace = cudf.from_pandas(to_replace) + else: + gd_to_replace = to_replace + + can_warn = ( + isinstance(gdf["a"].dtype, cudf.CategoricalDtype) + and isinstance(to_replace, str) + and to_replace == "two" + and isinstance(value, str) + and value == "three" + ) + with expect_warning_if(can_warn): + if pd_value is None: + expected = pdf.replace(to_replace=pd_to_replace) + else: + expected = pdf.replace(to_replace=pd_to_replace, value=pd_value) + with expect_warning_if(can_warn): + actual = gdf.replace(to_replace=gd_to_replace, value=gd_value) + + expected_sorted = expected.sort_values(by=list(expected.columns), axis=0) + actual_sorted = actual.sort_values(by=list(actual.columns), axis=0) + + assert_eq(expected_sorted, actual_sorted) + + +def test_dataframe_replace_with_nulls(): + # numerical + pdf1 = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0, 1, 2, 3]}) + gdf1 = cudf.from_pandas(pdf1) + pdf2 = pdf1.replace(0, 4) + gdf2 = gdf1.replace(0, None).fillna(4) + assert_eq(gdf2, pdf2) + + # list input + pdf6 = pdf1.replace([0, 1], [4, 5]) + gdf6 = gdf1.replace([0, 1], [4, None]).fillna(5) + assert_eq(gdf6, pdf6) + + pdf7 = pdf1.replace([0, 1], 4) + gdf7 = gdf1.replace([0, 1], None).fillna(4) + assert_eq(gdf7, pdf7) + + # dict input: + pdf8 = pdf1.replace({"a": 0, "b": 0}, {"a": 4, "b": 5}) + gdf8 = gdf1.replace({"a": 0, "b": 0}, {"a": None, "b": 5}).fillna(4) + assert_eq(gdf8, pdf8) + + gdf1 = cudf.DataFrame({"a": [0, 1, 2, 3], "b": 
[0, 1, 2, None]}) + gdf9 = gdf1.replace([0, 1], [4, 5]).fillna(3) + assert_eq(gdf9, pdf6) + + +def test_replace_df_error(): + pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}) + gdf = cudf.from_pandas(pdf) + + assert_exceptions_equal( + lfunc=pdf.replace, + rfunc=gdf.replace, + lfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}), + rfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}), + ) + + +def test_replace_multiple_rows(datadir): + path = datadir / "parquet" / "replace_multiple_rows.parquet" + pdf = pd.read_parquet(path) + gdf = cudf.read_parquet(path) + + pdf.replace([np.inf, -np.inf], np.nan, inplace=True) + gdf.replace([np.inf, -np.inf], np.nan, inplace=True) + + assert_eq(pdf, gdf, check_dtype=False) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_where.py b/python/cudf/cudf/tests/dataframe/methods/test_where.py new file mode 100644 index 00000000000..f7af9945272 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_where.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("fill_value", [[888, 999]]) +def test_dataframe_with_nulls_where_with_scalars(fill_value): + pdf = pd.DataFrame( + { + "A": [-1, 2, -3, None, 5, 6, -7, 0], + "B": [4, -2, 3, None, 7, 6, 8, 0], + } + ) + gdf = cudf.from_pandas(pdf) + + expect = pdf.where(pdf % 3 == 0, fill_value) + got = gdf.where(gdf % 3 == 0, fill_value) + + assert_eq(expect, got) + + +def test_dataframe_with_different_types(): + # Testing for int and float + pdf = pd.DataFrame( + {"A": [111, 22, 31, 410, 56], "B": [-10.12, 121.2, 45.7, 98.4, 87.6]} + ) + gdf = cudf.from_pandas(pdf) + expect = pdf.where(pdf > 50, -pdf) + got = gdf.where(gdf > 50, -gdf) + + assert_eq(expect, got) + + # Testing for string + pdf = pd.DataFrame({"A": ["a", "bc", "cde", "fghi"]}) + gdf = cudf.from_pandas(pdf) + pdf_mask = pd.DataFrame({"A": [True, False, True, False]}) + gdf_mask = cudf.from_pandas(pdf_mask) + expect = pdf.where(pdf_mask, ["cudf"]) + got = gdf.where(gdf_mask, ["cudf"]) + + assert_eq(expect, got) + + # Testing for categorical + pdf = pd.DataFrame({"A": ["a", "b", "b", "c"]}) + pdf["A"] = pdf["A"].astype("category") + gdf = cudf.from_pandas(pdf) + expect = pdf.where(pdf_mask, "c") + got = gdf.where(gdf_mask, ["c"]) + + assert_eq(expect, got) + + +def test_dataframe_where_with_different_options(): + pdf = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]}) + gdf = cudf.from_pandas(pdf) + + # numpy array + boolean_mask = np.array([[False, True], [True, False], [False, True]]) + + expect = pdf.where(boolean_mask, -pdf) + got = gdf.where(boolean_mask, -gdf) + + assert_eq(expect, got) + + # with single scalar + expect = pdf.where(boolean_mask, 8) + got = gdf.where(boolean_mask, 8) + + assert_eq(expect, got) + + # with multi scalar + expect = pdf.where(boolean_mask, [8, 9]) + got = gdf.where(boolean_mask, [8, 9]) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_clip.py b/python/cudf/cudf/tests/series/methods/test_clip.py new file mode 100644 index 00000000000..0a3fc787ef1 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_clip.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025, NVIDIA CORPORATION.
+ + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + ("data", "lower", "upper"), + [ + ([1, 2, 3, 4, 5], 2, 4), + ([1, 2, 3, 4, 5], 2, None), + ([1, 2, 3, 4, 5], None, 4), + ([1, 2, 3, 4, 5], None, None), + ([1, 2, 3, 4, 5], 4, 2), + ([1.0, 2.0, 3.0, 4.0, 5.0], 4, 2), + (pd.Series([1, 2, 3, 4, 5], dtype="int32"), 4, 2), + (["a", "b", "c", "d", "e"], "b", "d"), + (["a", "b", "c", "d", "e"], "b", None), + (["a", "b", "c", "d", "e"], None, "d"), + (["a", "b", "c", "d", "e"], "d", "b"), + ], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_series_clip(data, lower, upper, inplace): + psr = pd.Series(data) + gsr = cudf.from_pandas(psr) + + expect = psr.clip(lower=lower, upper=upper) + got = gsr.clip(lower=lower, upper=upper, inplace=inplace) + + if inplace is True: + assert_eq(expect, gsr) + else: + assert_eq(expect, got) + + +def test_series_exceptions_for_clip(): + with pytest.raises(ValueError): + cudf.Series([1, 2, 3, 4]).clip([1, 2], [2, 3]) + + with pytest.raises(NotImplementedError): + cudf.Series([1, 2, 3, 4]).clip(1, 2, axis=0) diff --git a/python/cudf/cudf/tests/series/methods/test_fillna.py b/python/cudf/cudf/tests/series/methods/test_fillna.py index e64fb209519..094b27c4fff 100644 --- a/python/cudf/cudf/tests/series/methods/test_fillna.py +++ b/python/cudf/cudf/tests/series/methods/test_fillna.py @@ -1,10 +1,19 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. +import decimal import numpy as np +import pandas as pd +import pyarrow as pa import pytest import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.mark.parametrize( @@ -16,7 +25,6 @@ [np.nan, 1, 10, 393.32, np.nan], ], ) -@pytest.mark.parametrize("nan_as_null", [True, False]) @pytest.mark.parametrize("fill_value", [1.2, 332, np.nan]) def test_fillna_with_nan(data, nan_as_null, fill_value): gs = cudf.Series(data, dtype="float64", nan_as_null=nan_as_null) @@ -79,3 +87,489 @@ def test_timedelta_fillna(data, timedelta_types_as_str, fill_value): actual = actual.dropna() assert_eq(expected, actual) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "data", + [ + [1, None, None, 2, 3, 4], + [None, None, 1, 2, None, 3, 4], + [1, 2, None, 3, 4, None, None], + [0] + [None] * 14, + [None] * 14 + [0], + ], +) +@pytest.mark.parametrize("container", [pd.Series, pd.DataFrame]) +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +def test_fillna_method_numerical( + data, container, numeric_types_as_str, method, inplace +): + if container == pd.DataFrame: + data = {"a": data, "b": data, "c": data} + + pdata = container(data) + + data_dtype = numeric_types_as_str + if np.dtype(numeric_types_as_str).kind != "f": + data_dtype = cudf.utils.dtypes.np_dtypes_to_pandas_dtypes[ + np.dtype(numeric_types_as_str) + ] + pdata = pdata.astype(data_dtype) + + # Explicitly using nan_as_null=True + gdata = cudf.from_pandas(pdata, nan_as_null=True) + + with pytest.warns(FutureWarning): + expected = pdata.fillna(method=method, inplace=inplace) + with pytest.warns(FutureWarning): + actual = gdata.fillna(method=method, inplace=inplace) + + if inplace: + expected = pdata + actual = gdata + + assert_eq(expected, actual, check_dtype=False) + + +@pytest.mark.parametrize( + "gsr_data, dtype", + [ + ( +
["2.34", "5.2", "7.47", None, "92.29", None], + cudf.Decimal64Dtype(7, 2), + ), + ( + ["-74.56", None, "-23.73", "34.55", "2.89", None], + cudf.Decimal32Dtype(7, 2), + ), + ( + ["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan], + cudf.Decimal64Dtype(8, 3), + ), + ( + ["2.964", None, "57.432", "-989.330", None, "56.444"], + cudf.Decimal64Dtype(8, 3), + ), + ( + [np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan], + cudf.Decimal64Dtype(10, 4), + ), + ( + ["2.964", None, "54347.432", "-989.330", None, "56.444"], + cudf.Decimal128Dtype(20, 7), + ), + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + lambda: 42, + lambda: -123, + lambda: decimal.Decimal("8.2"), + lambda: decimal.Decimal("-12.87"), + lambda: cudf.Series( + [None, -854, 9533, -274, -845, 7924], dtype="int32" + ), + lambda: cudf.Series( + ["-53.5", "13.4", "-64.3", None, "42.42", None] + ).astype(cudf.Decimal64Dtype(7, 2)), + lambda: cudf.Series( + ["57.45", np.nan, np.nan, "686.49", "-55.5", "73.24"], + ).astype(cudf.Decimal64Dtype(7, 2)), + ], +) +def test_fillna_decimal(gsr_data, dtype, fill_value, inplace): + gsr = cudf.Series(gsr_data).astype(dtype) + psr = gsr.to_pandas() + fill_value = fill_value() + if isinstance(fill_value, cudf.Series): + p_fill_value = fill_value.to_pandas() + else: + p_fill_value = fill_value + + expected = psr.fillna(p_fill_value, inplace=inplace) + got = gsr.fillna(fill_value, inplace=inplace) + + assert_eq(expected, got, check_dtype=False) + + +@pytest.mark.parametrize( + "psr", + [ + pd.Series(["a", "b", "a", None, "c", None], dtype="category"), + pd.Series( + ["a", "b", "a", None, "c", None], + dtype="category", + index=["q", "r", "z", "a", "b", "c"], + ), + pd.Series( + ["a", "b", "a", None, "c", None], + dtype="category", + index=["x", "t", "p", "q", "r", "z"], + ), + pd.Series(["a", "b", "a", np.nan, "c", np.nan], dtype="category"), + pd.Series( + [None, None, None, None, None, None, "a", "b", "c"], + dtype="category", + ), + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + "c", + pd.Series(["c", "c", "c", "c", "c", "a"], dtype="category"), + pd.Series( + ["a", "b", "a", None, "c", None], + dtype="category", + index=["x", "t", "p", "q", "r", "z"], + ), + pd.Series( + ["a", "b", "a", None, "c", None], + dtype="category", + index=["q", "r", "z", "a", "b", "c"], + ), + pd.Series(["a", "b", "a", None, "c", None], dtype="category"), + pd.Series(["a", "b", "a", np.nan, "c", np.nan], dtype="category"), + ], +) +def test_fillna_categorical(psr, fill_value, inplace): + if inplace: + psr = psr.copy(deep=True) + gsr = cudf.from_pandas(psr) + + if isinstance(fill_value, pd.Series): + fill_value_cudf = cudf.from_pandas(fill_value) + else: + fill_value_cudf = fill_value + + if ( + isinstance(fill_value_cudf, cudf.Series) + and gsr.dtype != fill_value_cudf.dtype + ): + assert_exceptions_equal( + lfunc=psr.fillna, + rfunc=gsr.fillna, + lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}), + rfunc_args_and_kwargs=([fill_value_cudf], {"inplace": inplace}), + ) + else: + expected = psr.fillna(fill_value, inplace=inplace) + got = gsr.fillna(fill_value_cudf, inplace=inplace) + + if inplace: + expected = psr + got = gsr + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "psr", + [ + pd.Series( + pd.date_range( + "2010-01-01", + "2020-01-10", + freq="1YE" if PANDAS_GE_220 else "1y", + ) + ), + pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), + pd.Series( + [ + None, + None, + None, + None, + None, + None, + "2011-10-10", + "2010-01-01", + "2010-01-02", + 
"2010-01-04", + "2010-11-01", + ], + dtype="datetime64[ns]", + ), + pd.Series( + [ + None, + None, + None, + None, + None, + None, + "2011-10-10", + "2010-01-01", + "2010-01-02", + "2010-01-04", + "2010-11-01", + ], + dtype="datetime64[ns]", + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], + ), + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("2010-01-02"), + pd.Series( + pd.date_range( + "2010-01-01", + "2020-01-10", + freq="1YE" if PANDAS_GE_220 else "1y", + ) + ) + + pd.Timedelta("1d"), + pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), + pd.Series( + [ + None, + None, + None, + None, + None, + None, + "2011-10-10", + "2010-01-01", + "2010-01-02", + "2010-01-04", + "2010-11-01", + ], + dtype="datetime64[ns]", + ), + pd.Series( + [ + None, + None, + None, + None, + None, + None, + "2011-10-10", + "2010-01-01", + "2010-01-02", + "2010-01-04", + "2010-11-01", + ], + dtype="datetime64[ns]", + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], + ), + ], +) +def test_fillna_datetime(psr, fill_value, inplace): + if inplace: + psr = psr.copy(deep=True) + gsr = cudf.from_pandas(psr) + + if isinstance(fill_value, pd.Series): + fill_value_cudf = cudf.from_pandas(fill_value) + else: + fill_value_cudf = fill_value + + expected = psr.fillna(fill_value, inplace=inplace) + got = gsr.fillna(fill_value_cudf, inplace=inplace) + + if inplace: + got = gsr + expected = psr + + assert_eq(expected, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "data", + [ + # Categorical + pd.Categorical([1, 2, None, None, 3, 4]), + pd.Categorical([None, None, 1, None, 3, 4]), + pd.Categorical([1, 2, None, 3, 4, None, None]), + pd.Categorical(["1", "20", None, None, "3", "40"]), + pd.Categorical([None, None, "10", None, "30", "4"]), + pd.Categorical(["1", "20", None, "30", "4", None, None]), + # Datetime + np.array( + [ + "2020-01-01 08:00:00", + "2020-01-01 09:00:00", + None, + "2020-01-01 10:00:00", + None, + "2020-01-01 10:00:00", + ], + dtype="datetime64[ns]", + ), + np.array( + [ + None, + None, + "2020-01-01 09:00:00", + "2020-01-01 10:00:00", + None, + "2020-01-01 10:00:00", + ], + dtype="datetime64[ns]", + ), + np.array( + [ + "2020-01-01 09:00:00", + None, + None, + "2020-01-01 10:00:00", + None, + None, + ], + dtype="datetime64[ns]", + ), + # Timedelta + np.array( + [10, 100, 1000, None, None, 10, 100, 1000], dtype="datetime64[ns]" + ), + np.array( + [None, None, 10, None, 1000, 100, 10], dtype="datetime64[ns]" + ), + np.array( + [10, 100, None, None, 1000, None, None], dtype="datetime64[ns]" + ), + # String + np.array( + ["10", "100", "1000", None, None, "10", "100", "1000"], + dtype="object", + ), + np.array( + [None, None, "1000", None, "10", "100", "10"], dtype="object" + ), + np.array( + ["10", "100", None, None, "1000", None, None], dtype="object" + ), + ], +) +@pytest.mark.parametrize("container", [pd.Series, pd.DataFrame]) +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +def test_fillna_method_fixed_width_non_num(data, container, method, inplace): + if container == pd.DataFrame: + data = {"a": data, "b": data, "c": data} + + pdata = container(data) + + # Explicitly using nans_as_nulls=True + gdata = cudf.from_pandas(pdata, nan_as_null=True) + + with pytest.warns(FutureWarning): + expected = pdata.fillna(method=method, inplace=inplace) + with pytest.warns(FutureWarning): + actual = 
gdata.fillna(method=method, inplace=inplace) + + if inplace: + expected = pdata + actual = gdata + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "psr", + [ + pd.Series(["a", "b", "c", "d"]), + pd.Series([None] * 4, dtype="object"), + pd.Series(["z", None, "z", None]), + pd.Series(["x", "y", None, None, None]), + pd.Series([None, None, None, "i", "P"]), + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + "a", + pd.Series(["a", "b", "c", "d"]), + pd.Series(["z", None, "z", None]), + pd.Series([None] * 4, dtype="object"), + pd.Series(["x", "y", None, None, None]), + pd.Series([None, None, None, "i", "P"]), + ], +) +def test_fillna_string(psr, fill_value, inplace): + if inplace: + psr = psr.copy(deep=True) + gsr = cudf.from_pandas(psr) + + if isinstance(fill_value, pd.Series): + fill_value_cudf = cudf.from_pandas(fill_value) + else: + fill_value_cudf = fill_value + + expected = psr.fillna(fill_value, inplace=inplace) + got = gsr.fillna(fill_value_cudf, inplace=inplace) + + if inplace: + expected = psr + got = gsr + + assert_eq(expected, got) + + +def test_series_fillna_invalid_dtype(integer_types_as_str): + gdf = cudf.Series([1, 2, None, 3], dtype=integer_types_as_str) + fill_value = 2.5 + msg = ( + f"Cannot safely cast non-equivalent" + f" {type(fill_value).__name__} to {gdf.dtype.type.__name__}" + ) + with pytest.raises(TypeError, match=msg): + gdf.fillna(fill_value) + + +@pytest.mark.parametrize( + "data", [[1, 2.0, 3, 4, None, 1, None, 10, None], ["a", "b", "c"]] +) +@pytest.mark.parametrize( + "index", + [ + None, + [1, 2, 3], + ["a", "b", "z"], + ["a", "b", "c", "d", "e", "f", "g", "l", "m"], + ], +) +@pytest.mark.parametrize("value", [[1, 2, 3, 4, None, 1, None, 10, None]]) +def test_series_fillna(data, index, value): + psr = pd.Series( + data, + index=index if index is not None and len(index) == len(data) else None, + ) + gsr = cudf.Series( + data, + index=index if index is not None and len(index) == len(data) else None, + ) + + expect = psr.fillna(pd.Series(value)) + got = gsr.fillna(cudf.Series(value)) + assert_eq(expect, got) + + +def test_series_fillna_error(): + psr = pd.Series([1, 2, None, 3, None]) + gsr = cudf.from_pandas(psr) + + assert_exceptions_equal( + psr.fillna, + gsr.fillna, + ([pd.DataFrame({"a": [1, 2, 3]})],), + ([cudf.DataFrame({"a": [1, 2, 3]})],), + ) + + +def test_fillna_nan_and_null(): + ser = cudf.Series(pa.array([float("nan"), None, 1.1]), nan_as_null=False) + result = ser.fillna(2.2) + expected = cudf.Series([2.2, 2.2, 1.1]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/series/methods/test_replace.py b/python/cudf/cudf/tests/series/methods/test_replace.py new file mode 100644 index 00000000000..7bdad916cab --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_replace.py @@ -0,0 +1,527 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import re + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_GT_214, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +@pytest.mark.parametrize( + "gsr_data, dtype", + [ + [[5, 1, 2, 3, None, 243, None, 4], None], + [["one", "two", "three", None, "one"], "category"], + [[*list(range(400)), None], None], + ], +) +@pytest.mark.parametrize( + "to_replace,value", + [ + (0, 5), + ("one", "two"), + ("one", "five"), + ("abc", "hello"), + ([0, 1], [5, 6]), + ([22, 323, 27, 0], -1), + ([1, 2, 3], cudf.Series([10, 11, 12])), + (cudf.Series([1, 2, 3]), None), + ({1: 10, 2: 22}, None), + (np.inf, 4), + ], +) +def test_series_replace_all(gsr_data, dtype, to_replace, value): + gsr = cudf.Series(gsr_data, dtype=dtype) + psr = gsr.to_pandas() + + gd_to_replace = to_replace + if isinstance(to_replace, cudf.Series): + pd_to_replace = to_replace.to_pandas() + else: + pd_to_replace = to_replace + + gd_value = value + if isinstance(value, cudf.Series): + pd_value = value.to_pandas() + else: + pd_value = value + + expect_warn = ( + isinstance(gsr.dtype, cudf.CategoricalDtype) + and isinstance(gd_to_replace, str) + and gd_to_replace == "one" + ) + with expect_warning_if(expect_warn): + actual = gsr.replace(to_replace=gd_to_replace, value=gd_value) + with expect_warning_if(expect_warn and PANDAS_GE_220): + if pd_value is None: + # TODO: Remove this workaround once cudf + # introduces `no_default` values + expected = psr.replace(to_replace=pd_to_replace) + else: + expected = psr.replace(to_replace=pd_to_replace, value=pd_value) + + assert_eq( + expected.sort_values().reset_index(drop=True), + actual.sort_values().reset_index(drop=True), + ) + + +def test_series_replace(): + a1 = np.array([0, 1, 2, 3, 4]) + + # Numerical + a2 = np.array([5, 1, 2, 3, 4]) + sr1 = cudf.Series(a1) + sr2 = sr1.replace(0, 5) + assert_eq(a2, sr2.to_numpy()) + + # Categorical + psr3 = pd.Series(["one", "two", "three"], dtype="category") + with expect_warning_if(PANDAS_GE_220, FutureWarning): + psr4 = psr3.replace("one", "two") + sr3 = cudf.from_pandas(psr3) + with pytest.warns(FutureWarning): + sr4 = sr3.replace("one", "two") + assert_eq( + psr4.sort_values().reset_index(drop=True), + sr4.sort_values().reset_index(drop=True), + ) + with expect_warning_if(PANDAS_GE_220, FutureWarning): + psr5 = psr3.replace("one", "five") + with pytest.warns(FutureWarning): + sr5 = sr3.replace("one", "five") + + assert_eq(psr5, sr5) + + # List input + a6 = np.array([5, 6, 2, 3, 4]) + sr6 = sr1.replace([0, 1], [5, 6]) + assert_eq(a6, sr6.to_numpy()) + + assert_eq( + sr1.replace([0, 1], [5.5, 6.5]), + sr1.to_pandas().replace([0, 1], [5.5, 6.5]), + ) + + # Series input + a8 = np.array([5, 5, 5, 3, 4]) + sr8 = sr1.replace(sr1[:3].to_numpy(), 5) + assert_eq(a8, sr8.to_numpy()) + + # large input containing null + sr9 = cudf.Series([*list(range(400)), None]) + sr10 = sr9.replace([22, 323, 27, 0], None) + assert sr10.null_count == 5 + assert len(sr10.dropna().to_numpy()) == (401 - 5) + + sr11 = sr9.replace([22, 323, 27, 0], -1) + assert sr11.null_count == 1 + assert len(sr11.dropna().to_numpy()) == (401 - 1) + + # large input not containing nulls + sr9 = sr9.fillna(-11) + sr12 = sr9.replace([22, 323, 27, 0], None) + assert sr12.null_count == 4 + assert len(sr12.dropna().to_numpy()) == (401 - 4) + + sr13 = sr9.replace([22, 323, 27, 0], -1) + assert 
sr13.null_count == 0 + assert len(sr13.to_numpy()) == 401 + + +def test_series_replace_with_nulls(): + a1 = np.array([0, 1, 2, 3, 4]) + + # Numerical + a2 = np.array([-10, 1, 2, 3, 4]) + sr1 = cudf.Series(a1) + sr2 = sr1.replace(0, None).fillna(-10) + assert_eq(a2, sr2.to_numpy()) + + # List input + a6 = np.array([-10, 6, 2, 3, 4]) + sr6 = sr1.replace([0, 1], [None, 6]).fillna(-10) + assert_eq(a6, sr6.to_numpy()) + + sr1 = cudf.Series([0, 1, 2, 3, 4, None]) + assert_eq( + sr1.replace([0, 1], [5.5, 6.5]).fillna(-10), + sr1.to_pandas().replace([0, 1], [5.5, 6.5]).fillna(-10), + ) + + # Series input + a8 = np.array([-10, -10, -10, 3, 4, -10]) + sr8 = sr1.replace(cudf.Series([-10] * 3, index=sr1[:3]), None).fillna(-10) + assert_eq(a8, sr8.to_numpy()) + + a9 = np.array([-10, 6, 2, 3, 4, -10]) + sr9 = sr1.replace([0, 1], [None, 6]).fillna(-10) + assert_eq(a9, sr9.to_numpy()) + + +@pytest.mark.parametrize( + "psr", + [ + pd.Series([0, 1, None, 2, None], dtype=pd.Int8Dtype()), + pd.Series([0, 1, np.nan, 2, np.nan]), + ], +) +@pytest.mark.parametrize("fill_value", [10, pd.Series([10, 20, 30, 40, 50])]) +def test_series_fillna_numerical( + psr, numeric_types_as_str, fill_value, inplace +): + if inplace: + psr = psr.copy(deep=True) + # TODO: These tests should use Pandas' nullable int type + # when we support a recent enough version of Pandas + # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html + if np.dtype(numeric_types_as_str).kind != "f" and psr.dtype.kind == "i": + psr = psr.astype( + cudf.utils.dtypes.np_dtypes_to_pandas_dtypes[ + np.dtype(numeric_types_as_str) + ] + ) + + gsr = cudf.from_pandas(psr) + + if isinstance(fill_value, pd.Series): + fill_value_cudf = cudf.from_pandas(fill_value) + else: + fill_value_cudf = fill_value + + expected = psr.fillna(fill_value, inplace=inplace) + actual = gsr.fillna(fill_value_cudf, inplace=inplace) + + if inplace: + expected = psr + actual = gsr + + # TODO: Remove check_dtype when we have support + # to compare with pandas nullable dtypes + assert_eq(expected, actual, check_dtype=False) + + +def test_series_multiple_times_with_nulls(): + sr = cudf.Series([1, 2, 3, None]) + expected = cudf.Series([None, None, None, None], dtype=np.int64) + + for i in range(3): + got = sr.replace([1, 2, 3], None) + assert_eq(expected, got) + # BUG: #2695 + # The following series will acquire a chunk of memory and update with + # values, but these values may still linger even after the memory + # gets released. This memory space might get used for replace in + # subsequent calls and the memory used for mask may have junk values. + # So, if it is not updated properly, the result would be wrong. + # So, this will help verify that scenario. 
+ cudf.Series([1, 1, 1, None]) + + +@pytest.mark.parametrize( + "replacement", [128, 128.0, 128.5, 32769, 32769.0, 32769.5] +) +def test_numeric_series_replace_dtype( + request, numeric_types_as_str, replacement +): + request.applymarker( + pytest.mark.xfail( + condition=PANDAS_GT_214 + and ( + ( + numeric_types_as_str == "int8" + and replacement in {128, 128.0, 32769, 32769.0} + ) + or ( + numeric_types_as_str == "int16" + and replacement in {32769, 32769.0} + ) + ), + reason="Pandas throws an AssertionError for these " + "cases and asks us to log a bug, they are trying to " + "avoid a RecursionError which cudf will not run into", + ) + ) + psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=numeric_types_as_str) + sr = cudf.from_pandas(psr) + + expect = psr.replace(1, replacement) + got = sr.replace(1, replacement) + + assert_eq(expect, got) + + # to_replace is a list, replacement is a scalar + expect = psr.replace([2, 3], replacement) + got = sr.replace([2, 3], replacement) + + assert_eq(expect, got) + + # If to_replace is a scalar and replacement is a list + with pytest.raises(TypeError): + sr.replace(0, [replacement, 2]) + + # Both list of unequal length + with pytest.raises(ValueError): + sr.replace([0, 1], [replacement]) + + # Both lists of equal length + expect = psr.replace([2, 3], [replacement, replacement]) + got = sr.replace([2, 3], [replacement, replacement]) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "pframe, replace_args", + [ + ( + pd.Series([5, 1, 2, 3, 4]), + {"to_replace": 5, "value": 0, "inplace": True}, + ), + ( + pd.Series([5, 1, 2, 3, 4]), + {"to_replace": {5: 0, 3: -5}, "inplace": True}, + ), + (pd.Series([5, 1, 2, 3, 4]), {}), + pytest.param( + pd.Series(["one", "two", "three"], dtype="category"), + {"to_replace": "one", "value": "two", "inplace": True}, + marks=pytest.mark.xfail( + condition=PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/43232" + "https://github.com/pandas-dev/pandas/issues/53358", + ), + ), + ( + pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}), + {"to_replace": 5, "value": 0, "inplace": True}, + ), + ( + pd.Series([1, 2, 3, 45]), + { + "to_replace": np.array([]).astype(int), + "value": 77, + "inplace": True, + }, + ), + ( + pd.Series([1, 2, 3, 45]), + { + "to_replace": np.array([]).astype(int), + "value": 77, + "inplace": False, + }, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + {"to_replace": {"a": 2}, "value": {"a": -33}, "inplace": True}, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + { + "to_replace": {"a": [2, 5]}, + "value": {"a": [9, 10]}, + "inplace": True, + }, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + {"to_replace": [], "value": [], "inplace": True}, + ), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning not given on older versions of pandas", +) +def test_replace_inplace(pframe, replace_args): + gpu_frame = cudf.from_pandas(pframe) + pandas_frame = pframe.copy() + + gpu_copy = gpu_frame.copy() + cpu_copy = pandas_frame.copy() + + assert_eq(gpu_frame, pandas_frame) + assert_eq(gpu_copy, cpu_copy) + with expect_warning_if(len(replace_args) == 0): + gpu_frame.replace(**replace_args) + with expect_warning_if(len(replace_args) == 0): + pandas_frame.replace(**replace_args) + assert_eq(gpu_frame, pandas_frame) + assert_eq(gpu_copy, cpu_copy) + + +def test_series_replace_errors(): + gsr = cudf.Series([1, 2, None, 3, None]) + psr = gsr.to_pandas() + + with pytest.raises( + TypeError, + 
match=re.escape( + "to_replace and value should be of same types," + "got to_replace dtype: int64 and " + "value dtype: object" + ), + ): + gsr.replace(1, "a") + + gsr = cudf.Series(["a", "b", "c"]) + with pytest.raises( + TypeError, + match=re.escape( + "to_replace and value should be of same types," + "got to_replace dtype: int64 and " + "value dtype: object" + ), + ): + gsr.replace([1, 2], ["a", "b"]) + + assert_exceptions_equal( + psr.replace, + gsr.replace, + ([{"a": 1}, 1],), + ([{"a": 1}, 1],), + ) + + assert_exceptions_equal( + lfunc=psr.replace, + rfunc=gsr.replace, + lfunc_args_and_kwargs=([[1, 2], [1]],), + rfunc_args_and_kwargs=([[1, 2], [1]],), + ) + + assert_exceptions_equal( + lfunc=psr.replace, + rfunc=gsr.replace, + lfunc_args_and_kwargs=([object(), [1]],), + rfunc_args_and_kwargs=([object(), [1]],), + ) + + assert_exceptions_equal( + lfunc=psr.replace, + rfunc=gsr.replace, + lfunc_args_and_kwargs=([{"a": 1}, object()],), + rfunc_args_and_kwargs=([{"a": 1}, object()],), + ) + + +@pytest.mark.parametrize( + "gsr,old,new,expected", + [ + ( + lambda: cudf.Series(["a", "b", "c", None]), + None, + "a", + lambda: cudf.Series(["a", "b", "c", "a"]), + ), + ( + lambda: cudf.Series(["a", "b", "c", None]), + [None, "a", "a"], + ["c", "b", "d"], + lambda: cudf.Series(["d", "b", "c", "c"]), + ), + ( + lambda: cudf.Series(["a", "b", "c", None]), + [None, "a"], + ["b", None], + lambda: cudf.Series([None, "b", "c", "b"]), + ), + ( + lambda: cudf.Series(["a", "b", "c", None]), + [None, None], + [None, None], + lambda: cudf.Series(["a", "b", "c", None]), + ), + ( + lambda: cudf.Series([1, 2, None, 3]), + None, + 10, + lambda: cudf.Series([1, 2, 10, 3]), + ), + ( + lambda: cudf.Series([1, 2, None, 3]), + [None, 1, 1], + [3, 2, 4], + lambda: cudf.Series([4, 2, 3, 3]), + ), + ( + lambda: cudf.Series([1, 2, None, 3]), + [None, 1], + [2, None], + lambda: cudf.Series([None, 2, 2, 3]), + ), + ( + lambda: cudf.Series(["a", "q", "t", None], dtype="category"), + None, + "z", + lambda: cudf.Series(["a", "q", "t", "z"], dtype="category"), + ), + ( + lambda: cudf.Series(["a", "q", "t", None], dtype="category"), + [None, "a", "q"], + ["z", None, None], + lambda: cudf.Series([None, None, "t", "z"], dtype="category"), + ), + ( + lambda: cudf.Series(["a", None, "t", None], dtype="category"), + [None, "t"], + ["p", None], + lambda: cudf.Series(["a", "p", None, "p"], dtype="category"), + ), + ], +) +def test_replace_nulls(gsr, old, new, expected): + gsr = gsr() + with expect_warning_if(isinstance(gsr.dtype, cudf.CategoricalDtype)): + actual = gsr.replace(old, new) + assert_eq( + expected().sort_values().reset_index(drop=True), + actual.sort_values().reset_index(drop=True), + ) + + +def test_replace_with_index_objects(): + result = cudf.Series([1, 2]).replace(cudf.Index([1]), cudf.Index([2])) + expected = pd.Series([1, 2]).replace(pd.Index([1]), pd.Index([2])) + assert_eq(result, expected) + + +def test_replace_datetime_series(): + pd_series = pd.Series(pd.date_range("20210101", periods=5)) + pd_result = pd_series.replace( + pd.Timestamp("2021-01-02"), pd.Timestamp("2021-01-10") + ) + + cudf_series = cudf.Series(pd.date_range("20210101", periods=5)) + cudf_result = cudf_series.replace( + pd.Timestamp("2021-01-02"), pd.Timestamp("2021-01-10") + ) + + assert_eq(pd_result, cudf_result) + + +def test_replace_timedelta_series(): + pd_series = pd.Series(pd.timedelta_range("1 days", periods=5)) + pd_result = pd_series.replace( + pd.Timedelta("2 days"), pd.Timedelta("10 days") + ) + + cudf_series = 
cudf.Series(pd.timedelta_range("1 days", periods=5)) + cudf_result = cudf_series.replace( + pd.Timedelta("2 days"), pd.Timedelta("10 days") + ) + + assert_eq(pd_result, cudf_result) diff --git a/python/cudf/cudf/tests/series/methods/test_where.py b/python/cudf/cudf/tests/series/methods/test_where.py index e0f01fd3cb8..9cb12df2f0c 100644 --- a/python/cudf/cudf/tests/series/methods/test_where.py +++ b/python/cudf/cudf/tests/series/methods/test_where.py @@ -1,9 +1,12 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import operator import re +import pandas as pd import pytest import cudf +from cudf.testing import assert_eq def test_series_where_mixed_dtypes_error(): @@ -23,3 +26,47 @@ def test_series_where_mixed_bool_dtype(): s = cudf.Series([True, False, True]) with pytest.raises(TypeError): s.where(~s, 10) + + +@pytest.mark.parametrize("fill_value", [100, 100.0, 128.5]) +@pytest.mark.parametrize("op", [operator.gt, operator.eq, operator.lt]) +def test_series_where(numeric_types_as_str, fill_value, op): + psr = pd.Series(list(range(10)), dtype=numeric_types_as_str) + sr = cudf.from_pandas(psr) + + try: + scalar_fits = sr.dtype.type(fill_value) == fill_value + except OverflowError: + scalar_fits = False + + if not scalar_fits: + with pytest.raises(TypeError): + sr.where(op(sr, 0), fill_value) + else: + # Cast back to original dtype as pandas automatically upcasts + expect = psr.where(op(psr, 0), fill_value) + got = sr.where(op(sr, 0), fill_value) + # pandas returns 'float16' dtype, which is not supported in cudf + assert_eq( + expect, + got, + check_dtype=expect.dtype.kind != "f", + ) + + +@pytest.mark.parametrize("fill_value", [100, 100.0, 100.5]) +def test_series_with_nulls_where(fill_value): + psr = pd.Series([None] * 3 + list(range(5))) + sr = cudf.from_pandas(psr) + + expect = psr.where(psr > 0, fill_value) + got = sr.where(sr > 0, fill_value) + assert_eq(expect, got) + + expect = psr.where(psr < 0, fill_value) + got = sr.where(sr < 0, fill_value) + assert_eq(expect, got) + + expect = psr.where(psr == 0, fill_value) + got = sr.where(sr == 0, fill_value) + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py deleted file mode 100644 index b1efcef5d1e..00000000000 --- a/python/cudf/cudf/tests/test_replace.py +++ /dev/null @@ -1,1459 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- -import operator -import re -from decimal import Decimal - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_GT_214, - PANDAS_VERSION, -) -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing import assert_eq -from cudf.testing._utils import ( - INTEGER_TYPES, - NUMERIC_TYPES, - assert_exceptions_equal, - expect_warning_if, -) - - -@pytest.mark.parametrize( - "gsr_data, dtype", - [ - [[5, 1, 2, 3, None, 243, None, 4], None], - [["one", "two", "three", None, "one"], "category"], - [[*list(range(400)), None], None], - ], -) -@pytest.mark.parametrize( - "to_replace,value", - [ - (0, 5), - ("one", "two"), - ("one", "five"), - ("abc", "hello"), - ([0, 1], [5, 6]), - ([22, 323, 27, 0], -1), - ([1, 2, 3], cudf.Series([10, 11, 12])), - (cudf.Series([1, 2, 3]), None), - ({1: 10, 2: 22}, None), - (np.inf, 4), - ], -) -def test_series_replace_all(gsr_data, dtype, to_replace, value): - gsr = cudf.Series(gsr_data, dtype=dtype) - psr = gsr.to_pandas() - - gd_to_replace = to_replace - if isinstance(to_replace, cudf.Series): - pd_to_replace = to_replace.to_pandas() - else: - pd_to_replace = to_replace - - gd_value = value - if isinstance(value, cudf.Series): - pd_value = value.to_pandas() - else: - pd_value = value - - expect_warn = ( - isinstance(gsr.dtype, cudf.CategoricalDtype) - and isinstance(gd_to_replace, str) - and gd_to_replace == "one" - ) - with expect_warning_if(expect_warn): - actual = gsr.replace(to_replace=gd_to_replace, value=gd_value) - with expect_warning_if(expect_warn and PANDAS_GE_220): - if pd_value is None: - # TODO: Remove this workaround once cudf - # introduces `no_default` values - expected = psr.replace(to_replace=pd_to_replace) - else: - expected = psr.replace(to_replace=pd_to_replace, value=pd_value) - - assert_eq( - expected.sort_values().reset_index(drop=True), - actual.sort_values().reset_index(drop=True), - ) - - -def test_series_replace(): - a1 = np.array([0, 1, 2, 3, 4]) - - # Numerical - a2 = np.array([5, 1, 2, 3, 4]) - sr1 = cudf.Series(a1) - sr2 = sr1.replace(0, 5) - assert_eq(a2, sr2.to_numpy()) - - # Categorical - psr3 = pd.Series(["one", "two", "three"], dtype="category") - with expect_warning_if(PANDAS_GE_220, FutureWarning): - psr4 = psr3.replace("one", "two") - sr3 = cudf.from_pandas(psr3) - with pytest.warns(FutureWarning): - sr4 = sr3.replace("one", "two") - assert_eq( - psr4.sort_values().reset_index(drop=True), - sr4.sort_values().reset_index(drop=True), - ) - with expect_warning_if(PANDAS_GE_220, FutureWarning): - psr5 = psr3.replace("one", "five") - with pytest.warns(FutureWarning): - sr5 = sr3.replace("one", "five") - - assert_eq(psr5, sr5) - - # List input - a6 = np.array([5, 6, 2, 3, 4]) - sr6 = sr1.replace([0, 1], [5, 6]) - assert_eq(a6, sr6.to_numpy()) - - assert_eq( - sr1.replace([0, 1], [5.5, 6.5]), - sr1.to_pandas().replace([0, 1], [5.5, 6.5]), - ) - - # Series input - a8 = np.array([5, 5, 5, 3, 4]) - sr8 = sr1.replace(sr1[:3].to_numpy(), 5) - assert_eq(a8, sr8.to_numpy()) - - # large input containing null - sr9 = cudf.Series([*list(range(400)), None]) - sr10 = sr9.replace([22, 323, 27, 0], None) - assert sr10.null_count == 5 - assert len(sr10.dropna().to_numpy()) == (401 - 5) - - sr11 = sr9.replace([22, 323, 27, 0], -1) - assert sr11.null_count == 1 - assert len(sr11.dropna().to_numpy()) == (401 - 1) - - # large input not containing nulls - sr9 = sr9.fillna(-11) - 
sr12 = sr9.replace([22, 323, 27, 0], None) - assert sr12.null_count == 4 - assert len(sr12.dropna().to_numpy()) == (401 - 4) - - sr13 = sr9.replace([22, 323, 27, 0], -1) - assert sr13.null_count == 0 - assert len(sr13.to_numpy()) == 401 - - -def test_series_replace_with_nulls(): - a1 = np.array([0, 1, 2, 3, 4]) - - # Numerical - a2 = np.array([-10, 1, 2, 3, 4]) - sr1 = cudf.Series(a1) - sr2 = sr1.replace(0, None).fillna(-10) - assert_eq(a2, sr2.to_numpy()) - - # List input - a6 = np.array([-10, 6, 2, 3, 4]) - sr6 = sr1.replace([0, 1], [None, 6]).fillna(-10) - assert_eq(a6, sr6.to_numpy()) - - sr1 = cudf.Series([0, 1, 2, 3, 4, None]) - assert_eq( - sr1.replace([0, 1], [5.5, 6.5]).fillna(-10), - sr1.to_pandas().replace([0, 1], [5.5, 6.5]).fillna(-10), - ) - - # Series input - a8 = np.array([-10, -10, -10, 3, 4, -10]) - sr8 = sr1.replace(cudf.Series([-10] * 3, index=sr1[:3]), None).fillna(-10) - assert_eq(a8, sr8.to_numpy()) - - a9 = np.array([-10, 6, 2, 3, 4, -10]) - sr9 = sr1.replace([0, 1], [None, 6]).fillna(-10) - assert_eq(a9, sr9.to_numpy()) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning introduced in pandas-2.2.0", -) -@pytest.mark.parametrize( - "data, dtype", - [ - ( - { - "a": [0, 1, None, 2, 3], - "b": [3, 2, 2, 3, None], - "c": ["abc", "def", ".", None, None], - }, - None, - ), - ( - { - "a": ["one", "two", None, "three"], - "b": ["one", None, "two", "three"], - }, - "category", - ), - ( - { - "col one": [None, 10, 11, None, 1000, 500, 600], - "col two": ["abc", "def", "ghi", None, "pp", None, "a"], - "a": [0.324, 0.234, 324.342, 23.32, 9.9, None, None], - }, - None, - ), - ], -) -@pytest.mark.parametrize( - "to_replace,value", - [ - (0, 4), - ([0, 1], [4, 5]), - ([0, 1], 4), - ({"a": 0, "b": 0}, {"a": 4, "b": 5}), - ({"a": 0}, {"a": 4}), - ("abc", "---"), - ([".", "gh"], "hi"), - ([".", "def"], ["_", None]), - ({"c": 0}, {"a": 4, "b": 5}), - ({"a": 2}, {"c": "a"}), - ("two", "three"), - ([1, 2], pd.Series([10, 11])), - (pd.Series([10, 11], index=[3, 2]), None), - ( - pd.Series(["a+", "+c", "p", "---"], index=["abc", "gh", "l", "z"]), - None, - ), - ( - pd.Series([10, 11], index=[3, 2]), - {"a": [-10, -30], "l": [-111, -222]}, - ), - (pd.Series([10, 11], index=[3, 2]), 555), - ( - pd.Series([10, 11], index=["a", "b"]), - pd.Series([555, 1111], index=["a", "b"]), - ), - ({"a": "2", "b": "3", "zzz": "hi"}, None), - ({"a": 2, "b": 3, "zzz": "hi"}, 324353), - ( - {"a": 2, "b": 3, "zzz": "hi"}, - pd.Series([5, 6, 10], index=["a", "b", "col one"]), - ), - ], -) -def test_dataframe_replace(data, dtype, to_replace, value): - gdf = cudf.DataFrame(data, dtype=dtype) - pdf = gdf.to_pandas() - - pd_value = value - if isinstance(value, pd.Series): - gd_value = cudf.from_pandas(value) - else: - gd_value = value - - pd_to_replace = to_replace - if isinstance(to_replace, pd.Series): - gd_to_replace = cudf.from_pandas(to_replace) - else: - gd_to_replace = to_replace - - can_warn = ( - isinstance(gdf["a"].dtype, cudf.CategoricalDtype) - and isinstance(to_replace, str) - and to_replace == "two" - and isinstance(value, str) - and value == "three" - ) - with expect_warning_if(can_warn): - if pd_value is None: - expected = pdf.replace(to_replace=pd_to_replace) - else: - expected = pdf.replace(to_replace=pd_to_replace, value=pd_value) - with expect_warning_if(can_warn): - actual = gdf.replace(to_replace=gd_to_replace, value=gd_value) - - expected_sorted = expected.sort_values(by=list(expected.columns), axis=0) - actual_sorted = 
actual.sort_values(by=list(actual.columns), axis=0) - - assert_eq(expected_sorted, actual_sorted) - - -def test_dataframe_replace_with_nulls(): - # numerical - pdf1 = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0, 1, 2, 3]}) - gdf1 = cudf.from_pandas(pdf1) - pdf2 = pdf1.replace(0, 4) - gdf2 = gdf1.replace(0, None).fillna(4) - assert_eq(gdf2, pdf2) - - # list input - pdf6 = pdf1.replace([0, 1], [4, 5]) - gdf6 = gdf1.replace([0, 1], [4, None]).fillna(5) - assert_eq(gdf6, pdf6) - - pdf7 = pdf1.replace([0, 1], 4) - gdf7 = gdf1.replace([0, 1], None).fillna(4) - assert_eq(gdf7, pdf7) - - # dict input: - pdf8 = pdf1.replace({"a": 0, "b": 0}, {"a": 4, "b": 5}) - gdf8 = gdf1.replace({"a": 0, "b": 0}, {"a": None, "b": 5}).fillna(4) - assert_eq(gdf8, pdf8) - - gdf1 = cudf.DataFrame({"a": [0, 1, 2, 3], "b": [0, 1, 2, None]}) - gdf9 = gdf1.replace([0, 1], [4, 5]).fillna(3) - assert_eq(gdf9, pdf6) - - -@pytest.mark.parametrize( - "psr", - [ - pd.Series([0, 1, None, 2, None], dtype=pd.Int8Dtype()), - pd.Series([0, 1, np.nan, 2, np.nan]), - ], -) -@pytest.mark.parametrize("data_dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("fill_value", [10, pd.Series([10, 20, 30, 40, 50])]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_series_fillna_numerical(psr, data_dtype, fill_value, inplace): - test_psr = psr.copy(deep=True) - # TODO: These tests should use Pandas' nullable int type - # when we support a recent enough version of Pandas - # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html - if np.dtype(data_dtype).kind not in ("f") and test_psr.dtype.kind == "i": - test_psr = test_psr.astype( - cudf.utils.dtypes.np_dtypes_to_pandas_dtypes[np.dtype(data_dtype)] - ) - - gsr = cudf.from_pandas(test_psr) - - if isinstance(fill_value, pd.Series): - fill_value_cudf = cudf.from_pandas(fill_value) - else: - fill_value_cudf = fill_value - - expected = test_psr.fillna(fill_value, inplace=inplace) - actual = gsr.fillna(fill_value_cudf, inplace=inplace) - - if inplace: - expected = test_psr - actual = gsr - - # TODO: Remove check_dtype when we have support - # to compare with pandas nullable dtypes - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "data", - [ - [1, None, None, 2, 3, 4], - [None, None, 1, 2, None, 3, 4], - [1, 2, None, 3, 4, None, None], - [0] + [None] * 14, - [None] * 14 + [0], - ], -) -@pytest.mark.parametrize("container", [pd.Series, pd.DataFrame]) -@pytest.mark.parametrize("data_dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("method", ["ffill", "bfill"]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_method_numerical(data, container, data_dtype, method, inplace): - if container == pd.DataFrame: - data = {"a": data, "b": data, "c": data} - - pdata = container(data) - - if np.dtype(data_dtype).kind not in ("f"): - data_dtype = cudf.utils.dtypes.np_dtypes_to_pandas_dtypes[ - np.dtype(data_dtype) - ] - pdata = pdata.astype(data_dtype) - - # Explicitly using nans_as_nulls=True - gdata = cudf.from_pandas(pdata, nan_as_null=True) - - with pytest.warns(FutureWarning): - expected = pdata.fillna(method=method, inplace=inplace) - with pytest.warns(FutureWarning): - actual = gdata.fillna(method=method, inplace=inplace) - - if inplace: - expected = pdata - actual = gdata - - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "gsr_data, dtype", - [ - (["2.34", "5.2", 
"7.47", None, "92.29", None], Decimal64Dtype(7, 2)), - ( - ["-74.56", None, "-23.73", "34.55", "2.89", None], - Decimal32Dtype(7, 2), - ), - ( - ["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan], - Decimal64Dtype(8, 3), - ), - ( - ["2.964", None, "57.432", "-989.330", None, "56.444"], - Decimal64Dtype(8, 3), - ), - ( - [np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan], - Decimal64Dtype(10, 4), - ), - ( - ["2.964", None, "54347.432", "-989.330", None, "56.444"], - Decimal128Dtype(20, 7), - ), - ], -) -@pytest.mark.parametrize( - "fill_value", - [ - 42, - -123, - Decimal("8.2"), - Decimal("-12.87"), - cudf.Series([None, -854, 9533, -274, -845, 7924], dtype="int32"), - cudf.Series(["-53.5", "13.4", "-64.3", None, "42.42", None]).astype( - Decimal64Dtype(7, 2) - ), - cudf.Series( - ["57.45", np.nan, np.nan, "686.49", "-55.5", "73.24"], - ).astype(Decimal64Dtype(7, 2)), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_decimal(gsr_data, dtype, fill_value, inplace): - gsr = cudf.Series(gsr_data).astype(dtype) - psr = gsr.to_pandas() - - if isinstance(fill_value, cudf.Series): - p_fill_value = fill_value.to_pandas() - else: - p_fill_value = fill_value - - expected = psr.fillna(p_fill_value, inplace=inplace) - got = gsr.fillna(fill_value, inplace=inplace) - - assert_eq(expected, got, check_dtype=False) - - -@pytest.mark.parametrize( - "psr_data", - [ - pd.Series(["a", "b", "a", None, "c", None], dtype="category"), - pd.Series( - ["a", "b", "a", None, "c", None], - dtype="category", - index=["q", "r", "z", "a", "b", "c"], - ), - pd.Series( - ["a", "b", "a", None, "c", None], - dtype="category", - index=["x", "t", "p", "q", "r", "z"], - ), - pd.Series(["a", "b", "a", np.nan, "c", np.nan], dtype="category"), - pd.Series( - [None, None, None, None, None, None, "a", "b", "c"], - dtype="category", - ), - ], -) -@pytest.mark.parametrize( - "fill_value", - [ - "c", - pd.Series(["c", "c", "c", "c", "c", "a"], dtype="category"), - pd.Series( - ["a", "b", "a", None, "c", None], - dtype="category", - index=["x", "t", "p", "q", "r", "z"], - ), - pd.Series( - ["a", "b", "a", None, "c", None], - dtype="category", - index=["q", "r", "z", "a", "b", "c"], - ), - pd.Series(["a", "b", "a", None, "c", None], dtype="category"), - pd.Series(["a", "b", "a", np.nan, "c", np.nan], dtype="category"), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_categorical(psr_data, fill_value, inplace): - psr = psr_data.copy(deep=True) - gsr = cudf.from_pandas(psr) - - if isinstance(fill_value, pd.Series): - fill_value_cudf = cudf.from_pandas(fill_value) - else: - fill_value_cudf = fill_value - - if ( - isinstance(fill_value_cudf, cudf.Series) - and gsr.dtype != fill_value_cudf.dtype - ): - assert_exceptions_equal( - lfunc=psr.fillna, - rfunc=gsr.fillna, - lfunc_args_and_kwargs=([fill_value], {"inplace": inplace}), - rfunc_args_and_kwargs=([fill_value_cudf], {"inplace": inplace}), - ) - else: - expected = psr.fillna(fill_value, inplace=inplace) - got = gsr.fillna(fill_value_cudf, inplace=inplace) - - if inplace: - expected = psr - got = gsr - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "psr_data", - [ - pd.Series( - pd.date_range( - "2010-01-01", - "2020-01-10", - freq="1YE" if PANDAS_GE_220 else "1y", - ) - ), - pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), - pd.Series( - [ - None, - None, - None, - None, - None, - None, - "2011-10-10", - "2010-01-01", - "2010-01-02", - "2010-01-04", - "2010-11-01", - ], - 
dtype="datetime64[ns]", - ), - pd.Series( - [ - None, - None, - None, - None, - None, - None, - "2011-10-10", - "2010-01-01", - "2010-01-02", - "2010-01-04", - "2010-11-01", - ], - dtype="datetime64[ns]", - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], - ), - ], -) -@pytest.mark.parametrize( - "fill_value", - [ - pd.Timestamp("2010-01-02"), - pd.Series( - pd.date_range( - "2010-01-01", - "2020-01-10", - freq="1YE" if PANDAS_GE_220 else "1y", - ) - ) - + pd.Timedelta("1d"), - pd.Series(["2010-01-01", None, "2011-10-10"], dtype="datetime64[ns]"), - pd.Series( - [ - None, - None, - None, - None, - None, - None, - "2011-10-10", - "2010-01-01", - "2010-01-02", - "2010-01-04", - "2010-11-01", - ], - dtype="datetime64[ns]", - ), - pd.Series( - [ - None, - None, - None, - None, - None, - None, - "2011-10-10", - "2010-01-01", - "2010-01-02", - "2010-01-04", - "2010-11-01", - ], - dtype="datetime64[ns]", - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], - ), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_datetime(psr_data, fill_value, inplace): - psr = psr_data.copy(deep=True) - gsr = cudf.from_pandas(psr) - - if isinstance(fill_value, pd.Series): - fill_value_cudf = cudf.from_pandas(fill_value) - else: - fill_value_cudf = fill_value - - expected = psr.fillna(fill_value, inplace=inplace) - got = gsr.fillna(fill_value_cudf, inplace=inplace) - - if inplace: - got = gsr - expected = psr - - assert_eq(expected, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "data", - [ - # Categorical - pd.Categorical([1, 2, None, None, 3, 4]), - pd.Categorical([None, None, 1, None, 3, 4]), - pd.Categorical([1, 2, None, 3, 4, None, None]), - pd.Categorical(["1", "20", None, None, "3", "40"]), - pd.Categorical([None, None, "10", None, "30", "4"]), - pd.Categorical(["1", "20", None, "30", "4", None, None]), - # Datetime - np.array( - [ - "2020-01-01 08:00:00", - "2020-01-01 09:00:00", - None, - "2020-01-01 10:00:00", - None, - "2020-01-01 10:00:00", - ], - dtype="datetime64[ns]", - ), - np.array( - [ - None, - None, - "2020-01-01 09:00:00", - "2020-01-01 10:00:00", - None, - "2020-01-01 10:00:00", - ], - dtype="datetime64[ns]", - ), - np.array( - [ - "2020-01-01 09:00:00", - None, - None, - "2020-01-01 10:00:00", - None, - None, - ], - dtype="datetime64[ns]", - ), - # Timedelta - np.array( - [10, 100, 1000, None, None, 10, 100, 1000], dtype="datetime64[ns]" - ), - np.array( - [None, None, 10, None, 1000, 100, 10], dtype="datetime64[ns]" - ), - np.array( - [10, 100, None, None, 1000, None, None], dtype="datetime64[ns]" - ), - # String - np.array( - ["10", "100", "1000", None, None, "10", "100", "1000"], - dtype="object", - ), - np.array( - [None, None, "1000", None, "10", "100", "10"], dtype="object" - ), - np.array( - ["10", "100", None, None, "1000", None, None], dtype="object" - ), - ], -) -@pytest.mark.parametrize("container", [pd.Series, pd.DataFrame]) -@pytest.mark.parametrize("method", ["ffill", "bfill"]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_fillna_method_fixed_width_non_num(data, container, method, inplace): - if container == pd.DataFrame: - data = {"a": data, "b": data, "c": data} - - pdata = container(data) - - # Explicitly using nans_as_nulls=True - gdata = cudf.from_pandas(pdata, nan_as_null=True) - - with pytest.warns(FutureWarning): - expected = pdata.fillna(method=method, inplace=inplace) - with 
-        actual = gdata.fillna(method=method, inplace=inplace)
-
-    if inplace:
-        expected = pdata
-        actual = gdata
-
-    assert_eq(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "df",
-    [
-        pd.DataFrame({"a": [1, 2, None], "b": [None, None, 5]}),
-        pd.DataFrame(
-            {"a": [1, 2, None], "b": [None, None, 5]}, index=["a", "p", "z"]
-        ),
-        pd.DataFrame({"a": [1, 2, 3]}),
-    ],
-)
-@pytest.mark.parametrize(
-    "value",
-    [
-        10,
-        pd.Series([10, 20, 30]),
-        pd.Series([3, 4, 5]),
-        pd.Series([10, 20, 30], index=["z", "a", "p"]),
-        {"a": 5, "b": pd.Series([3, 4, 5])},
-        {"a": 5001},
-        {"b": pd.Series([11, 22, 33], index=["a", "p", "z"])},
-        {"a": 5, "b": pd.Series([3, 4, 5], index=["a", "p", "z"])},
-        {"c": 100},
-        np.nan,
-    ],
-)
-@pytest.mark.parametrize("inplace", [True, False])
-def test_fillna_dataframe(df, value, inplace):
-    pdf = df.copy(deep=True)
-    gdf = cudf.from_pandas(pdf)
-
-    fill_value_pd = value
-    if isinstance(fill_value_pd, (pd.Series, pd.DataFrame)):
-        fill_value_cudf = cudf.from_pandas(fill_value_pd)
-    elif isinstance(fill_value_pd, dict):
-        fill_value_cudf = {}
-        for key in fill_value_pd:
-            temp_val = fill_value_pd[key]
-            if isinstance(temp_val, pd.Series):
-                temp_val = cudf.from_pandas(temp_val)
-            fill_value_cudf[key] = temp_val
-    else:
-        fill_value_cudf = value
-
-    expect = pdf.fillna(fill_value_pd, inplace=inplace)
-    got = gdf.fillna(fill_value_cudf, inplace=inplace)
-
-    if inplace:
-        got = gdf
-        expect = pdf
-
-    assert_eq(expect, got)
-
-
-@pytest.mark.parametrize(
-    "ps_data",
-    [
-        pd.Series(["a", "b", "c", "d"]),
-        pd.Series([None] * 4, dtype="object"),
-        pd.Series(["z", None, "z", None]),
-        pd.Series(["x", "y", None, None, None]),
-        pd.Series([None, None, None, "i", "P"]),
-    ],
-)
-@pytest.mark.parametrize(
-    "fill_value",
-    [
-        "a",
-        pd.Series(["a", "b", "c", "d"]),
-        pd.Series(["z", None, "z", None]),
-        pd.Series([None] * 4, dtype="object"),
-        pd.Series(["x", "y", None, None, None]),
-        pd.Series([None, None, None, "i", "P"]),
-    ],
-)
-@pytest.mark.parametrize("inplace", [True, False])
-def test_fillna_string(ps_data, fill_value, inplace):
-    psr = ps_data.copy(deep=True)
-    gsr = cudf.from_pandas(psr)
-
-    if isinstance(fill_value, pd.Series):
-        fill_value_cudf = cudf.from_pandas(fill_value)
-    else:
-        fill_value_cudf = fill_value
-
-    expected = psr.fillna(fill_value, inplace=inplace)
-    got = gsr.fillna(fill_value_cudf, inplace=inplace)
-
-    if inplace:
-        expected = psr
-        got = gsr
-
-    assert_eq(expected, got)
-
-
-@pytest.mark.parametrize("data_dtype", INTEGER_TYPES)
-def test_series_fillna_invalid_dtype(data_dtype):
-    gdf = cudf.Series([1, 2, None, 3], dtype=data_dtype)
-    fill_value = 2.5
-    msg = (
-        f"Cannot safely cast non-equivalent"
-        f" {type(fill_value).__name__} to {gdf.dtype.type.__name__}"
-    )
-    with pytest.raises(TypeError, match=msg):
-        gdf.fillna(fill_value)
-
-
-@pytest.mark.parametrize("data_dtype", NUMERIC_TYPES)
-@pytest.mark.parametrize("fill_value", [100, 100.0, 128.5])
-@pytest.mark.parametrize("op", [operator.gt, operator.eq, operator.lt])
-def test_series_where(data_dtype, fill_value, op):
-    psr = pd.Series(list(range(10)), dtype=data_dtype)
-    sr = cudf.from_pandas(psr)
-
-    try:
-        scalar_fits = sr.dtype.type(fill_value) == fill_value
-    except OverflowError:
-        scalar_fits = False
-
-    if not scalar_fits:
-        with pytest.raises(TypeError):
-            sr.where(op(sr, 0), fill_value)
-    else:
-        # Cast back to original dtype as pandas automatically upcasts
-        expect = psr.where(op(psr, 0), fill_value)
-        got = sr.where(op(sr, 0), fill_value)
-        # pandas returns 'float16' dtype, which is not supported in cudf
-        assert_eq(
-            expect,
-            got,
-            check_dtype=expect.dtype.kind not in ("f"),
-        )
-
-
-@pytest.mark.parametrize("fill_value", [100, 100.0, 100.5])
-def test_series_with_nulls_where(fill_value):
-    psr = pd.Series([None] * 3 + list(range(5)))
-    sr = cudf.from_pandas(psr)
-
-    expect = psr.where(psr > 0, fill_value)
-    got = sr.where(sr > 0, fill_value)
-    assert_eq(expect, got)
-
-    expect = psr.where(psr < 0, fill_value)
-    got = sr.where(sr < 0, fill_value)
-    assert_eq(expect, got)
-
-    expect = psr.where(psr == 0, fill_value)
-    got = sr.where(sr == 0, fill_value)
-    assert_eq(expect, got)
-
-
-@pytest.mark.parametrize("fill_value", [[888, 999]])
-def test_dataframe_with_nulls_where_with_scalars(fill_value):
-    pdf = pd.DataFrame(
-        {
-            "A": [-1, 2, -3, None, 5, 6, -7, 0],
-            "B": [4, -2, 3, None, 7, 6, 8, 0],
-        }
-    )
-    gdf = cudf.from_pandas(pdf)
-
-    expect = pdf.where(pdf % 3 == 0, fill_value)
-    got = gdf.where(gdf % 3 == 0, fill_value)
-
-    assert_eq(expect, got)
-
-
-def test_dataframe_with_different_types():
-    # Testing for int and float
-    pdf = pd.DataFrame(
-        {"A": [111, 22, 31, 410, 56], "B": [-10.12, 121.2, 45.7, 98.4, 87.6]}
-    )
-    gdf = cudf.from_pandas(pdf)
-    expect = pdf.where(pdf > 50, -pdf)
-    got = gdf.where(gdf > 50, -gdf)
-
-    assert_eq(expect, got)
-
-    # Testing for string
-    pdf = pd.DataFrame({"A": ["a", "bc", "cde", "fghi"]})
-    gdf = cudf.from_pandas(pdf)
-    pdf_mask = pd.DataFrame({"A": [True, False, True, False]})
-    gdf_mask = cudf.from_pandas(pdf_mask)
-    expect = pdf.where(pdf_mask, ["cudf"])
-    got = gdf.where(gdf_mask, ["cudf"])
-
-    assert_eq(expect, got)
-
-    # Testing for categorical
-    pdf = pd.DataFrame({"A": ["a", "b", "b", "c"]})
-    pdf["A"] = pdf["A"].astype("category")
-    gdf = cudf.from_pandas(pdf)
-    expect = pdf.where(pdf_mask, "c")
-    got = gdf.where(gdf_mask, ["c"])
-
-    assert_eq(expect, got)
-
-
-def test_dataframe_where_with_different_options():
-    pdf = pd.DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
-    gdf = cudf.from_pandas(pdf)
-
-    # numpy array
-    boolean_mask = np.array([[False, True], [True, False], [False, True]])
-
-    expect = pdf.where(boolean_mask, -pdf)
-    got = gdf.where(boolean_mask, -gdf)
-
-    assert_eq(expect, got)
-
-    # with single scalar
-    expect = pdf.where(boolean_mask, 8)
-    got = gdf.where(boolean_mask, 8)
-
-    assert_eq(expect, got)
-
-    # with multi scalar
-    expect = pdf.where(boolean_mask, [8, 9])
-    got = gdf.where(boolean_mask, [8, 9])
-
-    assert_eq(expect, got)
-
-
-def test_series_multiple_times_with_nulls():
-    sr = cudf.Series([1, 2, 3, None])
-    expected = cudf.Series([None, None, None, None], dtype=np.int64)
-
-    for i in range(3):
-        got = sr.replace([1, 2, 3], None)
-        assert_eq(expected, got)
-        # BUG: #2695
-        # The following series will acquire a chunk of memory and update with
-        # values, but these values may still linger even after the memory
-        # gets released. This memory space might get used for replace in
-        # subsequent calls and the memory used for mask may have junk values.
-        # So, if it is not updated properly, the result would be wrong.
-        # So, this will help verify that scenario.
- cudf.Series([1, 1, 1, None]) - - -@pytest.mark.parametrize("series_dtype", NUMERIC_TYPES) -@pytest.mark.parametrize( - "replacement", [128, 128.0, 128.5, 32769, 32769.0, 32769.5] -) -def test_numeric_series_replace_dtype(request, series_dtype, replacement): - request.applymarker( - pytest.mark.xfail( - condition=PANDAS_GT_214 - and ( - ( - series_dtype == "int8" - and replacement in {128, 128.0, 32769, 32769.0} - ) - or ( - series_dtype == "int16" and replacement in {32769, 32769.0} - ) - ), - reason="Pandas throws an AssertionError for these " - "cases and asks us to log a bug, they are trying to " - "avoid a RecursionError which cudf will not run into", - ) - ) - psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=series_dtype) - sr = cudf.from_pandas(psr) - - expect = psr.replace(1, replacement) - got = sr.replace(1, replacement) - - assert_eq(expect, got) - - # to_replace is a list, replacement is a scalar - expect = psr.replace([2, 3], replacement) - got = sr.replace([2, 3], replacement) - - assert_eq(expect, got) - - # If to_replace is a scalar and replacement is a list - with pytest.raises(TypeError): - sr.replace(0, [replacement, 2]) - - # Both list of unequal length - with pytest.raises(ValueError): - sr.replace([0, 1], [replacement]) - - # Both lists of equal length - expect = psr.replace([2, 3], [replacement, replacement]) - got = sr.replace([2, 3], [replacement, replacement]) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "pframe, replace_args", - [ - ( - pd.Series([5, 1, 2, 3, 4]), - {"to_replace": 5, "value": 0, "inplace": True}, - ), - ( - pd.Series([5, 1, 2, 3, 4]), - {"to_replace": {5: 0, 3: -5}, "inplace": True}, - ), - (pd.Series([5, 1, 2, 3, 4]), {}), - pytest.param( - pd.Series(["one", "two", "three"], dtype="category"), - {"to_replace": "one", "value": "two", "inplace": True}, - marks=pytest.mark.xfail( - condition=PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/43232" - "https://github.com/pandas-dev/pandas/issues/53358", - ), - ), - ( - pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}), - {"to_replace": 5, "value": 0, "inplace": True}, - ), - ( - pd.Series([1, 2, 3, 45]), - { - "to_replace": np.array([]).astype(int), - "value": 77, - "inplace": True, - }, - ), - ( - pd.Series([1, 2, 3, 45]), - { - "to_replace": np.array([]).astype(int), - "value": 77, - "inplace": False, - }, - ), - ( - pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), - {"to_replace": {"a": 2}, "value": {"a": -33}, "inplace": True}, - ), - ( - pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), - { - "to_replace": {"a": [2, 5]}, - "value": {"a": [9, 10]}, - "inplace": True, - }, - ), - ( - pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), - {"to_replace": [], "value": [], "inplace": True}, - ), - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warning not given on older versions of pandas", -) -def test_replace_inplace(pframe, replace_args): - gpu_frame = cudf.from_pandas(pframe) - pandas_frame = pframe.copy() - - gpu_copy = gpu_frame.copy() - cpu_copy = pandas_frame.copy() - - assert_eq(gpu_frame, pandas_frame) - assert_eq(gpu_copy, cpu_copy) - with expect_warning_if(len(replace_args) == 0): - gpu_frame.replace(**replace_args) - with expect_warning_if(len(replace_args) == 0): - pandas_frame.replace(**replace_args) - assert_eq(gpu_frame, pandas_frame) - assert_eq(gpu_copy, cpu_copy) - - -def test_replace_df_error(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}) - gdf = cudf.from_pandas(pdf) - - 
assert_exceptions_equal( - lfunc=pdf.replace, - rfunc=gdf.replace, - lfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}), - rfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}), - ) - - -@pytest.mark.parametrize( - ("lower", "upper"), - [ - ([2, 7.4], [4, 7.9]), - ([2, 7.4], None), - ( - None, - [4, 7.9], - ), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_clip(lower, upper, inplace): - pdf = pd.DataFrame( - {"a": [1, 2, 3, 4, 5], "b": [7.1, 7.24, 7.5, 7.8, 8.11]} - ) - gdf = cudf.from_pandas(pdf) - - got = gdf.clip(lower=lower, upper=upper, inplace=inplace) - expect = pdf.clip(lower=lower, upper=upper, axis=1) - - if inplace is True: - assert_eq(expect, gdf) - else: - assert_eq(expect, got) - - -@pytest.mark.parametrize( - ("lower", "upper"), - [("b", "d"), ("b", None), (None, "c"), (None, None)], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_category_clip(lower, upper, inplace): - data = ["a", "b", "c", "d", "e"] - pdf = pd.DataFrame({"a": data}) - gdf = cudf.from_pandas(pdf) - gdf["a"] = gdf["a"].astype("category") - - expect = pdf.clip(lower=lower, upper=upper) - got = gdf.clip(lower=lower, upper=upper, inplace=inplace) - - if inplace is True: - assert_eq(expect, gdf.astype("str")) - else: - assert_eq(expect, got.astype("str")) - - -@pytest.mark.parametrize( - ("lower", "upper"), - [([2, 7.4], [4, 7.9, "d"]), ([2, 7.4, "a"], [4, 7.9, "d"])], -) -def test_dataframe_exceptions_for_clip(lower, upper): - gdf = cudf.DataFrame( - {"a": [1, 2, 3, 4, 5], "b": [7.1, 7.24, 7.5, 7.8, 8.11]} - ) - - with pytest.raises(ValueError): - gdf.clip(lower=lower, upper=upper) - - -@pytest.mark.parametrize( - ("data", "lower", "upper"), - [ - ([1, 2, 3, 4, 5], 2, 4), - ([1, 2, 3, 4, 5], 2, None), - ([1, 2, 3, 4, 5], None, 4), - ([1, 2, 3, 4, 5], None, None), - ([1, 2, 3, 4, 5], 4, 2), - ([1.0, 2.0, 3.0, 4.0, 5.0], 4, 2), - (pd.Series([1, 2, 3, 4, 5], dtype="int32"), 4, 2), - (["a", "b", "c", "d", "e"], "b", "d"), - (["a", "b", "c", "d", "e"], "b", None), - (["a", "b", "c", "d", "e"], None, "d"), - (["a", "b", "c", "d", "e"], "d", "b"), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_series_clip(data, lower, upper, inplace): - psr = pd.Series(data) - gsr = cudf.from_pandas(psr) - - expect = psr.clip(lower=lower, upper=upper) - got = gsr.clip(lower=lower, upper=upper, inplace=inplace) - - if inplace is True: - assert_eq(expect, gsr) - else: - assert_eq(expect, got) - - -def test_series_exceptions_for_clip(): - with pytest.raises(ValueError): - cudf.Series([1, 2, 3, 4]).clip([1, 2], [2, 3]) - - with pytest.raises(NotImplementedError): - cudf.Series([1, 2, 3, 4]).clip(1, 2, axis=0) - - -@pytest.mark.parametrize( - "data", [[1, 2.0, 3, 4, None, 1, None, 10, None], ["a", "b", "c"]] -) -@pytest.mark.parametrize( - "index", - [ - None, - [1, 2, 3], - ["a", "b", "z"], - ["a", "b", "c", "d", "e", "f", "g", "l", "m"], - ], -) -@pytest.mark.parametrize("value", [[1, 2, 3, 4, None, 1, None, 10, None]]) -def test_series_fillna(data, index, value): - psr = pd.Series( - data, - index=index if index is not None and len(index) == len(data) else None, - ) - gsr = cudf.Series( - data, - index=index if index is not None and len(index) == len(data) else None, - ) - - expect = psr.fillna(pd.Series(value)) - got = gsr.fillna(cudf.Series(value)) - assert_eq(expect, got) - - -def test_series_fillna_error(): - psr = pd.Series([1, 2, None, 3, None]) - gsr = cudf.from_pandas(psr) - - assert_exceptions_equal( - psr.fillna, - gsr.fillna, - 
([pd.DataFrame({"a": [1, 2, 3]})],), - ([cudf.DataFrame({"a": [1, 2, 3]})],), - ) - - -def test_series_replace_errors(): - gsr = cudf.Series([1, 2, None, 3, None]) - psr = gsr.to_pandas() - - with pytest.raises( - TypeError, - match=re.escape( - "to_replace and value should be of same types," - "got to_replace dtype: int64 and " - "value dtype: object" - ), - ): - gsr.replace(1, "a") - - gsr = cudf.Series(["a", "b", "c"]) - with pytest.raises( - TypeError, - match=re.escape( - "to_replace and value should be of same types," - "got to_replace dtype: int64 and " - "value dtype: object" - ), - ): - gsr.replace([1, 2], ["a", "b"]) - - assert_exceptions_equal( - psr.replace, - gsr.replace, - ([{"a": 1}, 1],), - ([{"a": 1}, 1],), - ) - - assert_exceptions_equal( - lfunc=psr.replace, - rfunc=gsr.replace, - lfunc_args_and_kwargs=([[1, 2], [1]],), - rfunc_args_and_kwargs=([[1, 2], [1]],), - ) - - assert_exceptions_equal( - lfunc=psr.replace, - rfunc=gsr.replace, - lfunc_args_and_kwargs=([object(), [1]],), - rfunc_args_and_kwargs=([object(), [1]],), - ) - - assert_exceptions_equal( - lfunc=psr.replace, - rfunc=gsr.replace, - lfunc_args_and_kwargs=([{"a": 1}, object()],), - rfunc_args_and_kwargs=([{"a": 1}, object()],), - ) - - -@pytest.mark.parametrize( - "gsr,old,new,expected", - [ - ( - lambda: cudf.Series(["a", "b", "c", None]), - None, - "a", - lambda: cudf.Series(["a", "b", "c", "a"]), - ), - ( - lambda: cudf.Series(["a", "b", "c", None]), - [None, "a", "a"], - ["c", "b", "d"], - lambda: cudf.Series(["d", "b", "c", "c"]), - ), - ( - lambda: cudf.Series(["a", "b", "c", None]), - [None, "a"], - ["b", None], - lambda: cudf.Series([None, "b", "c", "b"]), - ), - ( - lambda: cudf.Series(["a", "b", "c", None]), - [None, None], - [None, None], - lambda: cudf.Series(["a", "b", "c", None]), - ), - ( - lambda: cudf.Series([1, 2, None, 3]), - None, - 10, - lambda: cudf.Series([1, 2, 10, 3]), - ), - ( - lambda: cudf.Series([1, 2, None, 3]), - [None, 1, 1], - [3, 2, 4], - lambda: cudf.Series([4, 2, 3, 3]), - ), - ( - lambda: cudf.Series([1, 2, None, 3]), - [None, 1], - [2, None], - lambda: cudf.Series([None, 2, 2, 3]), - ), - ( - lambda: cudf.Series(["a", "q", "t", None], dtype="category"), - None, - "z", - lambda: cudf.Series(["a", "q", "t", "z"], dtype="category"), - ), - ( - lambda: cudf.Series(["a", "q", "t", None], dtype="category"), - [None, "a", "q"], - ["z", None, None], - lambda: cudf.Series([None, None, "t", "z"], dtype="category"), - ), - ( - lambda: cudf.Series(["a", None, "t", None], dtype="category"), - [None, "t"], - ["p", None], - lambda: cudf.Series(["a", "p", None, "p"], dtype="category"), - ), - ], -) -def test_replace_nulls(gsr, old, new, expected): - gsr = gsr() - with expect_warning_if(isinstance(gsr.dtype, cudf.CategoricalDtype)): - actual = gsr.replace(old, new) - assert_eq( - expected().sort_values().reset_index(drop=True), - actual.sort_values().reset_index(drop=True), - ) - - -def test_fillna_columns_multiindex(): - columns = pd.MultiIndex.from_tuples([("a", "b"), ("d", "e")]) - pdf = pd.DataFrame( - {"0": [1, 2, None, 3, None], "1": [None, None, None, None, 4]} - ) - pdf.columns = columns - gdf = cudf.from_pandas(pdf) - - expected = pdf.fillna(10) - actual = gdf.fillna(10) - - assert_eq(expected, actual) - - -def test_fillna_nan_and_null(): - ser = cudf.Series(pa.array([float("nan"), None, 1.1]), nan_as_null=False) - result = ser.fillna(2.2) - expected = cudf.Series([2.2, 2.2, 1.1]) - assert_eq(result, expected) - - -def test_replace_with_index_objects(): - result = 
cudf.Series([1, 2]).replace(cudf.Index([1]), cudf.Index([2])) - expected = pd.Series([1, 2]).replace(pd.Index([1]), pd.Index([2])) - assert_eq(result, expected) - - -# Example test function for datetime series replace -def test_replace_datetime_series(): - # Create a pandas datetime series - pd_series = pd.Series(pd.date_range("20210101", periods=5)) - # Replace a specific datetime value - pd_result = pd_series.replace( - pd.Timestamp("2021-01-02"), pd.Timestamp("2021-01-10") - ) - - # Create a cudf datetime series - cudf_series = cudf.Series(pd.date_range("20210101", periods=5)) - # Replace a specific datetime value - cudf_result = cudf_series.replace( - pd.Timestamp("2021-01-02"), pd.Timestamp("2021-01-10") - ) - - assert_eq(pd_result, cudf_result) - - -# Example test function for timedelta series replace -def test_replace_timedelta_series(): - # Create a pandas timedelta series - pd_series = pd.Series(pd.timedelta_range("1 days", periods=5)) - # Replace a specific timedelta value - pd_result = pd_series.replace( - pd.Timedelta("2 days"), pd.Timedelta("10 days") - ) - - # Create a cudf timedelta series - cudf_series = cudf.Series(pd.timedelta_range("1 days", periods=5)) - # Replace a specific timedelta value - cudf_result = cudf_series.replace( - pd.Timedelta("2 days"), pd.Timedelta("10 days") - ) - - assert_eq(pd_result, cudf_result) - - -def test_replace_multiple_rows(datadir): - path = datadir / "parquet" / "replace_multiple_rows.parquet" - pdf = pd.read_parquet(path) - gdf = cudf.read_parquet(path) - - pdf.replace([np.inf, -np.inf], np.nan, inplace=True) - gdf.replace([np.inf, -np.inf], np.nan, inplace=True) - - assert_eq(pdf, gdf, check_dtype=False) From 7acddc976937109bd4c0d77b7b3d6c22ffc7c088 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:56:14 -0700 Subject: [PATCH 093/366] Move test_monotonic.py to new cudf classic test directory structure (#19572) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19572 --- .../indexes/categoricalindex/__init__.py | 0 .../categoricalindex/test_attributes.py | 20 + .../tests/indexes/datetimeindex/__init__.py | 0 .../indexes/datetimeindex/test_attributes.py | 45 +++ .../tests/indexes/index/methods/__init__.py | 0 .../index/methods/test_get_slice_bounds.py | 47 +++ .../tests/indexes/index/test_attributes.py | 34 ++ .../indexes/multiindex/test_attributes.py | 54 +++ .../cudf/tests/indexes/rangeindex/__init__.py | 0 .../indexes/rangeindex/methods/__init__.py | 0 .../methods/test_get_slice_bounds.py | 37 ++ .../indexes/rangeindex/test_attributes.py | 18 + .../cudf/cudf/tests/series/test_attributes.py | 78 ++++ python/cudf/cudf/tests/test_monotonic.py | 354 ------------------ 14 files changed, 333 insertions(+), 354 deletions(-) create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_get_slice_bounds.py create mode 100644 
python/cudf/cudf/tests/indexes/index/test_attributes.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_slice_bounds.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py delete mode 100644 python/cudf/cudf/tests/test_monotonic.py diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/__init__.py b/python/cudf/cudf/tests/indexes/categoricalindex/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py b/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py new file mode 100644 index 00000000000..cfdf26877b6 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py @@ -0,0 +1,20 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +from cudf.core.index import CategoricalIndex + + +@pytest.mark.parametrize( + "testlist", [["c", "d", "e", "f"], ["z", "y", "x", "r"]] +) +def test_categorical_index_is_unique_monotonic(testlist): + # Assuming unordered categorical data cannot be "monotonic" + raw_cat = pd.Categorical(testlist, ordered=True) + index = CategoricalIndex(raw_cat) + index_pd = pd.CategoricalIndex(raw_cat) + + assert index.is_unique == index_pd.is_unique + assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing + assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/__init__.py b/python/cudf/cudf/tests/indexes/datetimeindex/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py new file mode 100644 index 00000000000..5fb8c2fb647 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +from cudf.core.index import DatetimeIndex + + +@pytest.mark.parametrize( + "testlist", + [ + [ + "2001-01-01 00:00:00", + "2001-02-03 08:00:00", + "2001-03-08 16:00:00", + "2001-04-11 00:00:00", + ], + [ + "2001-04-11 00:00:00", + "2001-03-08 16:00:00", + "2001-02-03 08:00:00", + "2001-01-01 00:00:00", + ], + [ + "2001-04-11 00:00:00", + "2001-02-03 08:00:00", + "2001-03-08 16:00:00", + "2001-01-01 00:00:00", + ], + [ + "2001-04-11 00:00:00", + "2001-01-01 00:00:00", + "2001-02-03 08:00:00", + "2001-03-08 16:00:00", + "2001-01-01 00:00:00", + ], + ], +) +def test_datetime_index_is_unique_monotonic(testlist): + index = DatetimeIndex(testlist) + index_pd = pd.DatetimeIndex(testlist) + + assert index.is_unique == index_pd.is_unique + assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing + assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing diff --git a/python/cudf/cudf/tests/indexes/index/methods/__init__.py b/python/cudf/cudf/tests/indexes/index/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_get_slice_bounds.py b/python/cudf/cudf/tests/indexes/index/methods/test_get_slice_bounds.py new file mode 100644 index 00000000000..68efbc71f22 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_get_slice_bounds.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +from cudf import Index + + +@pytest.mark.parametrize( + "testlist", + [ + [10, 9, 8, 8, 7], + [2.0, 5.0, 4.0, 3.0, 7.0], + ["b", "cat", "e", "bat", "c"], + ], +) +@pytest.mark.parametrize("side", ["left", "right"]) +def test_get_slice_bound(testlist, side): + index = Index(testlist) + index_pd = pd.Index(testlist) + for label in testlist: + expect = index_pd.get_slice_bound(label, side) + got = index.get_slice_bound(label, side) + assert got == expect + + +@pytest.mark.parametrize("label", [1, 5, 7, 11]) +@pytest.mark.parametrize("side", ["left", "right"]) +def test_get_slice_bound_missing(label, side): + mylist = [2, 4, 6, 8, 10] + index = Index(mylist) + index_pd = pd.Index(mylist) + + expect = index_pd.get_slice_bound(label, side) + got = index.get_slice_bound(label, side) + assert got == expect + + +@pytest.mark.parametrize("label", ["a", "c", "g"]) +@pytest.mark.parametrize("side", ["left", "right"]) +def test_get_slice_bound_missing_str(label, side): + mylist = ["b", "d", "f"] + index = Index(mylist) + index_pd = pd.Index(mylist) + got = index.get_slice_bound(label, side) + expect = index_pd.get_slice_bound(label, side) + assert got == expect diff --git a/python/cudf/cudf/tests/indexes/index/test_attributes.py b/python/cudf/cudf/tests/indexes/index/test_attributes.py new file mode 100644 index 00000000000..2e80dfb272e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/test_attributes.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +from cudf import Index + + +@pytest.mark.parametrize( + "testlist", + [ + [1, 2, 3, 4], + [1, 2, 3, 4, None], + [1, 2, 3, 3, 4], + [10, 9, 8, 7], + [10, 9, 8, 8, 7], + [1, 2, 3, 4, np.nan], + [10, 9, 8, np.nan, 7], + [10, 9, 8, 8, 7, np.nan], + ["c", "d", "e", "f"], + ["c", "d", "e", "e", "f"], + ["c", "d", "e", "f", None], + ["z", "y", "x", "r"], + ["z", "y", "x", "x", "r"], + ], +) +def test_index_is_unique_monotonic(testlist): + index = Index(testlist) + index_pd = pd.Index(testlist) + + assert index.is_unique == index_pd.is_unique + assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing + assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py index 54be9e670aa..c938683764b 100644 --- a/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/multiindex/test_attributes.py @@ -8,10 +8,64 @@ import pytest import cudf +from cudf import MultiIndex from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal +def test_multiindex_is_unique_monotonic(): + pidx = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + ], + ) + pidx.names = ["alpha", "location", "weather", "sign"] + gidx = cudf.from_pandas(pidx) + + assert pidx.is_unique == gidx.is_unique + assert pidx.is_monotonic_increasing == gidx.is_monotonic_increasing + assert pidx.is_monotonic_decreasing == gidx.is_monotonic_decreasing + + +@pytest.mark.parametrize( + "testarr", + [ + ( + [ + ["bar", "bar", "foo", "foo", "qux", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "two"], + ], + ["first", "second"], + ), + ( + [ + ["bar", "bar", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two"], + ], + ["first", "second"], + ), + ], +) +def test_multiindex_tuples_is_unique_monotonic(testarr): + tuples = list(zip(*testarr[0], strict=True)) + + index = MultiIndex.from_tuples(tuples, names=testarr[1]) + index_pd = pd.MultiIndex.from_tuples(tuples, names=testarr[1]) + + assert index.is_unique == index_pd.is_unique + assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing + assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing + + @pytest.fixture( params=[ "from_product", diff --git a/python/cudf/cudf/tests/indexes/rangeindex/__init__.py b/python/cudf/cudf/tests/indexes/rangeindex/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/__init__.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_slice_bounds.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_slice_bounds.py new file mode 100644 index 00000000000..6588e961bb1 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_slice_bounds.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +from cudf.core.index import RangeIndex + + +@pytest.mark.parametrize( + "start, stop", [(0, 10), (0, 1), (3, 4), (0, 0), (3, 3)] +) +@pytest.mark.parametrize("idx", [-1, 0, 5, 10, 11]) +@pytest.mark.parametrize("side", ["left", "right"]) +def test_rangeindex_get_slice_bound_basic(start, stop, idx, side): + pd_index = pd.RangeIndex(start, stop) + cudf_index = RangeIndex(start, stop) + expect = pd_index.get_slice_bound(idx, side) + got = cudf_index.get_slice_bound(idx, side) + assert expect == got + + +@pytest.mark.parametrize( + "start, stop, step", + [(3, 20, 5), (20, 3, -5), (20, 3, 5), (3, 20, -5), (0, 0, 2), (3, 3, 2)], +) +@pytest.mark.parametrize( + "label", + [3, 8, 13, 18, 20, 15, 10, 5, -1, 0, 19, 21, 6, 11, 17], +) +@pytest.mark.parametrize("side", ["left", "right"]) +def test_rangeindex_get_slice_bound_step(start, stop, step, label, side): + pd_index = pd.RangeIndex(start, stop, step) + cudf_index = RangeIndex(start, stop, step) + + expect = pd_index.get_slice_bound(label, side) + got = cudf_index.get_slice_bound(label, side) + assert expect == got diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py new file mode 100644 index 00000000000..a9de5d39622 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +from cudf.core.index import RangeIndex + + +@pytest.mark.parametrize( + "start, stop, step", [(10, 20, 1), (0, -10, -1), (5, 5, 1)] +) +def test_range_index_is_unique_monotonic(start, stop, step): + index = RangeIndex(start=start, stop=stop, step=step) + index_pd = pd.RangeIndex(start=start, stop=stop, step=step) + + assert index.is_unique == index_pd.is_unique + assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing + assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index 6cc5999ab4f..aedbf696009 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -9,6 +9,84 @@ from cudf.testing import assert_eq +@pytest.mark.parametrize( + "testlist", + [ + [1, 2, 3, 4], + [1, 2, 3, 3, 4], + [10, 9, 8, 7], + [10, 9, 8, 8, 7], + ["c", "d", "e", "f"], + ["c", "d", "e", "e", "f"], + ["z", "y", "x", "r"], + ["z", "y", "x", "x", "r"], + ], +) +def test_series_is_unique_monotonic(testlist): + series = cudf.Series(testlist) + series_pd = pd.Series(testlist) + + assert series.is_unique == series_pd.is_unique + assert series.is_monotonic_increasing == series_pd.is_monotonic_increasing + assert series.is_monotonic_decreasing == series_pd.is_monotonic_decreasing + + +@pytest.mark.parametrize( + "data", + [ + [pd.Timestamp("2018-01-01"), pd.Timestamp("2019-01-31"), None], + [1, 2, 3, None], + [None, 1, 2, 3], + ["a", "b", "c", None], + [None, "a", "b", "c"], + ], +) +def test_is_monotonic_always_false_for_null(data): + ser = cudf.Series(data) + assert ser.is_monotonic_increasing is False + assert ser.is_monotonic_decreasing is False + + +@pytest.mark.parametrize("box", [cudf.Series, cudf.Index]) +@pytest.mark.parametrize( + "value,na_like", + [ + [1, None], + [np.datetime64("2020-01-01", "ns"), np.datetime64("nat", "ns")], + ["s", None], + [1.0, np.nan], + ], + ids=repr, +) +def test_is_unique(box, value, na_like): + obj = box([value], 
nan_as_null=False) + assert obj.is_unique + + obj = box([value, value], nan_as_null=False) + assert not obj.is_unique + + obj = box([None, value], nan_as_null=False) + assert obj.is_unique + + obj = box([None, None, value], nan_as_null=False) + assert not obj.is_unique + + if na_like is not None: + obj = box([na_like, value], nan_as_null=False) + assert obj.is_unique + + obj = box([na_like, na_like], nan_as_null=False) + assert not obj.is_unique + + try: + if not np.isnat(na_like): + # pyarrow coerces nat to null + obj = box([None, na_like, value], nan_as_null=False) + assert obj.is_unique + except TypeError: + pass + + @pytest.fixture( params=[ pd.Series([0, 1, 2, np.nan, 4, None, 6]), diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py deleted file mode 100644 index 842b40a6d37..00000000000 --- a/python/cudf/cudf/tests/test_monotonic.py +++ /dev/null @@ -1,354 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. - -""" -Tests related to is_unique, is_monotonic_increasing & -is_monotonic_decreasing attributes -""" - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import Index, MultiIndex, Series -from cudf.core.index import CategoricalIndex, DatetimeIndex, RangeIndex - - -@pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)]) -def test_range_index(testrange): - index = RangeIndex( - start=testrange[0], stop=testrange[1], step=testrange[2] - ) - index_pd = pd.RangeIndex( - start=testrange[0], stop=testrange[1], step=testrange[2] - ) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - [1, 2, 3, 4], - [1, 2, 3, 4, None], - [1, 2, 3, 3, 4], - [10, 9, 8, 7], - [10, 9, 8, 8, 7], - ["c", "d", "e", "f"], - ["c", "d", "e", "e", "f"], - ["c", "d", "e", "f", None], - ["z", "y", "x", "r"], - ["z", "y", "x", "x", "r"], - ], -) -def test_generic_index(testlist): - index = Index(testlist) - index_pd = pd.Index(testlist) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - [1, 2, 3, 4, np.nan], - [10, 9, 8, np.nan, 7], - [10, 9, 8, 8, 7, np.nan], - ], -) -def test_float_index(testlist): - index_pd = pd.Index(testlist) - index = cudf.from_pandas(index_pd, nan_as_null=False) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - ["c", "d", "e", "f"], - ["c", "d", "e", "e", "f"], - ["z", "y", "x", "r"], - ["z", "y", "x", "x", "r"], - ], -) -def test_string_index(testlist): - index = cudf.Index(testlist) - index_pd = pd.Index(testlist) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", [["c", "d", "e", "f"], ["z", "y", "x", "r"]] -) -def test_categorical_index(testlist): - # Assuming unordered categorical data cannot be "monotonic" - raw_cat = pd.Categorical(testlist, ordered=True) - index = CategoricalIndex(raw_cat) - index_pd = 
pd.CategoricalIndex(raw_cat) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - [ - "2001-01-01 00:00:00", - "2001-02-03 08:00:00", - "2001-03-08 16:00:00", - "2001-04-11 00:00:00", - ], - [ - "2001-04-11 00:00:00", - "2001-03-08 16:00:00", - "2001-02-03 08:00:00", - "2001-01-01 00:00:00", - ], - [ - "2001-04-11 00:00:00", - "2001-02-03 08:00:00", - "2001-03-08 16:00:00", - "2001-01-01 00:00:00", - ], - [ - "2001-04-11 00:00:00", - "2001-01-01 00:00:00", - "2001-02-03 08:00:00", - "2001-03-08 16:00:00", - "2001-01-01 00:00:00", - ], - ], -) -def test_datetime_index(testlist): - index = DatetimeIndex(testlist) - index_pd = pd.DatetimeIndex(testlist) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - [1, 2, 3, 4], - [1, 2, 3, 3, 4], - [10, 9, 8, 7], - [10, 9, 8, 8, 7], - ["c", "d", "e", "f"], - ["c", "d", "e", "e", "f"], - ["z", "y", "x", "r"], - ["z", "y", "x", "x", "r"], - ], -) -def test_series(testlist): - series = Series(testlist) - series_pd = pd.Series(testlist) - - assert series.is_unique == series_pd.is_unique - assert series.is_monotonic_increasing == series_pd.is_monotonic_increasing - assert series.is_monotonic_decreasing == series_pd.is_monotonic_decreasing - - -def test_multiindex(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame(rng.random(size=(7, 5))) - pdf.index = pd.MultiIndex( - [ - ["a", "b", "c"], - ["house", "store", "forest"], - ["clouds", "clear", "storm"], - ["fire", "smoke", "clear"], - ], - [ - [0, 0, 0, 0, 1, 1, 2], - [1, 1, 1, 1, 0, 0, 2], - [0, 0, 2, 2, 2, 0, 1], - [0, 0, 0, 1, 2, 0, 1], - ], - ) - pdf.index.names = ["alpha", "location", "weather", "sign"] - gdf = cudf.from_pandas(pdf) - - assert pdf.index.is_unique == gdf.index.is_unique - assert ( - pdf.index.is_monotonic_increasing == gdf.index.is_monotonic_increasing - ) - assert ( - pdf.index.is_monotonic_decreasing == gdf.index.is_monotonic_decreasing - ) - - -@pytest.mark.parametrize( - "testarr", - [ - ( - [ - ["bar", "bar", "foo", "foo", "qux", "qux", "qux"], - ["one", "two", "one", "two", "one", "two", "two"], - ], - ["first", "second"], - ), - ( - [ - ["bar", "bar", "foo", "foo", "qux", "qux"], - ["one", "two", "one", "two", "one", "two"], - ], - ["first", "second"], - ), - ], -) -def test_multiindex_tuples(testarr): - tuples = list(zip(*testarr[0], strict=True)) - - index = MultiIndex.from_tuples(tuples, names=testarr[1]) - index_pd = pd.MultiIndex.from_tuples(tuples, names=testarr[1]) - - assert index.is_unique == index_pd.is_unique - assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing - assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing - - -@pytest.mark.parametrize( - "testlist", - [ - [10, 9, 8, 8, 7], - [2.0, 5.0, 4.0, 3.0, 7.0], - ["b", "d", "e", "a", "c"], - ["frog", "cat", "bat", "dog"], - ], -) -@pytest.mark.parametrize("side", ["left", "right"]) -def test_get_slice_bound(testlist, side): - index = Index(testlist) - index_pd = pd.Index(testlist) - for label in testlist: - expect = index_pd.get_slice_bound(label, side) - got = index.get_slice_bound(label, side) - assert got == expect - - -@pytest.mark.parametrize("bounds", [(0, 10), (0, 1), (3, 
4), (0, 0), (3, 3)]) -@pytest.mark.parametrize( - "indices", - [[-1, 0, 5, 10, 11], [-1, 0, 1, 2], [2, 3, 4, 5], [-1, 0, 1], [2, 3, 4]], -) -@pytest.mark.parametrize("side", ["left", "right"]) -def test_rangeindex_get_slice_bound_basic(bounds, indices, side): - start, stop = bounds - pd_index = pd.RangeIndex(start, stop) - cudf_index = RangeIndex(start, stop) - for idx in indices: - expect = pd_index.get_slice_bound(idx, side) - got = cudf_index.get_slice_bound(idx, side) - assert expect == got - - -@pytest.mark.parametrize( - "bounds", - [(3, 20, 5), (20, 3, -5), (20, 3, 5), (3, 20, -5), (0, 0, 2), (3, 3, 2)], -) -@pytest.mark.parametrize( - "label", - [3, 8, 13, 18, 20, 15, 10, 5, -1, 0, 19, 21, 6, 11, 17], -) -@pytest.mark.parametrize("side", ["left", "right"]) -def test_rangeindex_get_slice_bound_step(bounds, label, side): - start, stop, step = bounds - pd_index = pd.RangeIndex(start, stop, step) - cudf_index = RangeIndex(start, stop, step) - - expect = pd_index.get_slice_bound(label, side) - got = cudf_index.get_slice_bound(label, side) - assert expect == got - - -@pytest.mark.parametrize("label", [1, 3, 5, 7, 9, 11]) -@pytest.mark.parametrize("side", ["left", "right"]) -def test_get_slice_bound_missing(label, side): - mylist = [2, 4, 6, 8, 10] - index = Index(mylist) - index_pd = pd.Index(mylist) - - expect = index_pd.get_slice_bound(label, side) - got = index.get_slice_bound(label, side) - assert got == expect - - -@pytest.mark.parametrize("label", ["a", "c", "e", "g"]) -@pytest.mark.parametrize("side", ["left", "right"]) -def test_get_slice_bound_missing_str(label, side): - mylist = ["b", "d", "f"] - index = Index(mylist) - index_pd = pd.Index(mylist) - got = index.get_slice_bound(label, side) - expect = index_pd.get_slice_bound(label, side) - assert got == expect - - -@pytest.mark.parametrize( - "data", - [ - [pd.Timestamp("2018-01-01"), pd.Timestamp("2019-01-31"), None], - [1, 2, 3, None], - [None, 1, 2, 3], - ["a", "b", "c", None], - [None, "a", "b", "c"], - ], -) -def test_is_monotonic_always_falls_for_null(data): - ser = Series(data) - assert ser.is_monotonic_increasing is False - assert ser.is_monotonic_decreasing is False - - -@pytest.mark.parametrize("box", [Series, Index]) -@pytest.mark.parametrize( - "value,na_like", - [ - [1, None], - [np.datetime64("2020-01-01", "ns"), np.datetime64("nat", "ns")], - ["s", None], - [1.0, np.nan], - ], - ids=repr, -) -def test_is_unique(box, value, na_like): - obj = box([value], nan_as_null=False) - assert obj.is_unique - - obj = box([value, value], nan_as_null=False) - assert not obj.is_unique - - obj = box([None, value], nan_as_null=False) - assert obj.is_unique - - obj = box([None, None, value], nan_as_null=False) - assert not obj.is_unique - - if na_like is not None: - obj = box([na_like, value], nan_as_null=False) - assert obj.is_unique - - obj = box([na_like, na_like], nan_as_null=False) - assert not obj.is_unique - - try: - if not np.isnat(na_like): - # pyarrow coerces nat to null - obj = box([None, na_like, value], nan_as_null=False) - assert obj.is_unique - except TypeError: - pass From fc762b899c2ccfdb676169e94996f3275642c53f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:57:57 -0700 Subject: [PATCH 094/366] Move test_rolling/ewm.py to new cudf classic directory structure (#19611) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - 
GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19611 --- python/cudf/cudf/tests/test_rolling.py | 538 ------------------ .../cudf/cudf/tests/{ => window}/test_ewm.py | 8 +- python/cudf/cudf/tests/window/test_rolling.py | 538 +++++++++++++++++- 3 files changed, 540 insertions(+), 544 deletions(-) delete mode 100644 python/cudf/cudf/tests/test_rolling.py rename python/cudf/cudf/tests/{ => window}/test_ewm.py (85%) diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py deleted file mode 100644 index 30958f29a8f..00000000000 --- a/python/cudf/cudf/tests/test_rolling.py +++ /dev/null @@ -1,538 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. - -import math -import pickle - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq -from cudf.testing.dataset_generator import rand_dataframe - - -@pytest.mark.parametrize( - "data,index", - [ - ([], []), - ([1, 1, 1, 1], None), - ([1, 2, 3, 4], pd.date_range("2001-01-01", "2001-01-04")), - ([1, 2, 4, 9, 9, 4], ["a", "b", "c", "d", "e", "f"]), - ], -) -@pytest.mark.parametrize( - "agg", ["sum", "min", "max", "mean", "count", "std", "var"] -) -@pytest.mark.parametrize("nulls", ["none", "one", "some", "all"]) -@pytest.mark.parametrize("center", [True, False]) -def test_rolling_series_basic(data, index, agg, nulls, center): - rng = np.random.default_rng(1) - - if len(data) > 0: - if nulls == "one": - p = rng.integers(0, len(data)) - data[p] = np.nan - elif nulls == "some": - p1, p2 = rng.integers(0, len(data), (2,)) - data[p1] = np.nan - data[p2] = np.nan - elif nulls == "all": - data = [np.nan] * len(data) - - psr = pd.Series(data, index=index) - gsr = cudf.from_pandas(psr) - for window_size in range(1, len(data) + 1): - for min_periods in range(1, window_size + 1): - expect = getattr( - psr.rolling(window_size, min_periods, center), agg - )().fillna(-1) - got = getattr( - gsr.rolling(window_size, min_periods, center), agg - )().fillna(-1) - assert_eq(expect, got, check_dtype=False, check_freq=False) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [], "b": []}, - {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}, - {"a": [1, 2, 4, 9, 9, 4], "b": [1, 2, 4, 9, 9, 4]}, - { - "a": np.array([1, 2, 4, 9, 9, 4]), - "b": np.array([1.5, 2.2, 2.2, 8.0, 9.1, 4.2]), - }, - ], -) -@pytest.mark.parametrize( - "agg", ["sum", "min", "max", "mean", "count", "std", "var"] -) -@pytest.mark.parametrize("nulls", ["none", "one", "some", "all"]) -@pytest.mark.parametrize("center", [True, False]) -def test_rolling_dataframe_basic(data, agg, nulls, center): - rng = np.random.default_rng(0) - pdf = pd.DataFrame(data) - - if len(pdf) > 0: - if nulls == "all": - pdf = pd.DataFrame(np.nan, columns=pdf.columns, index=pdf.index) - else: - for col_idx in range(len(pdf.columns)): - if nulls == "one": - p = rng.integers(0, len(data)) - pdf.iloc[p, col_idx] = np.nan - elif nulls == "some": - p1, p2 = rng.integers(0, len(data), (2,)) - pdf.iloc[p1, col_idx] = np.nan - pdf.iloc[p2, col_idx] = np.nan - - gdf = cudf.from_pandas(pdf) - for window_size in range(1, len(data) + 1): - for min_periods in range(1, window_size + 1): - expect = getattr( - pdf.rolling(window_size, min_periods, center), agg - )().fillna(-1) - got = getattr( - gdf.rolling(window_size, min_periods, center), agg - )().fillna(-1) - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "agg", - [ - "sum", - "min", - "max", - "mean", - "count", - "std", - "var", - ], -) 
-def test_rolling_with_offset(agg): - psr = pd.Series( - [1, 2, 4, 4, np.nan, 9], - index=[ - pd.Timestamp("20190101 09:00:00"), - pd.Timestamp("20190101 09:00:01"), - pd.Timestamp("20190101 09:00:02"), - pd.Timestamp("20190101 09:00:04"), - pd.Timestamp("20190101 09:00:07"), - pd.Timestamp("20190101 09:00:08"), - ], - ) - gsr = cudf.from_pandas(psr) - assert_eq( - getattr(psr.rolling("2s"), agg)().fillna(-1), - getattr(gsr.rolling("2s"), agg)().fillna(-1), - check_dtype=False, - ) - - -@pytest.mark.parametrize("agg", ["std", "var"]) -@pytest.mark.parametrize("ddof", [0, 1]) -@pytest.mark.parametrize("center", [True, False]) -@pytest.mark.parametrize("window_size", [2, 10, 100]) -def test_rolling_var_std_large(agg, ddof, center, window_size): - iupper_bound = math.sqrt(np.iinfo(np.int64).max / window_size) - ilower_bound = -math.sqrt(abs(np.iinfo(np.int64).min) / window_size) - - fupper_bound = math.sqrt(np.finfo(np.float64).max / window_size) - flower_bound = -math.sqrt(abs(np.finfo(np.float64).min) / window_size) - - n_rows = 1_000 - data = rand_dataframe( - dtypes_meta=[ - { - "dtype": "int64", - "null_frequency": 0.4, - "cardinality": n_rows, - "min_bound": ilower_bound, - "max_bound": iupper_bound, - }, - { - "dtype": "float64", - "null_frequency": 0.4, - "cardinality": n_rows, - "min_bound": flower_bound, - "max_bound": fupper_bound, - }, - { - "dtype": "decimal64", - "null_frequency": 0.4, - "cardinality": n_rows, - "min_bound": ilower_bound, - "max_bound": iupper_bound, - }, - ], - rows=n_rows, - use_threads=False, - seed=100, - ) - pdf = data.to_pandas() - gdf = cudf.from_pandas(pdf) - - expect = getattr(pdf.rolling(window_size, 1, center), agg)(ddof=ddof) - got = getattr(gdf.rolling(window_size, 1, center), agg)(ddof=ddof) - - import platform - - if platform.machine() == "aarch64": - # Due to pandas-37051, pandas rolling var/std on uniform window is - # not reliable. Skipping these rows when comparing. - for col in expect: - mask = (got[col].fillna(-1) != 0).to_pandas() - expect[col] = expect[col][mask] - got[col] = got[col][mask] - assert_eq(expect[col], got[col], check_freq=False) - else: - assert_eq(expect, got, check_freq=False) - - -def test_rolling_var_uniform_window(): - """ - Pandas adopts an online variance calculation algorithm. This gives a - floating point artifact. - - In cudf, each window is computed independently from the previous window, - this gives better numeric precision. - """ - - s = pd.Series([1e8, 5, 5, 5]) - expected = s.rolling(3).var() - got = cudf.from_pandas(s).rolling(3).var() - - assert_eq(expected, got) - - -def test_rolling_count_with_offset(): - """ - This test covers the xfail case from test_rolling_with_offset["count"]. - It is expected that count should return a non-Nan value, even if - the counted value is a Nan, unless the min-periods condition - is not met. - This behaviour is consistent with counts for rolling-windows, - in the non-offset window case. 
- """ - psr = pd.Series( - [1, 2, 4, 4, np.nan, 9], - index=[ - pd.Timestamp("20190101 09:00:00"), - pd.Timestamp("20190101 09:00:01"), - pd.Timestamp("20190101 09:00:02"), - pd.Timestamp("20190101 09:00:04"), - pd.Timestamp("20190101 09:00:07"), - pd.Timestamp("20190101 09:00:08"), - ], - ) - gsr = cudf.from_pandas(psr) - assert_eq( - getattr(gsr.rolling("2s"), "count")().fillna(-1), - pd.Series( - [1, 2, 2, 1, 0, 1], - index=[ - pd.Timestamp("20190101 09:00:00"), - pd.Timestamp("20190101 09:00:01"), - pd.Timestamp("20190101 09:00:02"), - pd.Timestamp("20190101 09:00:04"), - pd.Timestamp("20190101 09:00:07"), - pd.Timestamp("20190101 09:00:08"), - ], - ), - check_dtype=False, - ) - - -def test_rolling_getattr(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.rolling(2).a.sum().fillna(-1), - gdf.rolling(2).a.sum().fillna(-1), - check_dtype=False, - ) - - -def test_rolling_getitem(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.rolling(2)["a"].sum().fillna(-1), - gdf.rolling(2)["a"].sum().fillna(-1), - check_dtype=False, - ) - assert_eq( - pdf.rolling(2)["a", "b"].sum().fillna(-1), - gdf.rolling(2)["a", "b"].sum().fillna(-1), - check_dtype=False, - ) - assert_eq( - pdf.rolling(2)[["a", "b"]].sum().fillna(-1), - gdf.rolling(2)["a", "b"].sum().fillna(-1), - check_dtype=False, - ) - - -def test_rolling_getitem_window(): - index = pd.DatetimeIndex( - pd.date_range("2000-01-01", "2000-01-02", freq="1h") - ) - pdf = pd.DataFrame({"x": np.arange(len(index))}, index=index) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.rolling("2h").x.mean(), - gdf.rolling("2h").x.mean(), - check_freq=False, - ) - - -@pytest.mark.parametrize( - "data,index", [([1.2, 4.5, 5.9, 2.4, 9.3, 7.1], None), ([], [])] -) -@pytest.mark.parametrize("center", [True, False]) -def test_rollling_series_numba_udf_basic(data, index, center): - psr = pd.Series(data, index=index) - gsr = cudf.from_pandas(psr) - - def some_func(A): - b = 0 - for a in A: - b = max(b, math.sqrt(a)) - return b - - for window_size in range(1, len(data) + 1): - for min_periods in range(1, window_size + 1): - assert_eq( - psr.rolling(window_size, min_periods, center) - .apply(some_func) - .fillna(-1), - gsr.rolling(window_size, min_periods, center) - .apply(some_func) - .fillna(-1), - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [], "b": []}, - {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}, - {"a": [1, 2, 4, 9, 9, 4], "b": [1, 2, 4, 9, 9, 4]}, - { - "a": np.array([1, 2, 4, 9, 9, 4]), - "b": np.array([1.5, 2.2, 2.2, 8.0, 9.1, 4.2]), - }, - ], -) -@pytest.mark.parametrize("center", [True, False]) -def test_rolling_dataframe_numba_udf_basic(data, center): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - def some_func(A): - b = 0 - for a in A: - b = b + a**2 - return b / len(A) - - for window_size in range(1, len(data) + 1): - for min_periods in range(1, window_size + 1): - assert_eq( - pdf.rolling(window_size, min_periods, center) - .apply(some_func) - .fillna(-1), - gdf.rolling(window_size, min_periods, center) - .apply(some_func) - .fillna(-1), - check_dtype=False, - ) - - -def test_rolling_numba_udf_with_offset(): - psr = pd.Series( - [1, 2, 4, 4, 8, 9], - index=[ - pd.Timestamp("20190101 09:00:00"), - pd.Timestamp("20190101 09:00:01"), - pd.Timestamp("20190101 09:00:02"), - pd.Timestamp("20190101 09:00:04"), - pd.Timestamp("20190101 09:00:07"), - pd.Timestamp("20190101 09:00:08"), - ], - 
) - gsr = cudf.from_pandas(psr) - - def some_func(A): - b = 0 - for a in A: - b = b + a - return b / len(A) - - assert_eq( - psr.rolling("2s").apply(some_func).fillna(-1), - gsr.rolling("2s").apply(some_func).fillna(-1), - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "agg", ["sum", "min", "max", "mean", "count", "var", "std"] -) -def test_rolling_groupby_simple(agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], - "b": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1], - } - ) - gdf = cudf.from_pandas(pdf) - - for window_size in range(1, len(pdf) + 1): - expect = getattr(pdf.groupby("a").rolling(window_size), agg)().fillna( - -1 - ) - got = getattr(gdf.groupby("a").rolling(window_size), agg)().fillna(-1) - assert_eq(expect, got, check_dtype=False) - - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 2], "b": [1, 1, 2, 2, 3], "c": [1, 2, 3, 4, 5]} - ) - gdf = cudf.from_pandas(pdf) - - for window_size in range(1, len(pdf) + 1): - expect = getattr(pdf.groupby("a").rolling(window_size), agg)().fillna( - -1 - ) - got = getattr(gdf.groupby("a").rolling(window_size), agg)().fillna(-1) - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "agg", ["sum", "min", "max", "mean", "count", "var", "std"] -) -def test_rolling_groupby_multi(agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], - "b": [0, 0, 1, 1, 0, 1, 2, 1, 1, 0], - "c": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1], - } - ) - gdf = cudf.from_pandas(pdf) - - for window_size in range(1, len(pdf) + 1): - expect = getattr( - pdf.groupby(["a", "b"], sort=True).rolling(window_size), agg - )().fillna(-1) - got = getattr( - gdf.groupby(["a", "b"], sort=True).rolling(window_size), agg - )().fillna(-1) - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "agg", ["sum", "min", "max", "mean", "count", "var", "std"] -) -@pytest.mark.parametrize( - "window_size", ["1d", "2d", "3d", "4d", "5d", "6d", "7d"] -) -def test_rolling_groupby_offset(agg, window_size): - pdf = pd.DataFrame( - { - "date": pd.date_range(start="2016-01-01", periods=7, freq="D"), - "group": [1, 2, 2, 1, 1, 2, 1], - "val": [5, 6, 7, 8, 1, 2, 3], - } - ).set_index("date") - gdf = cudf.from_pandas(pdf) - expect = getattr(pdf.groupby("group").rolling(window_size), agg)().fillna( - -1 - ) - got = getattr(gdf.groupby("group").rolling(window_size), agg)().fillna(-1) - assert_eq(expect, got, check_dtype=False) - - -def test_rolling_custom_index_support(): - from pandas.api.indexers import BaseIndexer - - class CustomIndexer(BaseIndexer): - def get_window_bounds( - self, num_values, min_periods, center, closed, step=None - ): - start = np.empty(num_values, dtype=np.int64) - end = np.empty(num_values, dtype=np.int64) - - for i in range(num_values): - if self.use_expanding[i]: - start[i] = 0 - end[i] = i + 1 - else: - start[i] = i - end[i] = i + self.window_size - - return start, end - - use_expanding = [True, False, True, False, True] - indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) - - df = pd.DataFrame({"values": range(5)}) - gdf = cudf.from_pandas(df) - - expected = df.rolling(window=indexer).sum() - actual = gdf.rolling(window=indexer).sum() - - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "indexer", - [ - pd.api.indexers.FixedForwardWindowIndexer(window_size=2), - pd.api.indexers.VariableOffsetWindowIndexer( - index=pd.date_range("2020", periods=5), offset=pd.offsets.BDay(1) - ), - ], -) -def test_rolling_indexer_support(indexer): - df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) - 
gdf = cudf.from_pandas(df) - - expected = df.rolling(window=indexer, min_periods=2).sum() - actual = gdf.rolling(window=indexer, min_periods=2).sum() - - assert_eq(expected, actual) - - -def test_rolling_series(): - df = cudf.DataFrame({"a": range(0, 100), "b": [10, 20, 30, 40, 50] * 20}) - pdf = df.to_pandas() - - expected = pdf.groupby("b")["a"].rolling(5).mean() - actual = df.groupby("b")["a"].rolling(5).mean() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("klass", ["DataFrame", "Series"]) -def test_pandas_compat_int_nan_min_periods(klass): - data = [None, 1, 2, None, 4, 6, 11] - with cudf.option_context("mode.pandas_compatible", True): - result = getattr(cudf, klass)(data).rolling(2, min_periods=1).sum() - expected = getattr(pd, klass)(data).rolling(2, min_periods=1).sum() - assert_eq(result, expected) - - result = getattr(cudf, klass)(data).rolling(2, min_periods=1).sum() - expected = getattr(cudf, klass)([None, 1, 3, 2, 4, 10, 17]) - assert_eq(result, expected) - - -def test_groupby_rolling_pickleable(): - df = cudf.DataFrame({"a": [1, 1, 2], "b": [1, 2, 3]}) - gb_rolling = pickle.loads(pickle.dumps(df.groupby("a").rolling(2))) - assert_eq(gb_rolling.obj, cudf.DataFrame({"b": [1, 2, 3]})) diff --git a/python/cudf/cudf/tests/test_ewm.py b/python/cudf/cudf/tests/window/test_ewm.py similarity index 85% rename from python/cudf/cudf/tests/test_ewm.py rename to python/cudf/cudf/tests/window/test_ewm.py index 6cb3c19d5a8..6d7734c5c00 100644 --- a/python/cudf/cudf/tests/test_ewm.py +++ b/python/cudf/cudf/tests/window/test_ewm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. import pytest import cudf @@ -35,12 +35,10 @@ def test_ewma(data, params, adjust): sets of keyword arguemnts that effect the raw coefficients of the formula """ - params["adjust"] = adjust - gsr = cudf.Series(data, dtype="float64") psr = gsr.to_pandas() - expect = psr.ewm(**params).mean() - got = gsr.ewm(**params).mean() + expect = psr.ewm(**params, adjust=adjust).mean() + got = gsr.ewm(**params, adjust=adjust).mean() assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/window/test_rolling.py b/python/cudf/cudf/tests/window/test_rolling.py index 06777c8e6af..4af4932b9ca 100644 --- a/python/cudf/cudf/tests/window/test_rolling.py +++ b/python/cudf/cudf/tests/window/test_rolling.py @@ -1 +1,537 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
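+# The tests below share one pattern: construct a pandas object, mirror it
+# on the GPU with cudf.from_pandas, run the same rolling operation on both,
+# and compare with assert_eq. A minimal sketch of that round trip
+# (assuming a working CUDA environment):
+#
+#     psr = pd.Series([1.0, 2.0, 3.0])
+#     gsr = cudf.from_pandas(psr)
+#     assert_eq(psr.rolling(2).sum(), gsr.rolling(2).sum())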
+import math +import pickle +import platform + +import numpy as np +import pandas as pd +import pytest +from pandas.api.indexers import BaseIndexer + +import cudf +from cudf.testing import assert_eq +from cudf.testing.dataset_generator import rand_dataframe + + +@pytest.fixture(params=[True, False]) +def center(request): + return request.param + + +@pytest.fixture +def supported_rolling_reductions(reduction_methods): + if reduction_methods in [ + "product", + "quantile", + "all", + "any", + "median", + "kurtosis", + "skew", + ]: + pytest.skip(f"{reduction_methods} not implemented") + return reduction_methods + + +@pytest.mark.parametrize( + "data,index", + [ + ([], []), + ([1, 1, 1, 1], None), + ([1, 2, 3, 4], pd.date_range("2001-01-01", "2001-01-04")), + ([1, 2, 4, 9, 9, 4], ["a", "b", "c", "d", "e", "f"]), + ], +) +@pytest.mark.parametrize("nulls", ["none", "one", "some", "all"]) +def test_rolling_series_basic( + data, index, supported_rolling_reductions, nulls, center, request +): + rng = np.random.default_rng(1) + + if len(data) > 0: + if nulls == "one": + p = rng.integers(0, len(data)) + data[p] = np.nan + elif nulls == "some": + p1, p2 = rng.integers(0, len(data), (2,)) + data[p1] = np.nan + data[p2] = np.nan + elif nulls == "all": + data = [np.nan] * len(data) + + psr = pd.Series(data, index=index) + gsr = cudf.from_pandas(psr) + for window_size in range(1, len(data) + 1): + for min_periods in range(1, window_size + 1): + expect = getattr( + psr.rolling(window_size, min_periods, center), + supported_rolling_reductions, + )().fillna(-1) + got = getattr( + gsr.rolling(window_size, min_periods, center), + supported_rolling_reductions, + )().fillna(-1) + assert_eq(expect, got, check_dtype=False, check_freq=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [], "b": []}, + {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}, + {"a": [1, 2, 4, 9, 9, 4], "b": [1, 2, 4, 9, 9, 4]}, + { + "a": np.array([1, 2, 4, 9, 9, 4]), + "b": np.array([1.5, 2.2, 2.2, 8.0, 9.1, 4.2]), + }, + ], +) +@pytest.mark.parametrize("nulls", ["none", "one", "some", "all"]) +def test_rolling_dataframe_basic( + data, supported_rolling_reductions, nulls, center, request +): + rng = np.random.default_rng(0) + pdf = pd.DataFrame(data) + + if len(pdf) > 0: + if nulls == "all": + pdf = pd.DataFrame(np.nan, columns=pdf.columns, index=pdf.index) + else: + for col_idx in range(len(pdf.columns)): + if nulls == "one": + p = rng.integers(0, len(data)) + pdf.iloc[p, col_idx] = np.nan + elif nulls == "some": + p1, p2 = rng.integers(0, len(data), (2,)) + pdf.iloc[p1, col_idx] = np.nan + pdf.iloc[p2, col_idx] = np.nan + + gdf = cudf.from_pandas(pdf) + for window_size in range(1, len(data) + 1): + for min_periods in range(1, window_size + 1): + expect = getattr( + pdf.rolling(window_size, min_periods, center), + supported_rolling_reductions, + )().fillna(-1) + got = getattr( + gdf.rolling(window_size, min_periods, center), + supported_rolling_reductions, + )().fillna(-1) + assert_eq(expect, got, check_dtype=False) + + +def test_rolling_with_offset(supported_rolling_reductions): + psr = pd.Series( + [1, 2, 4, 4, np.nan, 9], + index=[ + pd.Timestamp("20190101 09:00:00"), + pd.Timestamp("20190101 09:00:01"), + pd.Timestamp("20190101 09:00:02"), + pd.Timestamp("20190101 09:00:04"), + pd.Timestamp("20190101 09:00:07"), + pd.Timestamp("20190101 09:00:08"), + ], + ) + gsr = cudf.from_pandas(psr) + assert_eq( + getattr(psr.rolling("2s"), supported_rolling_reductions)().fillna(-1), + getattr(gsr.rolling("2s"), 
supported_rolling_reductions)().fillna(-1),
+        check_dtype=False,
+    )
+
+
+@pytest.mark.parametrize("agg", ["std", "var"])
+@pytest.mark.parametrize("ddof", [0, 1])
+@pytest.mark.parametrize("window_size", [2, 100])
+def test_rolling_var_std_large(agg, ddof, center, window_size):
+    iupper_bound = math.sqrt(np.iinfo(np.int64).max / window_size)
+    ilower_bound = -math.sqrt(abs(np.iinfo(np.int64).min) / window_size)
+
+    fupper_bound = math.sqrt(np.finfo(np.float64).max / window_size)
+    flower_bound = -math.sqrt(abs(np.finfo(np.float64).min) / window_size)
+
+    n_rows = 1_000
+    data = rand_dataframe(
+        dtypes_meta=[
+            {
+                "dtype": "int64",
+                "null_frequency": 0.4,
+                "cardinality": n_rows,
+                "min_bound": ilower_bound,
+                "max_bound": iupper_bound,
+            },
+            {
+                "dtype": "float64",
+                "null_frequency": 0.4,
+                "cardinality": n_rows,
+                "min_bound": flower_bound,
+                "max_bound": fupper_bound,
+            },
+            {
+                "dtype": "decimal64",
+                "null_frequency": 0.4,
+                "cardinality": n_rows,
+                "min_bound": ilower_bound,
+                "max_bound": iupper_bound,
+            },
+        ],
+        rows=n_rows,
+        use_threads=False,
+        seed=100,
+    )
+    pdf = data.to_pandas()
+    gdf = cudf.from_pandas(pdf)
+
+    expect = getattr(pdf.rolling(window_size, 1, center), agg)(ddof=ddof)
+    got = getattr(gdf.rolling(window_size, 1, center), agg)(ddof=ddof)
+
+    if platform.machine() == "aarch64":
+        # Due to pandas-37051, pandas rolling var/std on a uniform window
+        # is not reliable. Skip those rows when comparing.
+        for col in expect:
+            mask = (got[col].fillna(-1) != 0).to_pandas()
+            expect[col] = expect[col][mask]
+            got[col] = got[col][mask]
+            assert_eq(expect[col], got[col], check_freq=False)
+    else:
+        assert_eq(expect, got, check_freq=False)
+
+
+def test_rolling_var_uniform_window():
+    """
+    Pandas adopts an online variance calculation algorithm, which can
+    leave a floating-point artifact.
+
+    In cudf, each window is computed independently of the previous
+    window, which gives better numeric precision.
+    """
+
+    s = pd.Series([1e8, 5, 5, 5])
+    expected = s.rolling(3).var()
+    got = cudf.from_pandas(s).rolling(3).var()
+
+    assert_eq(expected, got)
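+# A quick illustration of the artifact tested above (values approximate;
+# the exact residue depends on the pandas version): for
+# pd.Series([1e8, 5, 5, 5]).rolling(3).var(), pandas' online update
+# carries rounding error from the [1e8, 5, 5] window into the all-equal
+# [5, 5, 5] window, so it may report a tiny non-zero variance there;
+# cudf computes every window from scratch and returns exactly 0.0.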
+ """ + psr = pd.Series( + [1, 2, 4, 4, np.nan, 9], + index=[ + pd.Timestamp("20190101 09:00:00"), + pd.Timestamp("20190101 09:00:01"), + pd.Timestamp("20190101 09:00:02"), + pd.Timestamp("20190101 09:00:04"), + pd.Timestamp("20190101 09:00:07"), + pd.Timestamp("20190101 09:00:08"), + ], + ) + gsr = cudf.from_pandas(psr) + assert_eq( + getattr(gsr.rolling("2s"), "count")().fillna(-1), + pd.Series( + [1, 2, 2, 1, 0, 1], + index=[ + pd.Timestamp("20190101 09:00:00"), + pd.Timestamp("20190101 09:00:01"), + pd.Timestamp("20190101 09:00:02"), + pd.Timestamp("20190101 09:00:04"), + pd.Timestamp("20190101 09:00:07"), + pd.Timestamp("20190101 09:00:08"), + ], + ), + check_dtype=False, + ) + + +def test_rolling_getattr(): + pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + gdf = cudf.from_pandas(pdf) + + assert_eq( + pdf.rolling(2).a.sum().fillna(-1), + gdf.rolling(2).a.sum().fillna(-1), + check_dtype=False, + ) + + +def test_rolling_getitem(): + pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + gdf = cudf.from_pandas(pdf) + + assert_eq( + pdf.rolling(2)["a"].sum().fillna(-1), + gdf.rolling(2)["a"].sum().fillna(-1), + check_dtype=False, + ) + assert_eq( + pdf.rolling(2)["a", "b"].sum().fillna(-1), + gdf.rolling(2)["a", "b"].sum().fillna(-1), + check_dtype=False, + ) + assert_eq( + pdf.rolling(2)[["a", "b"]].sum().fillna(-1), + gdf.rolling(2)["a", "b"].sum().fillna(-1), + check_dtype=False, + ) + + +def test_rolling_getitem_window(): + index = pd.DatetimeIndex( + pd.date_range("2000-01-01", "2000-01-02", freq="1h") + ) + pdf = pd.DataFrame({"x": np.arange(len(index))}, index=index) + gdf = cudf.from_pandas(pdf) + + assert_eq( + pdf.rolling("2h").x.mean(), + gdf.rolling("2h").x.mean(), + check_freq=False, + ) + + +@pytest.mark.parametrize( + "data,index", [([1.2, 4.5, 5.9, 2.4, 9.3, 7.1], None), ([], [])] +) +def test_rollling_series_numba_udf_basic(data, index, center): + psr = pd.Series(data, index=index) + gsr = cudf.from_pandas(psr) + + def some_func(A): + b = 0 + for a in A: + b = max(b, math.sqrt(a)) + return b + + for window_size in range(1, len(data) + 1): + for min_periods in range(1, window_size + 1): + assert_eq( + psr.rolling(window_size, min_periods, center) + .apply(some_func) + .fillna(-1), + gsr.rolling(window_size, min_periods, center) + .apply(some_func) + .fillna(-1), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [], "b": []}, + {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}, + {"a": [1, 2, 4, 9, 9, 4], "b": [1, 2, 4, 9, 9, 4]}, + { + "a": np.array([1, 2, 4, 9, 9, 4]), + "b": np.array([1.5, 2.2, 2.2, 8.0, 9.1, 4.2]), + }, + ], +) +def test_rolling_dataframe_numba_udf_basic(data, center): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + def some_func(A): + b = 0 + for a in A: + b = b + a**2 + return b / len(A) + + for window_size in range(1, len(data) + 1): + for min_periods in range(1, window_size + 1): + assert_eq( + pdf.rolling(window_size, min_periods, center) + .apply(some_func) + .fillna(-1), + gdf.rolling(window_size, min_periods, center) + .apply(some_func) + .fillna(-1), + check_dtype=False, + ) + + +def test_rolling_numba_udf_with_offset(): + psr = pd.Series( + [1, 2, 4, 4, 8, 9], + index=[ + pd.Timestamp("20190101 09:00:00"), + pd.Timestamp("20190101 09:00:01"), + pd.Timestamp("20190101 09:00:02"), + pd.Timestamp("20190101 09:00:04"), + pd.Timestamp("20190101 09:00:07"), + pd.Timestamp("20190101 09:00:08"), + ], + ) + gsr = cudf.from_pandas(psr) + + def some_func(A): + b = 0 + for a in A: + b = b + a + return b / 
+
+
+@pytest.mark.parametrize(
+    "data,index", [([1.2, 4.5, 5.9, 2.4, 9.3, 7.1], None), ([], [])]
+)
+def test_rolling_series_numba_udf_basic(data, index, center):
+    psr = pd.Series(data, index=index)
+    gsr = cudf.from_pandas(psr)
+
+    def some_func(A):
+        b = 0
+        for a in A:
+            b = max(b, math.sqrt(a))
+        return b
+
+    for window_size in range(1, len(data) + 1):
+        for min_periods in range(1, window_size + 1):
+            assert_eq(
+                psr.rolling(window_size, min_periods, center)
+                .apply(some_func)
+                .fillna(-1),
+                gsr.rolling(window_size, min_periods, center)
+                .apply(some_func)
+                .fillna(-1),
+                check_dtype=False,
+            )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"a": [], "b": []},
+        {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]},
+        {"a": [1, 2, 4, 9, 9, 4], "b": [1, 2, 4, 9, 9, 4]},
+        {
+            "a": np.array([1, 2, 4, 9, 9, 4]),
+            "b": np.array([1.5, 2.2, 2.2, 8.0, 9.1, 4.2]),
+        },
+    ],
+)
+def test_rolling_dataframe_numba_udf_basic(data, center):
+    pdf = pd.DataFrame(data)
+    gdf = cudf.from_pandas(pdf)
+
+    def some_func(A):
+        b = 0
+        for a in A:
+            b = b + a**2
+        return b / len(A)
+
+    for window_size in range(1, len(data) + 1):
+        for min_periods in range(1, window_size + 1):
+            assert_eq(
+                pdf.rolling(window_size, min_periods, center)
+                .apply(some_func)
+                .fillna(-1),
+                gdf.rolling(window_size, min_periods, center)
+                .apply(some_func)
+                .fillna(-1),
+                check_dtype=False,
+            )
+
+
+def test_rolling_numba_udf_with_offset():
+    psr = pd.Series(
+        [1, 2, 4, 4, 8, 9],
+        index=[
+            pd.Timestamp("20190101 09:00:00"),
+            pd.Timestamp("20190101 09:00:01"),
+            pd.Timestamp("20190101 09:00:02"),
+            pd.Timestamp("20190101 09:00:04"),
+            pd.Timestamp("20190101 09:00:07"),
+            pd.Timestamp("20190101 09:00:08"),
+        ],
+    )
+    gsr = cudf.from_pandas(psr)
+
+    def some_func(A):
+        b = 0
+        for a in A:
+            b = b + a
+        return b / len(A)
+
+    assert_eq(
+        psr.rolling("2s").apply(some_func).fillna(-1),
+        gsr.rolling("2s").apply(some_func).fillna(-1),
+        check_dtype=False,
+    )
+
+
+def test_rolling_groupby_simple(supported_rolling_reductions):
+    pdf = pd.DataFrame(
+        {
+            "a": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
+            "b": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    for window_size in range(1, len(pdf) + 1):
+        expect = getattr(
+            pdf.groupby("a").rolling(window_size), supported_rolling_reductions
+        )().fillna(-1)
+        got = getattr(
+            gdf.groupby("a").rolling(window_size), supported_rolling_reductions
+        )().fillna(-1)
+        assert_eq(expect, got, check_dtype=False)
+
+    pdf = pd.DataFrame(
+        {"a": [1, 1, 1, 2, 2], "b": [1, 1, 2, 2, 3], "c": [1, 2, 3, 4, 5]}
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    for window_size in range(1, len(pdf) + 1):
+        expect = getattr(
+            pdf.groupby("a").rolling(window_size), supported_rolling_reductions
+        )().fillna(-1)
+        got = getattr(
+            gdf.groupby("a").rolling(window_size), supported_rolling_reductions
+        )().fillna(-1)
+        assert_eq(expect, got, check_dtype=False)
+
+
+def test_rolling_groupby_multi(supported_rolling_reductions):
+    pdf = pd.DataFrame(
+        {
+            "a": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
+            "b": [0, 0, 1, 1, 0, 1, 2, 1, 1, 0],
+            "c": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    for window_size in range(1, len(pdf) + 1):
+        expect = getattr(
+            pdf.groupby(["a", "b"], sort=True).rolling(window_size),
+            supported_rolling_reductions,
+        )().fillna(-1)
+        got = getattr(
+            gdf.groupby(["a", "b"], sort=True).rolling(window_size),
+            supported_rolling_reductions,
+        )().fillna(-1)
+        assert_eq(expect, got, check_dtype=False)
+
+
+@pytest.mark.parametrize("window_size", ["1d", "3d", "6d", "7d"])
+def test_rolling_groupby_offset(supported_rolling_reductions, window_size):
+    pdf = pd.DataFrame(
+        {
+            "date": pd.date_range(start="2016-01-01", periods=7, freq="D"),
+            "group": [1, 2, 2, 1, 1, 2, 1],
+            "val": [5, 6, 7, 8, 1, 2, 3],
+        }
+    ).set_index("date")
+    gdf = cudf.from_pandas(pdf)
+    expect = getattr(
+        pdf.groupby("group").rolling(window_size), supported_rolling_reductions
+    )().fillna(-1)
+    got = getattr(
+        gdf.groupby("group").rolling(window_size), supported_rolling_reductions
+    )().fillna(-1)
+    assert_eq(expect, got, check_dtype=False)
+
+
+def test_rolling_custom_index_support():
+    class CustomIndexer(BaseIndexer):
+        def get_window_bounds(
+            self, num_values, min_periods, center, closed, step=None
+        ):
+            start = np.empty(num_values, dtype=np.int64)
+            end = np.empty(num_values, dtype=np.int64)
+
+            for i in range(num_values):
+                if self.use_expanding[i]:
+                    start[i] = 0
+                    end[i] = i + 1
+                else:
+                    start[i] = i
+                    end[i] = i + self.window_size
+
+            return start, end
+
+    use_expanding = [True, False, True, False, True]
+    indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
+
+    df = pd.DataFrame({"values": range(5)})
+    gdf = cudf.from_pandas(df)
+
+    expected = df.rolling(window=indexer).sum()
+    actual = gdf.rolling(window=indexer).sum()
+
+    assert_eq(expected, actual, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "indexer",
+    [
+        pd.api.indexers.FixedForwardWindowIndexer(window_size=2),
+        pd.api.indexers.VariableOffsetWindowIndexer(
+            index=pd.date_range("2020", periods=5), offset=pd.offsets.BDay(1)
+        ),
+    ],
+)
+def test_rolling_indexer_support(indexer):
+    df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
+    gdf = cudf.from_pandas(df)
+
+    expected = df.rolling(window=indexer, min_periods=2).sum()
+    actual = gdf.rolling(window=indexer,
min_periods=2).sum() + + assert_eq(expected, actual) + + +def test_rolling_series(): + df = cudf.DataFrame({"a": range(0, 100), "b": [10, 20, 30, 40, 50] * 20}) + pdf = df.to_pandas() + + expected = pdf.groupby("b")["a"].rolling(5).mean() + actual = df.groupby("b")["a"].rolling(5).mean() + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("klass", ["DataFrame", "Series"]) +def test_pandas_compat_int_nan_min_periods(klass): + data = [None, 1, 2, None, 4, 6, 11] + with cudf.option_context("mode.pandas_compatible", True): + result = getattr(cudf, klass)(data).rolling(2, min_periods=1).sum() + expected = getattr(pd, klass)(data).rolling(2, min_periods=1).sum() + assert_eq(result, expected) + + result = getattr(cudf, klass)(data).rolling(2, min_periods=1).sum() + expected = getattr(cudf, klass)([None, 1, 3, 2, 4, 10, 17]) + assert_eq(result, expected) + + +def test_groupby_rolling_pickleable(): + df = cudf.DataFrame({"a": [1, 1, 2], "b": [1, 2, 3]}) + gb_rolling = pickle.loads(pickle.dumps(df.groupby("a").rolling(2))) + assert_eq(gb_rolling.obj, cudf.DataFrame({"b": [1, 2, 3]})) From f9d6eccb2c65bffa7fc3941d602de572a56e6516 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 15:16:26 -0700 Subject: [PATCH 095/366] Use more pytest fixtures and avoid GPU parameterization in test_binops/column/column_accessor/contains.py and more (#19473) Towards https://github.com/rapidsai/cudf/issues/9999 * Use more pytest fixtures * Avoids pytest.mark.parametrize with GPU objects * Eliminate/reduce parameterizations of input size Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19473 --- python/cudf/cudf/tests/conftest.py | 76 ++ python/cudf/cudf/tests/test_binops.py | 971 +++++++----------- python/cudf/cudf/tests/test_buffer.py | 49 +- python/cudf/cudf/tests/test_categorical.py | 152 ++- python/cudf/cudf/tests/test_column.py | 54 +- .../cudf/cudf/tests/test_column_accessor.py | 53 +- python/cudf/cudf/tests/test_contains.py | 97 +- python/cudf/cudf/tests/test_copying.py | 70 +- 8 files changed, 666 insertions(+), 856 deletions(-) diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index b3b48d19538..7a8a6c3881e 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -1,6 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. 
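+# Shared operator fixtures (arithmetic_op, comparison_op, binary_op and
+# their *_method variants) are defined further down in this module so test
+# files can request them as arguments instead of repeating
+# @pytest.mark.parametrize lists of operators. A hypothetical usage sketch:
+#
+#     def test_op_matches_pandas(arithmetic_op):
+#         gsr = cudf.Series([1, 2, 3])
+#         assert_eq(arithmetic_op(gsr, 2), arithmetic_op(gsr.to_pandas(), 2))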
import itertools +import operator import os import pathlib @@ -192,6 +193,69 @@ def set_decomp_env_vars(monkeypatch, request): yield +arithmetic_ops = [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + operator.mod, + operator.pow, +] +comparison_ops = [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, +] + + +@pytest.fixture(params=arithmetic_ops) +def arithmetic_op(request): + return request.param + + +@pytest.fixture( + params=itertools.chain.from_iterable( + (op.__name__, f"r{op.__name__}") for op in arithmetic_ops + ) +) +def arithmetic_op_method(request): + """Arithmetic methods defined on Series/DataFrame""" + return request.param + + +@pytest.fixture(params=comparison_ops) +def comparison_op(request): + return request.param + + +@pytest.fixture +def comparison_op_method(comparison_op): + """Comparison methods defined on Series/DataFrame""" + return comparison_op.__name__ + + +@pytest.fixture(params=arithmetic_ops + comparison_ops) +def binary_op(request): + return request.param + + +@pytest.fixture( + params=itertools.chain( + itertools.chain.from_iterable( + (op.__name__, f"r{op.__name__}") for op in arithmetic_ops + ), + (op.__name__ for op in comparison_ops), + ) +) +def binary_op_method(request): + """Binary methods defined on Series/DataFrame""" + return request.param + + @pytest.fixture( params=[ "min", @@ -250,6 +314,12 @@ def integer_types_as_str(request): return request.param +@pytest.fixture +def integer_types_as_str2(integer_types_as_str): + """Used for testing cartesian product of integer_types_as_str""" + return integer_types_as_str + + @pytest.fixture(params=float_types) def float_types_as_str(request): """ @@ -270,6 +340,12 @@ def numeric_types_as_str(request): return request.param +@pytest.fixture +def numeric_types_as_str2(numeric_types_as_str): + """Used for testing cartesian product of numeric_types_as_str""" + return numeric_types_as_str + + @pytest.fixture( params=signed_integer_types + unsigned_integer_types diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 34e568215ae..3a5326fb350 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2,10 +2,11 @@ import datetime import decimal +import itertools import operator import re import warnings -from itertools import combinations_with_replacement, product +from concurrent.futures import ThreadPoolExecutor import cupy as cp import numpy as np @@ -31,112 +32,12 @@ ) STRING_TYPES = {"str"} - -_binops = [ - operator.add, - operator.sub, - operator.mul, - operator.floordiv, - operator.truediv, - operator.mod, - operator.pow, -] - -_binops_compare = [ - operator.eq, - operator.ne, - operator.lt, - operator.le, - operator.gt, - operator.ge, -] - -_bitwise_binops = [operator.and_, operator.or_, operator.xor] - -_int_types = [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", -] - -_cmpops = [ - operator.lt, - operator.gt, - operator.le, - operator.ge, - operator.eq, - operator.ne, -] - -_reflected_ops = [ - lambda x: 1 + x, - lambda x: 2 * x, - lambda x: 2 - x, - lambda x: 2 // x, - lambda x: 2 / x, - lambda x: 3 + x, - lambda x: 3 * x, - lambda x: 3 - x, - lambda x: 3 // x, - lambda x: 3 / x, - lambda x: 3 % x, - lambda x: -1 + x, - lambda x: -2 * x, - lambda x: -2 - x, - lambda x: -2 // x, - lambda x: -2 / x, - lambda x: -3 + x, - lambda x: -3 * x, - lambda x: -3 - x, - lambda x: -3 // x, - lambda x: -3 / x, - lambda x: -3 
% x, - lambda x: 0 + x, - lambda x: 0 * x, - lambda x: 0 - x, - lambda x: 0 // x, - lambda x: 0 / x, -] - -_operators_arithmetic = [ - "add", - "radd", - "sub", - "rsub", - "mul", - "rmul", - "mod", - "rmod", - "pow", - "rpow", - "div", - "divide", - "floordiv", - "rfloordiv", - "truediv", - "rtruediv", -] - -_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"] - - pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling # If spilling is enabled globally, we skip many test permutations # to reduce running time. if get_global_manager() is not None: - _binops = _binops[:1] - _binops_compare = _binops_compare[:1] - _int_types = _int_types[-1:] - _cmpops = _cmpops[:1] - _reflected_ops = _reflected_ops[:1] - _operators_arithmetic = _operators_arithmetic[:1] - _operators_comparison = _operators_comparison[:1] DATETIME_TYPES = {"datetime64[ms]"} NUMERIC_TYPES = {"float32"} FLOAT_TYPES = {"float64"} @@ -147,11 +48,10 @@ @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize("binop", _binops) -def test_series_binop(request, binop, obj_class): +def test_series_binop(request, arithmetic_op, obj_class): request.applymarker( pytest.mark.xfail( - binop is operator.floordiv, + arithmetic_op is operator.floordiv, reason="https://github.com/rapidsai/cudf/issues/17073", ) ) @@ -169,8 +69,8 @@ def test_series_binop(request, binop, obj_class): sr1 = Index(sr1) sr2 = Index(sr2) - expect = binop(psr1, psr2) - result = binop(sr1, sr2) + expect = arithmetic_op(psr1, psr2) + result = arithmetic_op(sr1, sr2) if obj_class == "Index": result = Series(result) @@ -178,28 +78,25 @@ def test_series_binop(request, binop, obj_class): assert_eq(result, expect) -@pytest.mark.parametrize("binop", _binops) -def test_series_binop_concurrent(binop): +def test_series_binop_concurrent(arithmetic_op): def func(index): rng = np.random.default_rng(seed=0) arr = rng.random(100) * 10 sr = Series(arr) - result = binop(sr.astype("int32"), sr) - expect = binop(arr.astype("int32"), arr) + result = arithmetic_op(sr.astype("int32"), sr) + expect = arithmetic_op(arr.astype("int32"), arr) np.testing.assert_almost_equal(result.to_numpy(), expect, decimal=5) - from concurrent.futures import ThreadPoolExecutor - indices = range(10) with ThreadPoolExecutor(4) as e: # four processes list(e.map(func, indices)) @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize("nelem,binop", list(product([1, 2, 100], _binops))) -def test_series_binop_scalar(nelem, binop, obj_class): +def test_series_binop_scalar(arithmetic_op, obj_class): + nelem = 10 rng = np.random.default_rng(seed=0) arr = rng.random(nelem) rhs = rng.choice(arr).item() @@ -208,25 +105,24 @@ def test_series_binop_scalar(nelem, binop, obj_class): if obj_class == "Index": sr = Index(sr) - result = binop(sr, rhs) + result = arithmetic_op(sr, rhs) if obj_class == "Index": result = Series(result) - np.testing.assert_almost_equal(result.to_numpy(), binop(arr, rhs)) + np.testing.assert_almost_equal(result.to_numpy(), arithmetic_op(arr, rhs)) @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize("binop", _bitwise_binops) -@pytest.mark.parametrize( - "lhs_dtype,rhs_dtype", list(product(_int_types, _int_types)) -) -def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): +@pytest.mark.parametrize("binop", [operator.and_, operator.or_, operator.xor]) +def test_series_bitwise_binop( + binop, obj_class, integer_types_as_str, integer_types_as_str2 +): rng = np.random.default_rng(seed=0) 
- arr1 = (rng.random(100) * 100).astype(lhs_dtype) + arr1 = (rng.random(100) * 100).astype(integer_types_as_str) sr1 = Series(arr1) - arr2 = (rng.random(100) * 100).astype(rhs_dtype) + arr2 = (rng.random(100) * 100).astype(integer_types_as_str2) sr2 = Series(arr2) if obj_class == "Index": @@ -242,14 +138,12 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize("cmpop", _cmpops) -@pytest.mark.parametrize( - "dtype", ["int8", "int32", "int64", "float32", "float64", "datetime64[ms]"] -) -def test_series_compare(cmpop, obj_class, dtype): +def test_series_compare( + comparison_op, obj_class, numeric_and_temporal_types_as_str +): rng = np.random.default_rng(seed=0) - arr1 = rng.integers(0, 100, 100).astype(dtype) - arr2 = rng.integers(0, 100, 100).astype(dtype) + arr1 = rng.integers(0, 100, 100).astype(numeric_and_temporal_types_as_str) + arr2 = rng.integers(0, 100, 100).astype(numeric_and_temporal_types_as_str) sr1 = Series(arr1) sr2 = Series(arr2) @@ -257,73 +151,58 @@ def test_series_compare(cmpop, obj_class, dtype): sr1 = Index(sr1) sr2 = Index(sr2) - result1 = cmpop(sr1, sr1) - result2 = cmpop(sr2, sr2) - result3 = cmpop(sr1, sr2) + result1 = comparison_op(sr1, sr1) + result2 = comparison_op(sr2, sr2) + result3 = comparison_op(sr1, sr2) if obj_class == "Index": result1 = Series(result1) result2 = Series(result2) result3 = Series(result3) - np.testing.assert_equal(result1.to_numpy(), cmpop(arr1, arr1)) - np.testing.assert_equal(result2.to_numpy(), cmpop(arr2, arr2)) - np.testing.assert_equal(result3.to_numpy(), cmpop(arr1, arr2)) + np.testing.assert_equal(result1.to_numpy(), comparison_op(arr1, arr1)) + np.testing.assert_equal(result2.to_numpy(), comparison_op(arr2, arr2)) + np.testing.assert_equal(result3.to_numpy(), comparison_op(arr1, arr2)) @pytest.mark.parametrize( "dtype,val", [("int8", 200), ("int32", 2**32), ("uint8", -128), ("uint64", -1)], ) -@pytest.mark.parametrize( - "op", - [ - operator.eq, - operator.ne, - operator.lt, - operator.le, - operator.gt, - operator.ge, - ], -) @pytest.mark.parametrize("reverse", [False, True]) -def test_series_compare_integer(dtype, val, op, reverse): +def test_series_compare_integer(dtype, val, comparison_op, reverse): # Tests that these actually work, even though they are out of bound. force_cast_val = np.array(val).astype(dtype) sr = Series( [np.iinfo(dtype).min, np.iinfo(dtype).max, force_cast_val, None], dtype=dtype, ) - - if reverse: - _op = op - - def op(x, y): - return _op(y, x) - # We expect the same result as comparing to a value within range (e.g. 
0) # except that a NULL value evaluates to False - if op(0, val): - expected = Series([True, True, True, None]) + exp = False + if reverse: + if comparison_op(val, 0): + exp = True + res = comparison_op(val, sr) else: - expected = Series([False, False, False, None]) + if comparison_op(0, val): + exp = True + res = comparison_op(sr, val) - res = op(sr, val) + expected = Series([exp, exp, exp, None]) assert_eq(res, expected) -def _series_compare_nulls_typegen(): - return [ - *combinations_with_replacement(DATETIME_TYPES, 2), - *combinations_with_replacement(TIMEDELTA_TYPES, 2), - *combinations_with_replacement(NUMERIC_TYPES, 2), - *combinations_with_replacement(STRING_TYPES, 2), - ] - - -@pytest.mark.parametrize("cmpop", _cmpops) -@pytest.mark.parametrize("dtypes", _series_compare_nulls_typegen()) -def test_series_compare_nulls(cmpop, dtypes): +@pytest.mark.parametrize( + "dtypes", + [ + *itertools.combinations_with_replacement(DATETIME_TYPES, 2), + *itertools.combinations_with_replacement(TIMEDELTA_TYPES, 2), + *itertools.combinations_with_replacement(NUMERIC_TYPES, 2), + *itertools.combinations_with_replacement(STRING_TYPES, 2), + ], +) +def test_series_compare_nulls(comparison_op, dtypes): ltype, rtype = dtypes ldata = [1, 2, None, None, 5] @@ -337,9 +216,9 @@ def test_series_compare_nulls(cmpop, dtypes): expect_mask = np.logical_and(lmask, rmask) expect = cudf.Series([None] * 5, dtype="bool") - expect[expect_mask] = cmpop(lser[expect_mask], rser[expect_mask]) + expect[expect_mask] = comparison_op(lser[expect_mask], rser[expect_mask]) - got = cmpop(lser, rser) + got = comparison_op(lser, rser) assert_eq(expect, got) @@ -348,11 +227,6 @@ def str_series_cmp_data(): return pd.Series(["a", "b", None, "d", "e", None], dtype="string") -@pytest.fixture(ids=[op.__name__ for op in _cmpops], params=_cmpops) -def str_series_compare_str_cmpop(request): - return request.param - - @pytest.fixture(ids=["eq", "ne"], params=[operator.eq, operator.ne]) def str_series_compare_num_cmpop(request): return request.param @@ -363,24 +237,16 @@ def cmp_scalar(request): return request.param -def test_str_series_compare_str( - str_series_cmp_data, str_series_compare_str_cmpop -): - expect = str_series_compare_str_cmpop(str_series_cmp_data, "a") - got = str_series_compare_str_cmpop( - Series.from_pandas(str_series_cmp_data), "a" - ) +def test_str_series_compare_str(str_series_cmp_data, comparison_op): + expect = comparison_op(str_series_cmp_data, "a") + got = comparison_op(Series.from_pandas(str_series_cmp_data), "a") assert_eq(expect, got.to_pandas(nullable=True)) -def test_str_series_compare_str_reflected( - str_series_cmp_data, str_series_compare_str_cmpop -): - expect = str_series_compare_str_cmpop("a", str_series_cmp_data) - got = str_series_compare_str_cmpop( - "a", Series.from_pandas(str_series_cmp_data) - ) +def test_str_series_compare_str_reflected(str_series_cmp_data, comparison_op): + expect = comparison_op("a", str_series_cmp_data) + got = comparison_op("a", Series.from_pandas(str_series_cmp_data)) assert_eq(expect, got.to_pandas(nullable=True)) @@ -408,10 +274,8 @@ def test_str_series_compare_num_reflected( @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize("nelem", [1, 2, 100]) -@pytest.mark.parametrize("cmpop", _cmpops) @pytest.mark.parametrize("dtype", [*utils.NUMERIC_TYPES, "datetime64[ms]"]) -def test_series_compare_scalar(nelem, cmpop, obj_class, dtype): +def test_series_compare_scalar(comparison_op, obj_class, dtype): rng = np.random.default_rng(seed=0) arr1 = 
rng.integers(0, 100, 100).astype(dtype) sr1 = Series(arr1) @@ -420,23 +284,24 @@ def test_series_compare_scalar(nelem, cmpop, obj_class, dtype): if obj_class == "Index": sr1 = Index(sr1) - result1 = cmpop(sr1, rhs) - result2 = cmpop(rhs, sr1) + result1 = comparison_op(sr1, rhs) + result2 = comparison_op(rhs, sr1) if obj_class == "Index": result1 = Series(result1) result2 = Series(result2) - np.testing.assert_equal(result1.to_numpy(), cmpop(arr1, rhs)) - np.testing.assert_equal(result2.to_numpy(), cmpop(rhs, arr1)) + np.testing.assert_equal(result1.to_numpy(), comparison_op(arr1, rhs)) + np.testing.assert_equal(result2.to_numpy(), comparison_op(rhs, arr1)) _nulls = ["none", "some"] -@pytest.mark.parametrize("nelem", [1, 7, 8, 9, 32, 64, 128]) -@pytest.mark.parametrize("lhs_nulls,rhs_nulls", list(product(_nulls, _nulls))) -def test_validity_add(nelem, lhs_nulls, rhs_nulls): +@pytest.mark.parametrize("lhs_nulls", _nulls) +@pytest.mark.parametrize("rhs_nulls", _nulls) +def test_validity_add(lhs_nulls, rhs_nulls): + nelem = 10 rng = np.random.default_rng(seed=0) # LHS lhs_data = rng.random(nelem) @@ -485,21 +350,14 @@ def test_validity_add(nelem, lhs_nulls, rhs_nulls): @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize( - "binop,lhs_dtype,rhs_dtype", - list( - product( - [operator.add, operator.mul], - utils.NUMERIC_TYPES, - utils.NUMERIC_TYPES, - ) - ), -) -def test_series_binop_mixed_dtype(binop, lhs_dtype, rhs_dtype, obj_class): +@pytest.mark.parametrize("binop", [operator.add, operator.mul]) +def test_series_binop_mixed_dtype( + binop, numeric_types_as_str, numeric_types_as_str2, obj_class +): nelem = 10 rng = np.random.default_rng(seed=0) - lhs = (rng.random(nelem) * nelem).astype(lhs_dtype) - rhs = (rng.random(nelem) * nelem).astype(rhs_dtype) + lhs = (rng.random(nelem) * nelem).astype(numeric_types_as_str) + rhs = (rng.random(nelem) * nelem).astype(numeric_types_as_str2) sr1 = Series(lhs) sr2 = Series(rhs) @@ -517,15 +375,13 @@ def test_series_binop_mixed_dtype(binop, lhs_dtype, rhs_dtype, obj_class): @pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize( - "cmpop,lhs_dtype,rhs_dtype", - list(product(_cmpops, utils.NUMERIC_TYPES, utils.NUMERIC_TYPES)), -) -def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class): +def test_series_cmpop_mixed_dtype( + comparison_op, numeric_types_as_str, numeric_types_as_str2, obj_class +): nelem = 5 rng = np.random.default_rng(seed=0) - lhs = (rng.random(nelem) * nelem).astype(lhs_dtype) - rhs = (rng.random(nelem) * nelem).astype(rhs_dtype) + lhs = (rng.random(nelem) * nelem).astype(numeric_types_as_str) + rhs = (rng.random(nelem) * nelem).astype(numeric_types_as_str2) sr1 = Series(lhs) sr2 = Series(rhs) @@ -534,28 +390,30 @@ def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class): sr1 = Index(sr1) sr2 = Index(sr2) - result = cmpop(Series(sr1), Series(sr2)) + result = comparison_op(Series(sr1), Series(sr2)) if obj_class == "Index": result = Series(result) - np.testing.assert_array_equal(result.to_numpy(), cmpop(lhs, rhs)) + np.testing.assert_array_equal(result.to_numpy(), comparison_op(lhs, rhs)) -@pytest.mark.parametrize("obj_class", ["Series", "Index"]) -@pytest.mark.parametrize( - "func, dtype", list(product(_reflected_ops, utils.NUMERIC_TYPES)) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in power:RuntimeWarning" +) +@pytest.mark.filterwarnings( + "ignore:divide by zero encountered in power:RuntimeWarning" ) -def 
test_series_reflected_ops_scalar(func, dtype, obj_class): +@pytest.mark.parametrize("obj_class", [cudf.Series, cudf.Index]) +@pytest.mark.parametrize("scalar", [-1, 0, 1]) +def test_series_reflected_ops_scalar( + arithmetic_op, scalar, numeric_types_as_str, obj_class +): # create random series - random_series = utils.gen_rand(dtype, 100, low=10, seed=12) + func = lambda x: arithmetic_op(scalar, x) # noqa: E731 + random_series = utils.gen_rand(numeric_types_as_str, 100, low=10, seed=12) - # gpu series - gs = Series(random_series) - - # class typing - if obj_class == "Index": - gs = Index(gs) + gs = obj_class(random_series) try: gs_result = func(gs) @@ -577,16 +435,14 @@ def test_series_reflected_ops_scalar(func, dtype, obj_class): np.testing.assert_allclose(ps_result, gs_result.to_numpy()) -@pytest.mark.parametrize("binop", _binops) -def test_different_shapes_and_columns(binop): - # TODO: support `pow()` on NaN values. Particularly, the cases: - # `pow(1, NaN) == 1` and `pow(NaN, 0) == 1` - if binop is operator.pow: - return +def test_different_shapes_and_columns(request, arithmetic_op): + if arithmetic_op is operator.pow: + msg = "TODO: Support `pow(1, NaN) == 1` and `pow(NaN, 0) == 1`" + request.applymarker(pytest.mark.xfail(reason=msg)) # Empty frame on the right side - pd_frame = binop(pd.DataFrame({"x": [1, 2]}), pd.DataFrame({})) - cd_frame = binop(cudf.DataFrame({"x": [1, 2]}), cudf.DataFrame({})) + pd_frame = arithmetic_op(pd.DataFrame({"x": [1, 2]}), pd.DataFrame({})) + cd_frame = arithmetic_op(cudf.DataFrame({"x": [1, 2]}), cudf.DataFrame({})) assert_eq(cd_frame, pd_frame) # Empty frame on the left side @@ -610,17 +466,11 @@ def test_different_shapes_and_columns(binop): assert_eq(cd_frame, pd_frame) -@pytest.mark.parametrize("binop", _binops) -def test_different_shapes_and_same_columns(binop): - # TODO: support `pow()` on NaN values. Particularly, the cases: - # `pow(1, NaN) == 1` and `pow(NaN, 0) == 1` - if binop is operator.pow: - return - - pd_frame = binop( +def test_different_shapes_and_same_columns(arithmetic_op): + pd_frame = arithmetic_op( pd.DataFrame({"x": [1, 2]}), pd.DataFrame({"x": [1, 2, 3]}) ) - cd_frame = binop( + cd_frame = arithmetic_op( cudf.DataFrame({"x": [1, 2]}), cudf.DataFrame({"x": [1, 2, 3]}) ) # cast x as float64 so it matches pandas dtype @@ -628,12 +478,12 @@ def test_different_shapes_and_same_columns(binop): assert_eq(cd_frame, pd_frame) -@pytest.mark.parametrize("binop", _binops) -def test_different_shapes_and_columns_with_unaligned_indices(binop): - # TODO: support `pow()` on NaN values. 
Particularly, the cases: - # `pow(1, NaN) == 1` and `pow(NaN, 0) == 1` - if binop is operator.pow: - return +def test_different_shapes_and_columns_with_unaligned_indices( + request, arithmetic_op +): + if arithmetic_op is operator.pow: + msg = "TODO: Support `pow(1, NaN) == 1` and `pow(NaN, 0) == 1`" + request.applymarker(pytest.mark.xfail(reason=msg)) # Test with a RangeIndex pdf1 = pd.DataFrame({"x": [4, 3, 2, 1], "y": [7, 3, 8, 6]}) @@ -650,8 +500,8 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop): gdf2 = cudf.DataFrame.from_pandas(pdf2) gdf3 = cudf.DataFrame.from_pandas(pdf3) - pd_frame = binop(binop(pdf1, pdf2), pdf3) - cd_frame = binop(binop(gdf1, gdf2), gdf3) + pd_frame = arithmetic_op(arithmetic_op(pdf1, pdf2), pdf3) + cd_frame = arithmetic_op(arithmetic_op(gdf1, gdf2), gdf3) # cast x and y as float64 so it matches pandas dtype cd_frame["x"] = cd_frame["x"].astype(np.float64) cd_frame["y"] = cd_frame["y"].astype(np.float64) @@ -665,8 +515,8 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop): pdf2 = pd.DataFrame({"x": [2]}, index=["a"]) gdf1 = cudf.DataFrame.from_pandas(pdf1) gdf2 = cudf.DataFrame.from_pandas(pdf2) - pd_frame = binop(pdf1, pdf2) - cd_frame = binop(gdf1, gdf2) + pd_frame = arithmetic_op(pdf1, pdf2) + cd_frame = arithmetic_op(gdf1, gdf2) # Sort both frames consistently for comparison pd_sorted = pd_frame.sort_index().sort_values(list(pd_frame.columns)) @@ -675,78 +525,76 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop): @pytest.mark.parametrize( - "df2", + "pdf2", [ - cudf.DataFrame({"a": [3, 2, 1]}, index=[3, 2, 1]), - cudf.DataFrame([3, 2]), + pd.DataFrame({"a": [3, 2, 1]}, index=[3, 2, 1]), + pd.DataFrame([3, 2]), ], ) -@pytest.mark.parametrize("binop", [operator.eq, operator.ne]) -def test_df_different_index_shape(df2, binop): +def test_df_different_index_shape(pdf2, comparison_op): df1 = cudf.DataFrame([1, 2, 3], index=[1, 2, 3]) pdf1 = df1.to_pandas() - pdf2 = df2.to_pandas() + df2 = cudf.DataFrame.from_pandas(pdf2) utils.assert_exceptions_equal( - lfunc=binop, - rfunc=binop, + lfunc=comparison_op, + rfunc=comparison_op, lfunc_args_and_kwargs=([pdf1, pdf2],), rfunc_args_and_kwargs=([df1, df2],), ) -@pytest.mark.parametrize("op", [operator.eq, operator.ne]) -def test_boolean_scalar_binop(op): +def test_boolean_scalar_binop(comparison_op): rng = np.random.default_rng(seed=0) psr = pd.Series(rng.choice([True, False], 10)) gsr = cudf.from_pandas(psr) - assert_eq(op(psr, True), op(gsr, True)) - assert_eq(op(psr, False), op(gsr, False)) + assert_eq(comparison_op(psr, True), comparison_op(gsr, True)) + assert_eq(comparison_op(psr, False), comparison_op(gsr, False)) -@pytest.mark.parametrize("func", _operators_arithmetic) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("fill_value", [None, 27]) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -def test_operator_func_between_series(dtype, func, has_nulls, fill_value): +def test_operator_func_between_series( + float_types_as_str, arithmetic_op_method, has_nulls, fill_value +): count = 1000 gdf_series_a = utils.gen_rand_series( - dtype, count, has_nulls=has_nulls, stride=10000 + float_types_as_str, count, has_nulls=has_nulls, stride=10000 ) gdf_series_b = utils.gen_rand_series( - dtype, count, has_nulls=has_nulls, stride=100 + float_types_as_str, count, has_nulls=has_nulls, stride=100 ) pdf_series_a = gdf_series_a.to_pandas() pdf_series_b = gdf_series_b.to_pandas() - gdf_result = getattr(gdf_series_a, func)( + gdf_result 
= getattr(gdf_series_a, arithmetic_op_method)( gdf_series_b, fill_value=fill_value ) - pdf_result = getattr(pdf_series_a, func)( + pdf_result = getattr(pdf_series_a, arithmetic_op_method)( pdf_series_b, fill_value=fill_value ) assert_eq(pdf_result, gdf_result) -@pytest.mark.parametrize("func", _operators_arithmetic) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("fill_value", [None, 27]) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -def test_operator_func_series_and_scalar(dtype, func, has_nulls, fill_value): +def test_operator_func_series_and_scalar( + float_types_as_str, arithmetic_op_method, has_nulls, fill_value +): count = 1000 scalar = 59 gdf_series = utils.gen_rand_series( - dtype, count, has_nulls=has_nulls, stride=10000 + float_types_as_str, count, has_nulls=has_nulls, stride=10000 ) pdf_series = gdf_series.to_pandas() - gdf_series_result = getattr(gdf_series, func)( + gdf_series_result = getattr(gdf_series, arithmetic_op_method)( scalar, fill_value=fill_value, ) - pdf_series_result = getattr(pdf_series, func)( + pdf_series_result = getattr(pdf_series, arithmetic_op_method)( scalar, fill_value=fill_value, ) @@ -754,27 +602,26 @@ def test_operator_func_series_and_scalar(dtype, func, has_nulls, fill_value): assert_eq(pdf_series_result, gdf_series_result) -_permu_values = [0, 1, None, np.nan] - - -@pytest.mark.parametrize("fill_value", _permu_values) -@pytest.mark.parametrize("scalar_a", _permu_values) -@pytest.mark.parametrize("scalar_b", _permu_values) -@pytest.mark.parametrize("func", _operators_comparison) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) +@pytest.mark.parametrize("fill_value", [0, 1, None, np.nan]) +@pytest.mark.parametrize("scalar_a", [0, 1, None, np.nan]) +@pytest.mark.parametrize("scalar_b", [0, 1, None, np.nan]) def test_operator_func_between_series_logical( - dtype, func, scalar_a, scalar_b, fill_value + float_types_as_str, comparison_op_method, scalar_a, scalar_b, fill_value ): - gdf_series_a = Series([scalar_a], nan_as_null=False).astype(dtype) - gdf_series_b = Series([scalar_b], nan_as_null=False).astype(dtype) + gdf_series_a = Series([scalar_a], nan_as_null=False).astype( + float_types_as_str + ) + gdf_series_b = Series([scalar_b], nan_as_null=False).astype( + float_types_as_str + ) pdf_series_a = gdf_series_a.to_pandas(nullable=True) pdf_series_b = gdf_series_b.to_pandas(nullable=True) - gdf_series_result = getattr(gdf_series_a, func)( + gdf_series_result = getattr(gdf_series_a, comparison_op_method)( gdf_series_b, fill_value=fill_value ) - pdf_series_result = getattr(pdf_series_a, func)( + pdf_series_result = getattr(pdf_series_a, comparison_op_method)( pdf_series_b, fill_value=fill_value ) expect = pdf_series_result @@ -795,33 +642,43 @@ def test_operator_func_between_series_logical( assert_eq(expect, got) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -@pytest.mark.parametrize("func", _operators_comparison) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("scalar", [-59.0, np.nan, 0, 59.0]) @pytest.mark.parametrize("fill_value", [None, 1.0]) def test_operator_func_series_and_scalar_logical( - request, dtype, func, has_nulls, scalar, fill_value + request, + float_types_as_str, + comparison_op_method, + has_nulls, + scalar, + fill_value, ): request.applymarker( pytest.mark.xfail( PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION and fill_value == 1.0 and scalar is np.nan - and (has_nulls or (not has_nulls and func not in {"eq", "ne"})), + and ( + has_nulls 
+ or (not has_nulls and comparison_op_method not in {"eq", "ne"}) + ), reason="https://github.com/pandas-dev/pandas/issues/57447", ) ) if has_nulls: - gdf_series = cudf.Series([-1.0, 0, cudf.NA, 1.1], dtype=dtype) + gdf_series = cudf.Series( + [-1.0, 0, cudf.NA, 1.1], dtype=float_types_as_str + ) else: - gdf_series = cudf.Series([-1.0, 0, 10.5, 1.1], dtype=dtype) + gdf_series = cudf.Series( + [-1.0, 0, 10.5, 1.1], dtype=float_types_as_str + ) pdf_series = gdf_series.to_pandas(nullable=True) - gdf_series_result = getattr(gdf_series, func)( + gdf_series_result = getattr(gdf_series, comparison_op_method)( scalar, fill_value=fill_value, ) - pdf_series_result = getattr(pdf_series, func)( + pdf_series_result = getattr(pdf_series, comparison_op_method)( scalar, fill_value=fill_value ) @@ -831,11 +688,12 @@ def test_operator_func_series_and_scalar_logical( assert_eq(expect, got) -@pytest.mark.parametrize("func", _operators_arithmetic) @pytest.mark.parametrize("nulls", _nulls) @pytest.mark.parametrize("fill_value", [None, 27]) @pytest.mark.parametrize("other", ["df", "scalar"]) -def test_operator_func_dataframe(func, nulls, fill_value, other): +def test_operator_func_dataframe( + arithmetic_op_method, nulls, fill_value, other +): num_rows = 100 num_cols = 3 @@ -862,16 +720,17 @@ def gen_df(): gdf1 = cudf.DataFrame.from_pandas(pdf1) gdf2 = cudf.DataFrame.from_pandas(pdf2) if other == "df" else 59.0 - got = getattr(gdf1, func)(gdf2, fill_value=fill_value) - expect = getattr(pdf1, func)(pdf2, fill_value=fill_value)[list(got._data)] + got = getattr(gdf1, arithmetic_op_method)(gdf2, fill_value=fill_value) + expect = getattr(pdf1, arithmetic_op_method)(pdf2, fill_value=fill_value)[ + list(got._data) + ] assert_eq(expect, got) -@pytest.mark.parametrize("func", _operators_comparison) @pytest.mark.parametrize("nulls", _nulls) @pytest.mark.parametrize("other", ["df", "scalar"]) -def test_logical_operator_func_dataframe(func, nulls, other): +def test_logical_operator_func_dataframe(comparison_op_method, nulls, other): num_rows = 100 num_cols = 3 @@ -902,37 +761,26 @@ def gen_df(): else 59.0 ) - got = getattr(gdf1, func)(gdf2) - expect = getattr(pdf1, func)(pdf2)[list(got._data)] + got = getattr(gdf1, comparison_op_method)(gdf2) + expect = getattr(pdf1, comparison_op_method)(pdf2)[list(got._data)] assert_eq(expect, got) -@pytest.mark.parametrize( - "func", - [op for op in _operators_arithmetic if op not in {"rmod", "rfloordiv"}] - + _operators_comparison - + [ - pytest.param( - "rmod", - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/12162" - ), - ), - pytest.param( - "rfloordiv", - marks=pytest.mark.xfail( +@pytest.mark.parametrize("rhs", [0, 1, 10]) +def test_binop_bool_uint(request, binary_op_method, rhs): + if binary_op_method in {"rmod", "rfloordiv"}: + request.applymarker( + pytest.mark.xfail( reason="https://github.com/rapidsai/cudf/issues/12162" ), - ), - ], -) -@pytest.mark.parametrize("rhs", [0, 1, 2, 128]) -def test_binop_bool_uint(func, rhs): + ) psr = pd.Series([True, False, False]) gsr = cudf.from_pandas(psr) assert_eq( - getattr(psr, func)(rhs), getattr(gsr, func)(rhs), check_dtype=False + getattr(psr, binary_op_method)(rhs), + getattr(gsr, binary_op_method)(rhs), + check_dtype=False, ) @@ -981,30 +829,19 @@ def test_floordiv_zero_bool(scalar_divisor): cr // cudf_div -@pytest.mark.parametrize( - "dtype", - ( - pytest.param( - np.bool_, - marks=pytest_xfail( - reason=( - "Pandas handling of division by zero-bool is too strange" - ) - ), - ), - np.int8, - np.uint8, - 
np.int64, - np.uint64, - np.float32, - np.float64, - ), -) -def test_rmod_zero_nan(dtype): - sr = pd.Series([1, 1, 0], dtype=dtype) +def test_rmod_zero_nan(numeric_and_bool_types_as_str, request): + request.applymarker( + pytest.mark.xfail( + numeric_and_bool_types_as_str == "bool", + reason="pandas returns int8, cuDF returns int64", + ) + ) + sr = pd.Series([1, 1, 0], dtype=numeric_and_bool_types_as_str) cr = cudf.from_pandas(sr) assert_eq(1 % sr, 1 % cr) - expected_dtype = np.float64 if cr.dtype.kind != "f" else dtype + expected_dtype = ( + np.float64 if cr.dtype.kind != "f" else numeric_and_bool_types_as_str + ) assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype)) @@ -1048,7 +885,7 @@ def is_timezone_aware_dtype(dtype: str) -> bool: return bool(re.match(r"^datetime64\[ns, .+\]$", dtype)) -@pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12]) +@pytest.mark.parametrize("n_periods", [0, 1, -12]) @pytest.mark.parametrize( "frequency", [ @@ -1145,16 +982,6 @@ def test_datetime_dateoffset_binaryop( assert_eq(expect, got) -@pytest.mark.parametrize( - "date_col", - [ - [ - "2000-01-01 00:00:00.012345678", - "2000-01-31 00:00:00.012345678", - "2000-02-29 00:00:00.012345678", - ] - ], -) @pytest.mark.parametrize( "kwargs", [ @@ -1177,8 +1004,15 @@ def test_datetime_dateoffset_binaryop( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Fails in older versions of pandas", ) -def test_datetime_dateoffset_binaryop_multiple(request, date_col, kwargs, op): - gsr = cudf.Series(date_col, dtype="datetime64[ns]") +def test_datetime_dateoffset_binaryop_multiple(request, kwargs, op): + gsr = cudf.Series( + [ + "2000-01-01 00:00:00.012345678", + "2000-01-31 00:00:00.012345678", + "2000-02-29 00:00:00.012345678", + ], + dtype="datetime64[ns]", + ) psr = gsr.to_pandas() poffset = pd.DateOffset(**kwargs) @@ -1190,7 +1024,7 @@ def test_datetime_dateoffset_binaryop_multiple(request, date_col, kwargs, op): assert_eq(expect, got) -@pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12]) +@pytest.mark.parametrize("n_periods", [0, 1, -12]) @pytest.mark.parametrize( "frequency", [ @@ -1290,43 +1124,21 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype): assert_eq(expected, got) -@pytest.mark.parametrize( - "dtype", - [ - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "float32", - "float64", - "datetime64[ns]", - "datetime64[us]", - "datetime64[ms]", - "datetime64[s]", - "timedelta64[ns]", - "timedelta64[us]", - "timedelta64[ms]", - "timedelta64[s]", - ], -) -@pytest.mark.parametrize("op", _operators_comparison) -def test_binops_with_NA_consistent(dtype, op): +def test_binops_with_NA_consistent( + numeric_and_temporal_types_as_str, comparison_op_method +): data = [1, 2, 3] - sr = cudf.Series(data, dtype=dtype) + sr = cudf.Series(data, dtype=numeric_and_temporal_types_as_str) - result = getattr(sr, op)(cudf.NA) - if dtype in NUMERIC_TYPES: - if op == "ne": + result = getattr(sr, comparison_op_method)(cudf.NA) + if sr.dtype.kind in "mM": + assert result.null_count == len(data) + else: + if comparison_op_method == "ne": expect_all = True else: expect_all = False assert (result == expect_all).all() - elif dtype in DATETIME_TYPES & TIMEDELTA_TYPES: - assert result._column.null_count == len(data) @pytest.mark.parametrize( @@ -1767,7 +1579,7 @@ def test_binops_reflect_decimal( assert_eq(expect, got) -@pytest.mark.parametrize("powers", [0, 1, 2, 3]) +@pytest.mark.parametrize("powers", [0, 1, 2]) def test_binops_decimal_pow(powers): s = cudf.Series( [ @@ 
-1791,157 +1603,165 @@ def test_binops_raise_error(): @pytest.mark.parametrize( - "args", + "op, ldata, ldtype, rdata, expected1, expected2", [ ( operator.eq, ["100", "41", None], cudf.Decimal64Dtype(scale=0, precision=5), [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool), + [True, False, None], + [True, False, None], ), ( operator.eq, ["100.000", "42.001", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool), + [True, False, None], + [True, False, None], ), ( operator.eq, ["100", "40", None], cudf.Decimal64Dtype(scale=-1, precision=3), [100, 42, 12], - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool), + [True, False, None], + [True, False, None], ), ( operator.ne, ["100", "42", "24", None], cudf.Decimal64Dtype(scale=0, precision=3), [100, 40, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool), + [False, True, False, None], + [False, True, False, None], ), ( operator.ne, ["10.1", "88", "11", None], cudf.Decimal64Dtype(scale=1, precision=3), [10, 42, 11, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, True, False, None], dtype=bool), + [True, True, False, None], + [True, True, False, None], ), ( operator.ne, ["100.000", "42", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool), + [False, False, True, None], + [False, False, True, None], ), ( operator.lt, ["100", "40", "28", None], cudf.Decimal64Dtype(scale=0, precision=3), [100, 42, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool), + [False, True, False, None], + [False, False, True, None], ), ( operator.lt, ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool), + [False, False, True, None], + [False, True, False, None], ), ( operator.lt, ["100", "40", "10", None], cudf.Decimal64Dtype(scale=-1, precision=3), [100, 42, 8, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool), + [False, True, False, None], + [False, False, True, None], ), ( operator.gt, ["100", "42", "20", None], cudf.Decimal64Dtype(scale=0, precision=3), [100, 40, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool), + [False, True, False, None], + [False, False, True, None], ), ( operator.gt, ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([False, True, False, None], dtype=bool), - cudf.Series([False, False, True, None], dtype=bool), + [False, True, False, None], + [False, False, True, None], ), ( operator.gt, ["100", "40", "10", None], cudf.Decimal64Dtype(scale=-1, precision=3), [100, 42, 8, 12], - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool), + [False, False, True, None], + [False, True, False, None], ), ( operator.le, ["100", "40", "28", None], cudf.Decimal64Dtype(scale=0, precision=3), [100, 42, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - 
cudf.Series([True, False, True, None], dtype=bool), + [True, True, False, None], + [True, False, True, None], ), ( operator.le, ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, True, False, None], dtype=bool), + [True, False, True, None], + [True, True, False, None], ), ( operator.le, ["100", "40", "10", None], cudf.Decimal64Dtype(scale=-1, precision=3), [100, 42, 8, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool), + [True, True, False, None], + [True, False, True, None], ), ( operator.ge, ["100", "42", "20", None], cudf.Decimal64Dtype(scale=0, precision=3), [100, 40, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool), + [True, True, False, None], + [True, False, True, None], ), ( operator.ge, ["100.000", "42.002", "23.999", None], cudf.Decimal64Dtype(scale=3, precision=6), [100, 42, 24, 12], - cudf.Series([True, True, False, None], dtype=bool), - cudf.Series([True, False, True, None], dtype=bool), + [True, True, False, None], + [True, False, True, None], ), ( operator.ge, ["100", "40", "10", None], cudf.Decimal64Dtype(scale=-1, precision=3), [100, 42, 8, 12], - cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, True, False, None], dtype=bool), + [True, False, True, None], + [True, True, False, None], ), ], ) -@pytest.mark.parametrize("integer_dtype", utils.INTEGER_TYPES) @pytest.mark.parametrize("reflected", [True, False]) -def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): +def test_binops_decimal_comp_mixed_integer( + op, + ldata, + ldtype, + rdata, + expected1, + expected2, + integer_types_as_str, + reflected, +): """ Tested compare operations: eq, lt, gt, le, ge @@ -1951,12 +1771,12 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): of the following compare results: {True, False, None}. 
""" if not reflected: - op, ldata, ldtype, rdata, expected, _ = args + expected = cudf.Series(expected1, dtype=bool) else: - op, ldata, ldtype, rdata, _, expected = args + expected = cudf.Series(expected2, dtype=bool) lhs = utils._decimal_series(ldata, ldtype) - rhs = cudf.Series(rdata, dtype=integer_dtype) + rhs = cudf.Series(rdata, dtype=integer_types_as_str) if reflected: rhs, lhs = lhs, rhs @@ -1967,7 +1787,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): @pytest.mark.parametrize( - "args", + "op, lhs, l_dtype, rhs, expect, expect_dtype, reflect", [ ( operator.add, @@ -2196,17 +2016,17 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): ), ], ) -def test_binops_decimal_scalar(args): - op, lhs, l_dtype, rhs, expect, expect_dtype, reflect = args - - def decimal_series(input, dtype): - return cudf.Series( - [x if x is None else decimal.Decimal(x) for x in input], - dtype=dtype, - ) - - lhs = decimal_series(lhs, l_dtype) - expect = decimal_series(expect, expect_dtype) +def test_binops_decimal_scalar( + op, lhs, l_dtype, rhs, expect, expect_dtype, reflect +): + lhs = cudf.Series( + [x if x is None else decimal.Decimal(x) for x in lhs], + dtype=l_dtype, + ) + expect = cudf.Series( + [x if x is None else decimal.Decimal(x) for x in expect], + dtype=expect_dtype, + ) if reflect: lhs, rhs = rhs, lhs @@ -2217,108 +2037,110 @@ def decimal_series(input, dtype): @pytest.mark.parametrize( - "args", + "op, ldata, ldtype, rdata, expected1, expected2", [ ( operator.eq, ["100.00", "41", None], cudf.Decimal64Dtype(scale=0, precision=5), 100, - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool), + [True, False, None], + [True, False, None], ), ( operator.eq, ["100.123", "41", None], cudf.Decimal64Dtype(scale=3, precision=6), decimal.Decimal("100.123"), - cudf.Series([True, False, None], dtype=bool), - cudf.Series([True, False, None], dtype=bool), + [True, False, None], + [True, False, None], ), ( operator.ne, ["100.00", "41", None], cudf.Decimal64Dtype(scale=2, precision=5), 100, - cudf.Series([False, True, None], dtype=bool), - cudf.Series([False, True, None], dtype=bool), + [False, True, None], + [False, True, None], ), ( operator.ne, ["100.123", "120.21", None], cudf.Decimal64Dtype(scale=3, precision=6), decimal.Decimal("100.123"), - cudf.Series([False, True, None], dtype=bool), - cudf.Series([False, True, None], dtype=bool), + [False, True, None], + [False, True, None], ), ( operator.gt, ["100.00", "41", "120.21", None], cudf.Decimal64Dtype(scale=2, precision=5), 100, - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool), + [False, False, True, None], + [False, True, False, None], ), ( operator.gt, ["100.123", "41", "120.21", None], cudf.Decimal64Dtype(scale=3, precision=6), decimal.Decimal("100.123"), - cudf.Series([False, False, True, None], dtype=bool), - cudf.Series([False, True, False, None], dtype=bool), + [False, False, True, None], + [False, True, False, None], ), ( operator.ge, ["100.00", "41", "120.21", None], cudf.Decimal64Dtype(scale=2, precision=5), 100, - cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, True, False, None], dtype=bool), + [True, False, True, None], + [True, True, False, None], ), ( operator.ge, ["100.123", "41", "120.21", None], cudf.Decimal64Dtype(scale=3, precision=6), decimal.Decimal("100.123"), - cudf.Series([True, False, True, None], dtype=bool), - cudf.Series([True, True, False, None], 
dtype=bool),
+            [True, False, True, None],
+            [True, True, False, None],
         ),
         (
             operator.lt,
             ["100.00", "41", "120.21", None],
             cudf.Decimal64Dtype(scale=2, precision=5),
             100,
-            cudf.Series([False, True, False, None], dtype=bool),
-            cudf.Series([False, False, True, None], dtype=bool),
+            [False, True, False, None],
+            [False, False, True, None],
         ),
         (
             operator.lt,
             ["100.123", "41", "120.21", None],
             cudf.Decimal64Dtype(scale=3, precision=6),
             decimal.Decimal("100.123"),
-            cudf.Series([False, True, False, None], dtype=bool),
-            cudf.Series([False, False, True, None], dtype=bool),
+            [False, True, False, None],
+            [False, False, True, None],
         ),
         (
             operator.le,
             ["100.00", "41", "120.21", None],
             cudf.Decimal64Dtype(scale=2, precision=5),
             100,
-            cudf.Series([True, True, False, None], dtype=bool),
-            cudf.Series([True, False, True, None], dtype=bool),
+            [True, True, False, None],
+            [True, False, True, None],
         ),
         (
             operator.le,
             ["100.123", "41", "120.21", None],
             cudf.Decimal64Dtype(scale=3, precision=6),
             decimal.Decimal("100.123"),
-            cudf.Series([True, True, False, None], dtype=bool),
-            cudf.Series([True, False, True, None], dtype=bool),
+            [True, True, False, None],
+            [True, False, True, None],
         ),
     ],
 )
 @pytest.mark.parametrize("reflected", [True, False])
-def test_binops_decimal_scalar_compare(args, reflected):
+def test_binops_decimal_scalar_compare(
+    op, ldata, ldtype, rdata, expected1, expected2, reflected
+):
     """
     Tested compare operations:
         eq, lt, gt, le, ge
@@ -2327,9 +2149,9 @@ def test_binops_decimal_scalar_compare(args, reflected):
     following compare results: {True, False, None}.
     """
     if not reflected:
-        op, ldata, ldtype, rdata, expected, _ = args
+        expected = cudf.Series(expected1, dtype=bool)
     else:
-        op, ldata, ldtype, rdata, _, expected = args
+        expected = cudf.Series(expected2, dtype=bool)
 
     lhs = utils._decimal_series(ldata, ldtype)
     rhs = rdata
@@ -2342,53 +2164,35 @@ def test_binops_decimal_scalar_compare(args, reflected):
     assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        "uint8",
-        "uint16",
-        "uint32",
-        "uint64",
-        "int8",
-        "int16",
-        "int32",
-        "int64",
-        "float32",
-        "float64",
-        "str",
-        "datetime64[ns]",
-        "datetime64[us]",
-        "datetime64[ms]",
-        "datetime64[s]",
-        "timedelta64[ns]",
-        "timedelta64[us]",
-        "timedelta64[ms]",
-        "timedelta64[s]",
-    ],
-)
 @pytest.mark.parametrize("null_scalar", [None, cudf.NA, np.datetime64("NaT")])
-@pytest.mark.parametrize("cmpop", _cmpops)
-def test_column_null_scalar_comparison(dtype, null_scalar, cmpop):
+def test_column_null_scalar_comparison(
+    request, all_supported_types_as_str, null_scalar, comparison_op
+):
     # This test is meant to validate that comparing
     # a series of any dtype with a null scalar produces
     # a new series where all the elements are <NA>.
+    request.applymarker(
+        pytest.mark.xfail(
+            all_supported_types_as_str == "category",
+            raises=ValueError,
+            reason="Value ...
not found in column", + ) + ) + dtype = cudf.dtype(all_supported_types_as_str) if isinstance(null_scalar, np.datetime64): - if cudf.dtype(dtype).kind not in "mM": - pytest.skip() + if dtype.kind not in "mM": + pytest.skip(f"{null_scalar} not applicable for {dtype}") null_scalar = null_scalar.astype(dtype) - dtype = cudf.dtype(dtype) - data = [1, 2, 3, 4, 5] sr = cudf.Series(data, dtype=dtype) - result = cmpop(sr, null_scalar) + result = comparison_op(sr, null_scalar) assert result.isnull().all() -@pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) -def test_equality_ops_index_mismatch(fn): +def test_equality_ops_index_mismatch(comparison_op_method): a = cudf.Series( [1, 2, 3, None, None, 4], index=["a", "b", "c", "d", "e", "f"] ) @@ -2399,13 +2203,26 @@ def test_equality_ops_index_mismatch(fn): pa = a.to_pandas(nullable=True) pb = b.to_pandas(nullable=True) - expected = getattr(pa, fn)(pb) - actual = getattr(a, fn)(b).to_pandas(nullable=True) + expected = getattr(pa, comparison_op_method)(pb) + actual = getattr(a, comparison_op_method)(b).to_pandas(nullable=True) assert_eq(expected, actual) -def generate_test_null_equals_columnops_data(): +@pytest.mark.parametrize( + "dtype", + sorted( + itertools.chain( + NUMERIC_TYPES, + DATETIME_TYPES, + TIMEDELTA_TYPES, + STRING_TYPES, + ["category"], + ) + ), +) +@pytest.mark.parametrize("null_case", ["neither", "left", "right", "both"]) +def test_null_equals_columnops(dtype, null_case): # Generate tuples of: # (left_data, right_data, compare_bool # where compare_bool is the correct answer to @@ -2425,36 +2242,16 @@ def set_null_cases(column_l, column_r, case): raise ValueError("Unknown null case") return column_l, column_r - null_cases = ["neither", "left", "right", "both"] data = [1, 2, 3] - results = [] - # TODO: Numeric types can be cross compared as null equal - for dtype in ( - list(NUMERIC_TYPES) - + list(DATETIME_TYPES) - + list(TIMEDELTA_TYPES) - + list(STRING_TYPES) - + ["category"] - ): - for case in null_cases: - left = cudf.Series(data, dtype=dtype) - right = cudf.Series(data, dtype=dtype) - if case in {"left", "right"}: - answer = False - else: - answer = True - left, right = set_null_cases(left, right, case) - results.append((left._column, right._column, answer, case)) - - return results - - -@pytest.mark.parametrize( - "lcol,rcol,ans,case", generate_test_null_equals_columnops_data() -) -def test_null_equals_columnops(lcol, rcol, ans, case): - assert lcol.equals(rcol) == ans + left = cudf.Series(data, dtype=dtype) + right = cudf.Series(data, dtype=dtype) + if null_case in {"left", "right"}: + answer = False + else: + answer = True + left, right = set_null_cases(left, right, null_case) + assert left._column.equals(right._column) is answer def test_add_series_to_dataframe(): @@ -2470,27 +2267,25 @@ def test_add_series_to_dataframe(): @pytest.mark.parametrize("obj_class", [cudf.Series, cudf.Index]) -@pytest.mark.parametrize("binop", _binops) -def test_binops_cupy_array(obj_class, binop): +def test_binops_cupy_array(obj_class, arithmetic_op): # Skip 0 to not deal with NaNs from division. 
data = range(1, 100) lhs = obj_class(data) rhs = cp.array(data) - assert (binop(lhs, rhs) == binop(lhs, lhs)).all() + assert (arithmetic_op(lhs, rhs) == arithmetic_op(lhs, lhs)).all() -@pytest.mark.parametrize("binop", _binops + _binops_compare) -@pytest.mark.parametrize("data", [None, [-9, 7], [5, -2], [12, 18]]) +@pytest.mark.parametrize("data", [None, [-9, 7], [12, 18]]) @pytest.mark.parametrize("scalar", [1, 3, 12, np.nan]) -def test_empty_column(binop, data, scalar): +def test_empty_column(binary_op, data, scalar): gdf = cudf.DataFrame(columns=["a", "b"]) if data is not None: gdf["a"] = data pdf = gdf.to_pandas() - got = binop(gdf, scalar) - expected = binop(pdf, scalar) + got = binary_op(gdf, scalar) + expected = binary_op(pdf, scalar) assert_eq(expected, got) @@ -2498,16 +2293,16 @@ def test_empty_column(binop, data, scalar): @pytest.mark.parametrize( "df", [ - cudf.DataFrame( + lambda: cudf.DataFrame( [[1, 2, 3, 4], [5, 6, 7, 8], [10, 11, 12, 13], [14, 15, 16, 17]] ), pytest.param( - cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]), + lambda: cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]), marks=pytest_xfail( reason="Cannot access Frame.values if frame contains nulls" ), ), - cudf.DataFrame( + lambda: cudf.DataFrame( [ [1.2, 2.3, 3.4, 4.5], [5.6, 6.7, 7.8, 8.9], @@ -2515,24 +2310,26 @@ def test_empty_column(binop, data, scalar): [9.1, 2.4, 4.5, 65.34], ] ), - cudf.Series([14, 15, 16, 17]), - cudf.Series([14.15, 15.16, 16.17, 17.18]), + lambda: cudf.Series([14, 15, 16, 17]), + lambda: cudf.Series([14.15, 15.16, 16.17, 17.18]), ], ) @pytest.mark.parametrize( "other", [ - cudf.DataFrame([[9, 10], [11, 12], [13, 14], [15, 16]]), - cudf.DataFrame( + lambda: cudf.DataFrame([[9, 10], [11, 12], [13, 14], [15, 16]]), + lambda: cudf.DataFrame( [[9.4, 10.5], [11.6, 12.7], [13.8, 14.9], [15.1, 16.2]] ), - cudf.Series([5, 6, 7, 8]), - cudf.Series([5.6, 6.7, 7.8, 8.9]), - np.array([5, 6, 7, 8]), - [25.5, 26.6, 27.7, 28.8], + lambda: cudf.Series([5, 6, 7, 8]), + lambda: cudf.Series([5.6, 6.7, 7.8, 8.9]), + lambda: np.array([5, 6, 7, 8]), + lambda: [25.5, 26.6, 27.7, 28.8], ], ) def test_binops_dot(df, other): + df = df() + other = other() pdf = df.to_pandas() host_other = other.to_pandas() if hasattr(other, "to_pandas") else other @@ -2592,13 +2389,12 @@ def test_binop_integer_power_int_series(): assert_eq(expected, got) -@pytest.mark.parametrize("op", _binops) -def test_binop_index_series(op): +def test_binop_index_series(arithmetic_op): gi = cudf.Index([10, 11, 12]) gs = cudf.Series([1, 2, 3]) - actual = op(gi, gs) - expected = op(gi.to_pandas(), gs.to_pandas()) + actual = arithmetic_op(gi, gs) + expected = arithmetic_op(gi.to_pandas(), gs.to_pandas()) assert_eq(expected, actual) @@ -2649,7 +2445,6 @@ def test_binop_lhs_numpy_datetimelike_scalar(scalar): assert_eq(result, expected) -@pytest.mark.parametrize("comp_op", _cmpops) @pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize( "data_left, data_right", @@ -2658,7 +2453,9 @@ def test_binop_lhs_numpy_datetimelike_scalar(scalar): [[1, 2], [1, 3]], ], ) -def test_cat_non_cat_compare_ops(comp_op, data_left, data_right, ordered): +def test_cat_non_cat_compare_ops( + comparison_op, data_left, data_right, ordered +): pd_non_cat = pd.Series(data_left) pd_cat = pd.Series( data_right, @@ -2669,15 +2466,20 @@ def test_cat_non_cat_compare_ops(comp_op, data_left, data_right, ordered): cudf_cat = cudf.Series.from_pandas(pd_cat) if ( - not ordered and comp_op not in {operator.eq, operator.ne} - ) or comp_op in {operator.gt, 
operator.lt, operator.le, operator.ge}: + not ordered and comparison_op not in {operator.eq, operator.ne} + ) or comparison_op in { + operator.gt, + operator.lt, + operator.le, + operator.ge, + }: with pytest.raises(TypeError): - comp_op(pd_non_cat, pd_cat) + comparison_op(pd_non_cat, pd_cat) with pytest.raises(TypeError): - comp_op(cudf_non_cat, cudf_cat) + comparison_op(cudf_non_cat, cudf_cat) else: - expected = comp_op(pd_non_cat, pd_cat) - result = comp_op(cudf_non_cat, cudf_cat) + expected = comparison_op(pd_non_cat, pd_cat) + result = comparison_op(cudf_non_cat, cudf_cat) assert_eq(result, expected) @@ -2704,10 +2506,9 @@ def test_eq_ne_non_comparable_types( assert_eq(result, expected) -@pytest.mark.parametrize("op", _binops_compare) -def test_binops_compare_stdlib_date_scalar(op): +def test_binops_compare_stdlib_date_scalar(comparison_op): dt = datetime.date(2020, 1, 1) data = [dt] - result = op(cudf.Series(data), dt) - expected = op(pd.Series(data), dt) + result = comparison_op(cudf.Series(data), dt) + expected = comparison_op(pd.Series(data), dt) assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py index 03637e05eae..e36523f10b4 100644 --- a/python/cudf/cudf/tests/test_buffer.py +++ b/python/cudf/cudf/tests/test_buffer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. import cupy as cp import pytest @@ -7,22 +7,30 @@ pytestmark = pytest.mark.spilling -arr_len = 10 + +@pytest.fixture +def arr_len(): + return 10 @pytest.mark.parametrize( - "data", + "data, expect_success", [ - (cp.zeros(arr_len), True), - (cp.zeros((1, arr_len)), True), - (cp.zeros((1, arr_len, 1)), True), - (cp.zeros((arr_len, arr_len)), True), - (cp.zeros((arr_len, arr_len)).reshape(arr_len * arr_len), True), - (cp.zeros((arr_len, arr_len))[:, 0], False), + (lambda arr_len: cp.zeros(arr_len), True), + (lambda arr_len: cp.zeros((1, arr_len)), True), + (lambda arr_len: cp.zeros((1, arr_len, 1)), True), + (lambda arr_len: cp.zeros((arr_len, arr_len)), True), + ( + lambda arr_len: cp.zeros((arr_len, arr_len)).reshape( + arr_len * arr_len + ), + True, + ), + (lambda arr_len: cp.zeros((arr_len, arr_len))[:, 0], False), ], ) -def test_buffer_from_cuda_iface_contiguous(data): - data, expect_success = data +def test_buffer_from_cuda_iface_contiguous(data, expect_success, arr_len): + data = data(arr_len) if expect_success: as_buffer(data.view("|u1")) else: @@ -33,14 +41,15 @@ def test_buffer_from_cuda_iface_contiguous(data): @pytest.mark.parametrize( "data", [ - cp.arange(arr_len), - cp.arange(arr_len).reshape(1, arr_len), - cp.arange(arr_len).reshape(1, arr_len, 1), - cp.arange(arr_len**2).reshape(arr_len, arr_len), + lambda arr_len: cp.arange(arr_len), + lambda arr_len: cp.arange(arr_len).reshape(1, arr_len), + lambda arr_len: cp.arange(arr_len).reshape(1, arr_len, 1), + lambda arr_len: cp.arange(arr_len**2).reshape(arr_len, arr_len), ], ) @pytest.mark.parametrize("dtype", ["uint8", "int8", "float32", "int32"]) -def test_buffer_from_cuda_iface_dtype(data, dtype): +def test_buffer_from_cuda_iface_dtype(data, dtype, arr_len): + data = data(arr_len) data = data.astype(dtype) buf = as_buffer(data) got = cp.array(buf).reshape(-1).view("uint8") @@ -48,7 +57,7 @@ def test_buffer_from_cuda_iface_dtype(data, dtype): assert (expect == got).all() -def test_buffer_creation_from_any(): +def test_buffer_creation_from_any(arr_len): ary = cp.arange(arr_len) b = as_buffer(ary, exposed=True) assert 
isinstance(b, Buffer) @@ -89,12 +98,12 @@ def test_buffer_repr(size, expect): slice(0, 0), slice(0, 1), slice(-2, -1), - slice(0, arr_len), + slice(0, 10), slice(2, 3), slice(2, -1), ], ) -def test_buffer_slice(idx): +def test_buffer_slice(idx, arr_len): ary = cp.arange(arr_len, dtype="uint8") buf = as_buffer(ary) expect = ary[idx] @@ -112,7 +121,7 @@ def test_buffer_slice(idx): (slice(3, 2, -1), ValueError, "slice must be C-contiguous"), ], ) -def test_buffer_slice_fail(idx, err_type, err_msg): +def test_buffer_slice_fail(idx, err_type, err_msg, arr_len): ary = cp.arange(arr_len, dtype="uint8") buf = as_buffer(ary) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 75cd40aa436..c5e2f05fcd9 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -241,9 +241,8 @@ def test_cat_series_binop_error(): ) -@pytest.mark.parametrize("num_elements", [10, 100, 1000]) -def test_categorical_unique(num_elements): - # create categorical series +def test_categorical_unique(): + num_elements = 20 rng = np.random.default_rng(seed=12) pd_cat = pd.Categorical( pd.Series( @@ -268,9 +267,8 @@ def test_categorical_unique(num_elements): np.testing.assert_array_equal(pdf_unique_sorted, gdf_unique_sorted) -@pytest.mark.parametrize("nelem", [20, 50, 100]) -def test_categorical_unique_count(nelem): - # create categorical series +def test_categorical_unique_count(): + nelem = 20 rng = np.random.default_rng(seed=0) pd_cat = pd.Categorical( pd.Series( @@ -334,8 +332,8 @@ def test_categorical_set_categories_preserves_order(): def test_categorical_as_ordered(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.copy().set_ordered(False)) - cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(False)) + pd_sr = pd.Series(pd_str_cat.set_ordered(False)) + cd_sr = cudf.Series(pd_str_cat.set_ordered(False)) assert cd_sr.cat.ordered is False assert cd_sr.cat.ordered == pd_sr.cat.ordered @@ -349,8 +347,8 @@ def test_categorical_as_ordered(pd_str_cat): def test_categorical_as_unordered(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.copy().set_ordered(True)) - cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(True)) + pd_sr = pd.Series(pd_str_cat.set_ordered(True)) + cd_sr = cudf.Series(pd_str_cat.set_ordered(True)) assert cd_sr.cat.ordered is True assert cd_sr.cat.ordered == pd_sr.cat.ordered @@ -366,19 +364,15 @@ def test_categorical_as_unordered(pd_str_cat): @pytest.mark.parametrize("from_ordered", [True, False]) @pytest.mark.parametrize("to_ordered", [True, False]) def test_categorical_reorder_categories(pd_str_cat, from_ordered, to_ordered): - pd_sr = pd.Series(pd_str_cat.copy().set_ordered(from_ordered)) - cd_sr = cudf.Series(pd_str_cat.copy().set_ordered(from_ordered)) + pd_sr = pd.Series(pd_str_cat.set_ordered(from_ordered)) + cd_sr = cudf.Series(pd_str_cat.set_ordered(from_ordered)) assert_eq(pd_sr, cd_sr) assert str(pd_sr) == str(cd_sr) - kwargs = dict( - ordered=to_ordered, - ) - - pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs) - cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs) + pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) + cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) assert_eq(pd_sr_1, cd_sr_1) @@ -386,8 +380,8 @@ def test_categorical_reorder_categories(pd_str_cat, from_ordered, to_ordered): def test_categorical_add_categories(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.copy()) - cd_sr = cudf.Series(pd_str_cat.copy()) + pd_sr = 
pd.Series(pd_str_cat) + cd_sr = cudf.Series(pd_str_cat) assert_eq(pd_sr, cd_sr) @@ -403,8 +397,8 @@ def test_categorical_add_categories(pd_str_cat): def test_categorical_remove_categories(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.copy()) - cd_sr = cudf.Series(pd_str_cat.copy()) + pd_sr = pd.Series(pd_str_cat) + cd_sr = cudf.Series(pd_str_cat) assert_eq(pd_sr, cd_sr) @@ -456,21 +450,22 @@ def test_categorical_dataframe_slice_copy(): ], ) @pytest.mark.parametrize( - "cat_type", + "categories", [ - pd.CategoricalDtype(categories=["aa", "bb", "cc"]), - pd.CategoricalDtype(categories=[2, 4, 10, 100]), - pd.CategoricalDtype(categories=["aa", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "b", "c"]), - pd.CategoricalDtype(categories=["1", "2", "3", "4"]), - pd.CategoricalDtype(categories=["1.0", "2.5", "3.001", "9"]), - pd.CategoricalDtype(categories=[]), + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["aa", "bb", "c"], + ["a", "bb", "c"], + ["a", "b", "c"], + ["1", "2", "3", "4"], + ["1.0", "2.5", "3.001", "9"], + [], ], ) -def test_categorical_typecast(data, cat_type): - pd_data = data.copy() +def test_categorical_typecast(data, categories): + pd_data = data gd_data = cudf.from_pandas(data) + cat_type = pd.CategoricalDtype(categories) assert_eq(pd_data.astype(cat_type), gd_data.astype(cat_type)) @@ -503,7 +498,7 @@ def test_categorical_typecast(data, cat_type): ], ) def test_categorical_set_categories_categoricals(data, new_categories): - pd_data = data.copy().astype("category") + pd_data = data.astype("category") gd_data = cudf.from_pandas(pd_data) expected = pd_data.cat.set_categories(new_categories=new_categories) @@ -539,18 +534,19 @@ def test_categorical_set_categories_categoricals(data, new_categories): ], ) @pytest.mark.parametrize( - "dtype", + "categories", [ - pd.CategoricalDtype(categories=["aa", "bb", "cc"]), - pd.CategoricalDtype(categories=[2, 4, 10, 100]), - pd.CategoricalDtype(categories=["aa", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "b", "c"]), - pd.CategoricalDtype(categories=["22", "b", "c"]), - pd.CategoricalDtype(categories=[]), + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["aa", "bb", "c"], + ["a", "bb", "c"], + ["a", "b", "c"], + ["22", "b", "c"], + [], ], ) -def test_categorical_creation(data, dtype): +def test_categorical_creation(data, categories): + dtype = pd.CategoricalDtype(categories) expected = pd.Series(data, dtype=dtype) got = cudf.Series(data, dtype=dtype) assert_eq(expected, got) @@ -584,33 +580,22 @@ def test_categorical_dtype(categories, ordered): @pytest.mark.parametrize( - ("data", "expected"), + ("values", "expected"), [ - (cudf.Series([1]), np.uint8), - (cudf.Series([1, None]), np.uint8), - (cudf.Series(np.arange(np.iinfo(np.int8).max)), np.uint8), - ( - cudf.Series(np.append(np.arange(np.iinfo(np.int8).max), [None])), - np.uint8, - ), - (cudf.Series(np.arange(np.iinfo(np.int16).max)), np.uint16), - ( - cudf.Series(np.append(np.arange(np.iinfo(np.int16).max), [None])), - np.uint16, - ), - (cudf.Series(np.arange(np.iinfo(np.uint8).max)), np.uint8), - ( - cudf.Series(np.append(np.arange(np.iinfo(np.uint8).max), [None])), - np.uint8, - ), - (cudf.Series(np.arange(np.iinfo(np.uint16).max)), np.uint16), - ( - cudf.Series(np.append(np.arange(np.iinfo(np.uint16).max), [None])), - np.uint16, - ), + ([1], np.uint8), + ([1, None], np.uint8), + (np.arange(np.iinfo(np.int8).max), np.uint8), + (np.append(np.arange(np.iinfo(np.int8).max), [None]), 
np.uint8), + (np.arange(np.iinfo(np.int16).max), np.uint16), + (np.append(np.arange(np.iinfo(np.int16).max), [None]), np.uint16), + (np.arange(np.iinfo(np.uint8).max), np.uint8), + (np.append(np.arange(np.iinfo(np.uint8).max), [None]), np.uint8), + (np.arange(np.iinfo(np.uint16).max), np.uint16), + (np.append(np.arange(np.iinfo(np.uint16).max), [None]), np.uint16), ], ) -def test_astype_dtype(data, expected): +def test_astype_dtype(values, expected): + data = cudf.Series(values) got = data.astype("category").cat.codes.dtype np.testing.assert_equal(got, expected) @@ -696,18 +681,19 @@ def test_add_categories_mixed_error(): ], ) @pytest.mark.parametrize( - "cat_dtype", + "categories", [ - pd.CategoricalDtype(categories=["aa", "bb", "cc"]), - pd.CategoricalDtype(categories=[2, 4, 10, 100]), - pd.CategoricalDtype(categories=["aa", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "bb", "c"]), - pd.CategoricalDtype(categories=["a", "b", "c"]), - pd.CategoricalDtype(categories=["22", "b", "c"]), - pd.CategoricalDtype(categories=["a"]), + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["aa", "bb", "c"], + ["a", "bb", "c"], + ["a", "b", "c"], + ["22", "b", "c"], + ["a"], ], ) -def test_categorical_assignment(data, cat_dtype): +def test_categorical_assignment(data, categories): + cat_dtype = pd.CategoricalDtype(categories) pd_df = pd.DataFrame() pd_df["a"] = np.ones(len(data)) cd_df = cudf.from_pandas(pd_df) @@ -777,16 +763,16 @@ def test_series_construction_with_nulls(input_obj, dtype): @pytest.mark.parametrize( "data", [ - {"a": cudf.Series(["a", "b", "c", "a", "c", "b"]).astype("category")}, + {"a": pd.Series(["a", "b", "c", "a", "c", "b"]).astype("category")}, { - "a": cudf.Series(["a", "a", "b", "b"]).astype("category"), - "b": cudf.Series(["b", "b", "c", "c"]).astype("category"), - "c": cudf.Series(["c", "c", "a", "a"]).astype("category"), + "a": pd.Series(["a", "a", "b", "b"]).astype("category"), + "b": pd.Series(["b", "b", "c", "c"]).astype("category"), + "c": pd.Series(["c", "c", "a", "a"]).astype("category"), }, { - "a": cudf.Series(["a", None, "b", "b"]).astype("category"), - "b": cudf.Series(["b", "b", None, "c"]).astype("category"), - "c": cudf.Series(["c", "c", "a", None]).astype("category"), + "a": pd.Series(["a", None, "b", "b"]).astype("category"), + "b": pd.Series(["b", "b", None, "c"]).astype("category"), + "c": pd.Series(["c", "c", "a", None]).astype("category"), }, ], ) diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index 41bf96f6939..15988673bcd 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -311,16 +311,17 @@ def test_column_view_valid_numeric_to_numeric(data, from_dtype, to_dtype): @pytest.mark.parametrize( - "data,from_dtype,to_dtype", + "to_dtype", [ - (np.arange(9), "int8", "int64"), - (np.arange(3), "int8", "int16"), - (np.arange(6), "int8", "float32"), - (np.arange(1), "int8", "datetime64[ns]"), + "int64", + "int16", + "float32", + "datetime64[ns]", ], ) -def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype): - from_dtype = np.dtype(from_dtype) +def test_column_view_invalid_numeric_to_numeric(to_dtype): + data = np.arange(5) + from_dtype = np.dtype("int8") to_dtype = np.dtype(to_dtype) cpu_data = np.asarray(data, dtype=from_dtype) gpu_data = as_column(data, dtype=from_dtype) @@ -407,34 +408,19 @@ def test_column_view_string_slice(slc): assert_eq(expect, got) +@pytest.mark.parametrize("box", [cp.asarray, np.asarray]) @pytest.mark.parametrize( - 
"data,expected", + "data", [ - ( - np.array([1, 2, 3, 4, 5], dtype="uint8"), - cudf.core.column.as_column( - [1, 2, 3, 4, 5], dtype=np.dtype(np.uint8) - ), - ), - ( - cp.array([1, 2, 3, 4, 5], dtype="uint8"), - cudf.core.column.as_column( - [1, 2, 3, 4, 5], dtype=np.dtype(np.uint8) - ), - ), - ( - cp.array([], dtype="uint8"), - cudf.core.column.column_empty(0, dtype=np.dtype(np.uint8)), - ), - ( - cp.array([255], dtype="uint8"), - cudf.core.column.as_column([255], dtype=np.dtype(np.uint8)), - ), + np.array([1, 2, 3, 4, 5], dtype="uint8"), + np.array([], dtype="uint8"), + np.array([255], dtype="uint8"), ], ) -def test_as_column_buffer(data, expected): +def test_as_column_buffer(box, data): + expected = cudf.core.column.as_column(data) actual_column = cudf.core.column.as_column( - cudf.core.buffer.as_buffer(data), dtype=data.dtype + cudf.core.buffer.as_buffer(box(data)), dtype=data.dtype ) assert_eq( cudf.Series._from_column(actual_column), @@ -560,12 +546,8 @@ def test_build_series_from_nullable_pandas_dtype(pd_dtype, expect_dtype): ("Float64", "float64"), ], ) -@pytest.mark.parametrize( - "data", - [[1, 2, 0]], -) -def test_astype_with_aliases(alias, expect_dtype, data): - pd_data = pd.Series(data) +def test_astype_with_aliases(alias, expect_dtype): + pd_data = pd.Series([1, 2, 0]) gd_data = cudf.Series.from_pandas(pd_data) assert_eq(pd_data.astype(expect_dtype), gd_data.astype(alias)) diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index 023dbbd8daf..ae0e71b5f58 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -9,25 +9,6 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.testing import assert_eq -simple_test_data = [ - {}, - {"a": as_column([])}, - {"a": as_column([1])}, - {"a": as_column(["a"])}, - {"a": as_column([1, 2, 3]), "b": as_column(["a", "b", "c"])}, -] - -mi_test_data = [ - {("a", "b"): as_column([1, 2, 4]), ("a", "c"): as_column([2, 3, 4])}, - {("a", "b"): as_column([1, 2, 3]), ("a", ""): as_column([2, 3, 4])}, - {("a", "b"): as_column([1, 2, 4]), ("c", "d"): as_column([2, 3, 4])}, - { - ("a", "b"): as_column([1, 2, 3]), - ("a", "c"): as_column([2, 3, 4]), - ("b", ""): as_column([4, 5, 6]), - }, -] - def check_ca_equal(lhs, rhs): assert lhs.level_names == rhs.level_names @@ -39,19 +20,17 @@ def check_ca_equal(lhs, rhs): assert_eq(lhs[l_key], rhs[r_key]) -@pytest.fixture(params=simple_test_data) +@pytest.fixture( + params=[ + {}, + {"a": []}, + {"a": [1]}, + {"a": ["a"]}, + {"a": [1, 2, 3], "b": ["a", "b", "c"]}, + ] +) def simple_data(request): - return request.param - - -@pytest.fixture(params=mi_test_data) -def mi_data(request): - return request.param - - -@pytest.fixture(params=simple_test_data + mi_test_data) -def all_data(request): - return request.param + return {key: as_column(data) for key, data in request.param.items()} def test_to_pandas_simple(simple_data): @@ -72,7 +51,17 @@ def test_to_pandas_simple(simple_data): ) -def test_to_pandas_multiindex(mi_data): +@pytest.mark.parametrize( + "keys", + [ + [("a", "b"), ("a", "c")], + [("a", "b"), ("c", "d")], + [("a", "b"), ("a", ""), ("b", "")], + [("a", "b"), ("a", "c"), ("b", "")], + ], +) +def test_to_pandas_multiindex(keys): + mi_data = {key: as_column([1, 2, 4]) for key in keys} ca = ColumnAccessor(mi_data, multiindex=True) assert_eq( ca.to_pandas_index, diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index 
fe86df99d35..2c6bc0e8a00 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. import datetime @@ -9,74 +9,45 @@ import cudf from cudf import Series from cudf.core.index import Index, RangeIndex -from cudf.testing import assert_eq from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES -def cudf_date_series(start, stop, freq): - return Series(pd.date_range(start, stop, freq=freq, name="times")) - - -def cudf_num_series(start, stop, step=1): - return Series(range(start, stop, step)) - - -def get_categorical_series(): - return Series( - pd.Categorical( - ["ab", "ac", "cd", "ab", "cd"], categories=["ab", "ac", "cd"] - ) - ) - - -def get_string_series(): - return Series(["ab", "ac", "ba", "cc", "ad"]) - - -# If the type being searched is different from type of series, exceptions -# are thrown well within the python code, and needs to be handled. -# Some of the test cases check this scenario. Example : String Vs Numerical -testdata_all = [ - ( - cudf_date_series("20010101", "20020215", freq="400h"), - datetime.datetime.strptime("2001-01-01", "%Y-%m-%d"), - True, - ), - ( - cudf_date_series("20010101", "20020215", freq="400h"), - datetime.datetime.strptime("2000-01-01", "%Y-%m-%d"), - False, - ), - (cudf_date_series("20010101", "20020215", freq="400h"), 20000101, False), - (get_categorical_series(), "cd", True), - (get_categorical_series(), "dc", False), - (get_categorical_series(), "c", False), - (get_categorical_series(), "c", False), - (get_categorical_series(), 1, False), - (get_string_series(), "ac", True), - (get_string_series(), "ca", False), - (get_string_series(), "c", False), - (get_string_series(), 97, False), - (cudf_num_series(0, 100, 5), 60, True), - (cudf_num_series(0, 100, 5), 71, False), - (cudf_num_series(0, 100, 5), "a", False), -] - - -@pytest.mark.parametrize("values, item, expected", testdata_all) -def test_series_contains(values, item, expected): - assert_eq(expected, item in Series(index=values)) - - -@pytest.mark.parametrize("values, item, expected", testdata_all) -def test_index_contains(values, item, expected): - index = Index(values) - assert_eq(expected, item in index) +@pytest.mark.parametrize( + "values, item, expected", + [ + [[1, 2, 3], 2, True], + [[1, 2, 3], 4, False], + [[1, 2, 3], "a", False], + [["a", "b", "c"], "a", True], + [["a", "b", "c"], "ab", False], + [["a", "b", "c"], 6, False], + [pd.Categorical(["a", "b", "c"]), "a", True], + [pd.Categorical(["a", "b", "c"]), "ab", False], + [pd.Categorical(["a", "b", "c"]), 6, False], + [pd.date_range("20010101", periods=5, freq="D"), 20000101, False], + [ + pd.date_range("20010101", periods=5, freq="D"), + datetime.datetime(2000, 1, 1), + False, + ], + [ + pd.date_range("20010101", periods=5, freq="D"), + datetime.datetime(2001, 1, 1), + True, + ], + ], +) +@pytest.mark.parametrize( + "box", [Index, lambda x: Series(index=x)], ids=["index", "series"] +) +def test_contains(values, item, expected, box): + assert (item in box(values)) is expected def test_rangeindex_contains(): - assert_eq(True, 9 in RangeIndex(start=0, stop=10, name="Index")) - assert_eq(False, 10 in RangeIndex(start=0, stop=10, name="Index")) + ridx = RangeIndex(start=0, stop=10, name="Index") + assert 9 in ridx + assert 10 not in ridx @pytest.mark.parametrize("dtype", NUMERIC_TYPES) diff --git a/python/cudf/cudf/tests/test_copying.py 
b/python/cudf/cudf/tests/test_copying.py index 26f92f75807..dc19c52715a 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -365,52 +365,48 @@ def test_series_zero_copy_cow_off(): @pytest.mark.parametrize("copy_on_write", [True, False]) def test_series_str_copy(copy_on_write): - original_cow_setting = cudf.get_option("copy_on_write") - cudf.set_option("copy_on_write", copy_on_write) - s = cudf.Series(["a", "b", "c", "d", "e"]) - s1 = s.copy(deep=True) - s2 = s.copy(deep=True) + with cudf.option_context("copy_on_write", copy_on_write): + s = cudf.Series(["a", "b", "c", "d", "e"]) + s1 = s.copy(deep=True) + s2 = s.copy(deep=True) - assert_eq(s, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) - s[0:3] = "abc" + s[0:3] = "abc" - assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) - s2[1:4] = "xyz" + s2[1:4] = "xyz" - assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "xyz", "xyz", "xyz", "e"])) - cudf.set_option("copy_on_write", original_cow_setting) + assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "xyz", "xyz", "xyz", "e"])) @pytest.mark.parametrize("copy_on_write", [True, False]) def test_series_cat_copy(copy_on_write): - original_cow_setting = cudf.get_option("copy_on_write") - cudf.set_option("copy_on_write", copy_on_write) - s = cudf.Series([10, 20, 30, 40, 50], dtype="category") - s1 = s.copy(deep=True) - s2 = s1.copy(deep=True) - s3 = s1.copy(deep=True) - - s[0] = 50 - assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - assert_eq(s2, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - - s2[3] = 10 - s3[2:5] = 20 - assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s2, cudf.Series([10, 20, 30, 10, 50], dtype=s.dtype)) - assert_eq(s3, cudf.Series([10, 20, 20, 20, 20], dtype=s.dtype)) - cudf.set_option("copy_on_write", original_cow_setting) + with cudf.option_context("copy_on_write", copy_on_write): + s = cudf.Series([10, 20, 30, 40, 50], dtype="category") + s1 = s.copy(deep=True) + s2 = s1.copy(deep=True) + s3 = s1.copy(deep=True) + + s[0] = 50 + assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + assert_eq(s2, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + + s2[3] = 10 + s3[2:5] = 20 + assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s2, cudf.Series([10, 20, 30, 10, 50], dtype=s.dtype)) + assert_eq(s3, 
cudf.Series([10, 20, 20, 20, 20], dtype=s.dtype)) def test_dataframe_cow_slice_setitem(): From 7f05fbcd58eac9447d8bd53d46b83d13f58fe483 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 15:28:10 -0700 Subject: [PATCH 096/366] Move test_search/test_scan/test_seriesmap.py to new cudf classic test directory structure (#19492) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19492 --- .../dataframe/methods/test_searchsorted.py | 42 +++ .../series/methods/test_cumulative_methods.py | 124 ++++++++ .../methods/test_map.py} | 0 .../methods/test_searchsorted.py} | 40 +-- python/cudf/cudf/tests/test_scan.py | 269 ------------------ 5 files changed, 167 insertions(+), 308 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_searchsorted.py create mode 100644 python/cudf/cudf/tests/series/methods/test_cumulative_methods.py rename python/cudf/cudf/tests/{test_seriesmap.py => series/methods/test_map.py} (100%) rename python/cudf/cudf/tests/{test_search.py => series/methods/test_searchsorted.py} (78%) delete mode 100644 python/cudf/cudf/tests/test_scan.py diff --git a/python/cudf/cudf/tests/dataframe/methods/test_searchsorted.py b/python/cudf/cudf/tests/dataframe/methods/test_searchsorted.py new file mode 100644 index 00000000000..f765c576993 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_searchsorted.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import pytest + +import cudf + + +@pytest.mark.parametrize("side", ["left", "right"]) +@pytest.mark.parametrize("multiindex", [True, False]) +def test_searchsorted_dataframe(side, multiindex): + values = cudf.DataFrame( + { + "a": [1, 0, 5, 1], + "b": [-0.998, 0.031, -0.888, -0.998], + "c": ["C", "A", "G", "B"], + } + ) + base = cudf.DataFrame( + { + "a": [1, 1, 1, 5], + "b": [-0.999, -0.998, -0.997, -0.888], + "c": ["A", "C", "E", "G"], + } + ) + + if multiindex: + base = base.set_index(["a", "b", "c"]).index + values = values.set_index(["a", "b", "c"]).index + + result = base.searchsorted(values, side=side).tolist() + + if side == "left": + assert result == [1, 0, 3, 1] + else: + assert result == [2, 0, 4, 1] + + +def test_search_sorted_dataframe_unequal_number_of_columns(): + values = cudf.DataFrame({"a": [1, 0, 5, 1]}) + base = cudf.DataFrame({"a": [1, 0, 5, 1], "b": ["x", "z", "w", "a"]}) + + with pytest.raises(ValueError, match="Mismatch number of columns"): + base.searchsorted(values) diff --git a/python/cudf/cudf/tests/series/methods/test_cumulative_methods.py b/python/cudf/cudf/tests/series/methods/test_cumulative_methods.py new file mode 100644 index 00000000000..8c2b14ec2d9 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_cumulative_methods.py @@ -0,0 +1,124 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
+from cudf.testing import assert_eq
+from cudf.testing._utils import gen_rand
+
+
+@pytest.fixture(params=[0, 5])
+def nelem(request):
+    return request.param
+
+
+@pytest.fixture(params=["cumsum", "cummin", "cummax", "cumprod"])
+def cumulative_methods(request):
+    return request.param
+
+
+def test_cumulative_methods(numeric_types_as_str, nelem, cumulative_methods):
+    dtype = np.dtype(numeric_types_as_str)
+    if dtype == np.int8:
+        # to keep data in range
+        data = gen_rand(dtype, nelem, low=-2, high=2)
+    else:
+        data = gen_rand(dtype, nelem)
+
+    decimal = 4 if dtype == np.float32 else 6
+
+    gs = cudf.Series(data)
+    ps = pd.Series(data)
+    np.testing.assert_array_almost_equal(
+        getattr(gs, cumulative_methods)().to_numpy(),
+        getattr(ps, cumulative_methods)(),
+        decimal=decimal,
+    )
+
+
+def test_cumulative_methods_masked(numeric_types_as_str, cumulative_methods):
+    data = [1, 2, None, 4, 5]
+    gs = cudf.Series(data).astype(numeric_types_as_str)
+    # float64 since pandas uses NaN as missing value
+    ps = pd.Series(data).astype("float64")
+    assert_eq(
+        getattr(gs, cumulative_methods)(),
+        getattr(ps, cumulative_methods)(),
+        check_dtype=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        Decimal64Dtype(8, 4),
+        Decimal64Dtype(10, 5),
+        Decimal64Dtype(12, 7),
+        Decimal32Dtype(8, 5),
+        Decimal128Dtype(13, 6),
+    ],
+)
+def test_cumsum_decimal(dtype):
+    data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cumsum()
+    expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        Decimal64Dtype(8, 4),
+        Decimal64Dtype(11, 6),
+        Decimal64Dtype(14, 7),
+        Decimal32Dtype(8, 4),
+        Decimal128Dtype(11, 6),
+    ],
+)
+def test_cummin_decimal(dtype):
+    data = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cummin()
+    expected = cudf.Series.from_pandas(pser.cummin()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        Decimal64Dtype(8, 4),
+        Decimal64Dtype(11, 6),
+        Decimal64Dtype(14, 7),
+        Decimal32Dtype(8, 4),
+        Decimal128Dtype(11, 6),
+    ],
+)
+def test_cummax_decimal(dtype):
+    data = [np.nan, "54.203", "8.222", "644.32", "-562.272"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cummax()
+    expected = cudf.Series.from_pandas(pser.cummax()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
+@pytest.mark.parametrize("method", ["cumsum", "cumprod"])
+def test_scan_boolean(method):
+    s = cudf.Series([True, False, True, False])
+
+    got = getattr(s, method)()
+    expect = getattr(s.to_pandas(), method)()
+
+    assert_eq(expect, got)
diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/series/methods/test_map.py
similarity index 100%
rename from python/cudf/cudf/tests/test_seriesmap.py
rename to python/cudf/cudf/tests/series/methods/test_map.py
diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/series/methods/test_searchsorted.py
similarity index 78%
rename from python/cudf/cudf/tests/test_search.py
rename to python/cudf/cudf/tests/series/methods/test_searchsorted.py
index 6fcbcde5be7..23fb916a202 100644
--- a/python/cudf/cudf/tests/test_search.py
+++ 
b/python/cudf/cudf/tests/series/methods/test_searchsorted.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. import cupy import numpy as np import pandas as pd @@ -50,44 +50,6 @@ def test_searchsorted(side, obj_class, vals_class): assert_eq(expect, cupy.asnumpy(got)) -@pytest.mark.parametrize("side", ["left", "right"]) -@pytest.mark.parametrize("multiindex", [True, False]) -def test_searchsorted_dataframe(side, multiindex): - values = cudf.DataFrame( - { - "a": [1, 0, 5, 1], - "b": [-0.998, 0.031, -0.888, -0.998], - "c": ["C", "A", "G", "B"], - } - ) - base = cudf.DataFrame( - { - "a": [1, 1, 1, 5], - "b": [-0.999, -0.998, -0.997, -0.888], - "c": ["A", "C", "E", "G"], - } - ) - - if multiindex: - base = base.set_index(["a", "b", "c"]).index - values = values.set_index(["a", "b", "c"]).index - - result = base.searchsorted(values, side=side).tolist() - - if side == "left": - assert result == [1, 0, 3, 1] - else: - assert result == [2, 0, 4, 1] - - -def test_search_sorted_dataframe_unequal_number_of_columns(): - values = cudf.DataFrame({"a": [1, 0, 5, 1]}) - base = cudf.DataFrame({"a": [1, 0, 5, 1], "b": ["x", "z", "w", "a"]}) - - with pytest.raises(ValueError, match="Mismatch number of columns"): - base.searchsorted(values) - - @pytest.mark.parametrize("side", ["left", "right"]) def test_searchsorted_categorical(side): cat1 = pd.Categorical( diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py deleted file mode 100644 index d4b21480070..00000000000 --- a/python/cudf/cudf/tests/test_scan.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing import assert_eq -from cudf.testing._utils import INTEGER_TYPES, NUMERIC_TYPES, gen_rand - - -@pytest.fixture(params=NUMERIC_TYPES) -def dtype(request): - return request.param - - -@pytest.fixture(params=[0, 1, 5]) -def nelem(request): - return request.param - - -def test_cumsum(dtype, nelem): - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, nelem, low=-2, high=2) - else: - data = gen_rand(dtype, nelem) - - decimal = 4 if dtype == np.float32 else 6 - - # series - gs = cudf.Series(data) - ps = pd.Series(data) - np.testing.assert_array_almost_equal( - gs.cumsum().to_numpy(), ps.cumsum(), decimal=decimal - ) - - # dataframe series (named series) - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series(data) - pdf = pd.DataFrame() - pdf["a"] = pd.Series(data) - np.testing.assert_array_almost_equal( - gdf.a.cumsum().to_numpy(), pdf.a.cumsum(), decimal=decimal - ) - - -def test_cumsum_masked(): - data = [1, 2, None, 4, 5] - float_types = ["float32", "float64"] - - for type_ in float_types: - gs = cudf.Series(data).astype(type_) - ps = pd.Series(data).astype(type_) - assert_eq(gs.cumsum(), ps.cumsum()) - - for type_ in INTEGER_TYPES: - gs = cudf.Series(data).astype(type_) - got = gs.cumsum() - expected = pd.Series([1, 3, np.nan, 7, 12], dtype="float64") - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "dtype", - [ - Decimal64Dtype(8, 4), - Decimal64Dtype(10, 5), - Decimal64Dtype(12, 7), - Decimal32Dtype(8, 5), - Decimal128Dtype(13, 6), - ], -) -def test_cumsum_decimal(dtype): - data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"] - gser = cudf.Series(data).astype(dtype) - pser = pd.Series(data, dtype="float64") - - got = gser.cumsum() - 
expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype) - - assert_eq(got, expected) - - -def test_cummin(dtype, nelem): - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, nelem, low=-2, high=2) - else: - data = gen_rand(dtype, nelem) - - decimal = 4 if dtype == np.float32 else 6 - - # series - gs = cudf.Series(data) - ps = pd.Series(data) - np.testing.assert_array_almost_equal( - gs.cummin().to_numpy(), ps.cummin(), decimal=decimal - ) - - # dataframe series (named series) - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series(data) - pdf = pd.DataFrame() - pdf["a"] = pd.Series(data) - np.testing.assert_array_almost_equal( - gdf.a.cummin().to_numpy(), pdf.a.cummin(), decimal=decimal - ) - - -def test_cummin_masked(): - data = [1, 2, None, 4, 5] - float_types = ["float32", "float64"] - - for type_ in float_types: - gs = cudf.Series(data).astype(type_) - ps = pd.Series(data).astype(type_) - assert_eq(gs.cummin(), ps.cummin()) - - for type_ in INTEGER_TYPES: - gs = cudf.Series(data).astype(type_) - expected = pd.Series([1, 1, np.nan, 1, 1]).astype("float64") - assert_eq(gs.cummin(), expected) - - -@pytest.mark.parametrize( - "dtype", - [ - Decimal64Dtype(8, 4), - Decimal64Dtype(11, 6), - Decimal64Dtype(14, 7), - Decimal32Dtype(8, 4), - Decimal128Dtype(11, 6), - ], -) -def test_cummin_decimal(dtype): - data = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"] - gser = cudf.Series(data).astype(dtype) - pser = pd.Series(data, dtype="float64") - - got = gser.cummin() - expected = cudf.Series.from_pandas(pser.cummin()).astype(dtype) - - assert_eq(got, expected) - - -def test_cummax(dtype, nelem): - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, nelem, low=-2, high=2) - else: - data = gen_rand(dtype, nelem) - - decimal = 4 if dtype == np.float32 else 6 - - # series - gs = cudf.Series(data) - ps = pd.Series(data) - np.testing.assert_array_almost_equal( - gs.cummax().to_numpy(), ps.cummax(), decimal=decimal - ) - - # dataframe series (named series) - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series(data) - pdf = pd.DataFrame() - pdf["a"] = pd.Series(data) - np.testing.assert_array_almost_equal( - gdf.a.cummax().to_numpy(), pdf.a.cummax(), decimal=decimal - ) - - -def test_cummax_masked(): - data = [1, 2, None, 4, 5] - float_types = ["float32", "float64"] - - for type_ in float_types: - gs = cudf.Series(data).astype(type_) - ps = pd.Series(data).astype(type_) - assert_eq(gs.cummax(), ps.cummax()) - - for type_ in INTEGER_TYPES: - gs = cudf.Series(data).astype(type_) - expected = pd.Series([1, 2, np.nan, 4, 5]).astype("float64") - assert_eq(gs.cummax(), expected) - - -@pytest.mark.parametrize( - "dtype", - [ - Decimal64Dtype(8, 4), - Decimal64Dtype(11, 6), - Decimal64Dtype(14, 7), - Decimal32Dtype(8, 4), - Decimal128Dtype(11, 6), - ], -) -def test_cummax_decimal(dtype): - data = [np.nan, "54.203", "8.222", "644.32", "-562.272"] - gser = cudf.Series(data).astype(dtype) - pser = pd.Series(data, dtype="float64") - - got = gser.cummax() - expected = cudf.Series.from_pandas(pser.cummax()).astype(dtype) - - assert_eq(got, expected) - - -def test_cumprod(dtype, nelem): - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, nelem, low=-2, high=2) - else: - data = gen_rand(dtype, nelem) - - decimal = 4 if dtype == np.float32 else 6 - - # series - gs = cudf.Series(data) - ps = pd.Series(data) - np.testing.assert_array_almost_equal( - gs.cumprod().to_numpy(), ps.cumprod(), decimal=decimal - ) - - # dataframe series (named series) - gdf = 
cudf.DataFrame() - gdf["a"] = cudf.Series(data) - pdf = pd.DataFrame() - pdf["a"] = pd.Series(data) - np.testing.assert_array_almost_equal( - gdf.a.cumprod().to_numpy(), pdf.a.cumprod(), decimal=decimal - ) - - -def test_cumprod_masked(): - data = [1, 2, None, 4, 5] - float_types = ["float32", "float64"] - - for type_ in float_types: - gs = cudf.Series(data).astype(type_) - ps = pd.Series(data).astype(type_) - assert_eq(gs.cumprod(), ps.cumprod()) - - for type_ in INTEGER_TYPES: - gs = cudf.Series(data).astype(type_) - got = gs.cumprod() - expected = pd.Series([1, 2, np.nan, 8, 40], dtype="float64") - assert_eq(got, expected) - - -def test_scan_boolean_cumsum(): - s = cudf.Series([0, -1, -300, 23, 4, -3, 0, 0, 100]) - - # cumsum test - got = (s > 0).cumsum() - expect = (s > 0).to_pandas().cumsum() - - assert_eq(expect, got) - - -def test_scan_boolean_cumprod(): - s = cudf.Series([0, -1, -300, 23, 4, -3, 0, 0, 100]) - - # cumprod test - got = (s > 0).cumprod() - expect = (s > 0).to_pandas().cumprod() - - assert_eq(expect, got) From 9c6ee0aa5bbd448f208cbf88ef5c669e73f95516 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 11 Aug 2025 15:52:19 -0700 Subject: [PATCH 097/366] Add streams to stream_compaction (#19651) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19651 --- .../pylibcudf/libcudf/stream_compaction.pxd | 22 +++-- .../pylibcudf/pylibcudf/stream_compaction.pxd | 25 ++++-- .../pylibcudf/pylibcudf/stream_compaction.pyi | 30 +++++-- .../pylibcudf/pylibcudf/stream_compaction.pyx | 83 ++++++++++++++----- 4 files changed, 121 insertions(+), 39 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 78b9bcb299b..ba7c7f6edd1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -14,6 +14,7 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: @@ -26,18 +27,21 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: cdef unique_ptr[table] drop_nulls( table_view source_table, vector[size_type] keys, - size_type keep_threshold + size_type keep_threshold, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] drop_nans( table_view source_table, vector[size_type] keys, - size_type keep_threshold + size_type keep_threshold, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] apply_boolean_mask( table_view source_table, - column_view boolean_mask + column_view boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] unique( @@ -45,6 +49,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: vector[size_type] keys, duplicate_keep_option keep, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] distinct( @@ -53,6 +58,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equals, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] distinct_indices( @@ -60,6 +66,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] stable_distinct( @@ -68,12 +75,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef size_type unique_count( column_view column, null_policy null_handling, - nan_policy nan_handling) except +libcudf_exception_handler + nan_policy nan_handling, + cuda_stream_view stream) except +libcudf_exception_handler cdef size_type unique_count( table_view source_table, @@ -82,7 +91,8 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: cdef size_type distinct_count( column_view column, null_policy null_handling, - nan_policy nan_handling) except +libcudf_exception_handler + nan_policy nan_handling, + cuda_stream_view stream) except +libcudf_exception_handler cdef size_type distinct_count( table_view source_table, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index a20a23e2e58..1eacdf32f2c 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option from pylibcudf.libcudf.types cimport ( @@ -8,22 +8,30 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table -cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold) +cpdef Table drop_nulls( + Table source_table, list keys, size_type keep_threshold, Stream stream = * +) -cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold) +cpdef Table drop_nans( + Table source_table, list keys, size_type keep_threshold, Stream stream = * +) -cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask) +cpdef Table apply_boolean_mask( + Table source_table, Column boolean_mask, Stream stream = * +) cpdef Table unique( Table input, list keys, duplicate_keep_option keep, null_equality nulls_equal, + Stream stream = *, ) cpdef Table distinct( @@ -32,6 +40,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream = *, ) cpdef Column distinct_indices( @@ -39,6 +48,7 @@ cpdef Column distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream = *, ) cpdef Table stable_distinct( @@ -47,16 +57,19 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream = *, ) cpdef size_type unique_count( Column column, null_policy null_handling, - nan_policy nan_handling + nan_policy nan_handling, + Stream stream = * ) cpdef size_type distinct_count( Column column, null_policy null_handling, - nan_policy nan_handling + nan_policy nan_handling, + Stream stream = * ) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index 99cade48309..5b7acab3f50 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NanEquality, NanPolicy, NullEquality, NullPolicy @@ -13,17 +15,26 @@ class DuplicateKeepOption(IntEnum): KEEP_NONE = ... def drop_nulls( - source_table: Table, keys: list[int], keep_threshold: int + source_table: Table, + keys: list[int], + keep_threshold: int, + stream: Stream | None = None, ) -> Table: ... def drop_nans( - source_table: Table, keys: list[int], keep_threshold: int + source_table: Table, + keys: list[int], + keep_threshold: int, + stream: Stream | None = None, +) -> Table: ... +def apply_boolean_mask( + source_table: Table, boolean_mask: Column, stream: Stream | None = None ) -> Table: ... -def apply_boolean_mask(source_table: Table, boolean_mask: Column) -> Table: ... def unique( input: Table, keys: list[int], keep: DuplicateKeepOption, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> Table: ... def distinct( input: Table, @@ -31,12 +42,14 @@ def distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, + stream: Stream | None = None, ) -> Table: ... def distinct_indices( input: Table, keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, + stream: Stream | None = None, ) -> Column: ... 
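All of the new stream parameters are optional keywords; when omitted, the
functions fall back to the default CUDA stream (via _get_stream), so existing
callers are unaffected. A minimal usage sketch, assuming pyarrow interop for
building the table and RMM's exported DEFAULT_STREAM (the data values are
illustrative only):

    import pyarrow as pa
    import pylibcudf as plc
    from rmm.pylibrmm.stream import DEFAULT_STREAM

    # Build a pylibcudf Table from host data via pyarrow interop.
    tbl = plc.interop.from_arrow(pa.table({"a": [1, None, 3, None]}))

    # Keep rows with at least one non-null value in key column 0, issuing
    # the work on an explicitly supplied stream.
    filtered = plc.stream_compaction.drop_nulls(tbl, [0], 1, stream=DEFAULT_STREAM)

    # Omitting the argument preserves the previous default-stream behavior.
    filtered_default = plc.stream_compaction.drop_nulls(tbl, [0], 1)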
def stable_distinct( input: Table, @@ -44,10 +57,17 @@ def stable_distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, + stream: Stream | None = None, ) -> Table: ... def unique_count( - source: Column, null_handling: NullPolicy, nan_handling: NanPolicy + source: Column, + null_handling: NullPolicy, + nan_handling: NanPolicy, + stream: Stream | None = None, ) -> int: ... def distinct_count( - source: Column, null_handling: NullPolicy, nan_handling: NanPolicy + source: Column, + null_handling: NullPolicy, + nan_handling: NanPolicy, + stream: Stream | None = None, ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index 8f308e3b29e..8eafe040508 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -17,9 +17,11 @@ from pylibcudf.libcudf.types cimport ( from pylibcudf.libcudf.stream_compaction import \ duplicate_keep_option as DuplicateKeepOption # no-cython-lint, isort:skip +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = [ "DuplicateKeepOption", @@ -34,7 +36,9 @@ __all__ = [ "unique_count", ] -cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold): +cpdef Table drop_nulls( + Table source_table, list keys, size_type keep_threshold, Stream stream=None +): """Filters out rows from the input table based on the presence of nulls. For details, see :cpp:func:`drop_nulls`. @@ -55,14 +59,19 @@ cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold): """ cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.drop_nulls( - source_table.view(), c_keys, keep_threshold + source_table.view(), c_keys, keep_threshold, stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold): +cpdef Table drop_nans( + Table source_table, list keys, size_type keep_threshold, Stream stream=None +): """Filters out rows from the input table based on the presence of NaNs. For details, see :cpp:func:`drop_nans`. @@ -83,14 +92,19 @@ cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold): """ cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys + + stream = _get_stream(stream) + with nogil: - c_result = cpp_stream_compaction.drop_nulls( - source_table.view(), c_keys, keep_threshold + c_result = cpp_stream_compaction.drop_nans( + source_table.view(), c_keys, keep_threshold, stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask): +cpdef Table apply_boolean_mask( + Table source_table, Column boolean_mask, Stream stream=None +): """Filters out rows from the input table based on a boolean mask. For details, see :cpp:func:`apply_boolean_mask`. @@ -108,11 +122,14 @@ cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask): A new table with rows removed based on the boolean mask. 
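    For example, applying the mask [True, False, True] to a three-row table
    keeps rows 0 and 2; a null mask entry is treated as False, so the
    corresponding row is dropped as well.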
""" cdef unique_ptr[table] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.apply_boolean_mask( - source_table.view(), boolean_mask.view() + source_table.view(), boolean_mask.view(), stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table unique( @@ -120,6 +137,7 @@ cpdef Table unique( list keys, duplicate_keep_option keep, null_equality nulls_equal, + Stream stream=None, ): """Filter duplicate consecutive rows from the input table. @@ -149,11 +167,14 @@ cpdef Table unique( """ cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.unique( - input.view(), c_keys, keep, nulls_equal + input.view(), c_keys, keep, nulls_equal, stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table distinct( @@ -162,6 +183,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream=None, ): """Get the distinct rows from the input table. @@ -188,11 +210,14 @@ cpdef Table distinct( """ cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal + input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Column distinct_indices( @@ -200,6 +225,7 @@ cpdef Column distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream=None, ): """Get the indices of the distinct rows from the input table. @@ -222,11 +248,14 @@ cpdef Column distinct_indices( A new column with the indices of the distinct rows from the input table. """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.distinct_indices( - input.view(), keep, nulls_equal, nans_equal + input.view(), keep, nulls_equal, nans_equal, stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Table stable_distinct( @@ -235,6 +264,7 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + Stream stream=None, ): """Get the distinct rows from the input table, preserving input order. @@ -261,17 +291,21 @@ cpdef Table stable_distinct( """ cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys + + stream = _get_stream(stream) + with nogil: c_result = cpp_stream_compaction.stable_distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal + input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef size_type unique_count( Column source, null_policy null_handling, - nan_policy nan_handling + nan_policy nan_handling, + Stream stream=None ): """Returns the number of unique consecutive elements in the input column. @@ -296,15 +330,18 @@ cpdef size_type unique_count( If the input column is sorted, then unique_count can produce the same result as distinct_count, but faster. 
""" + stream = _get_stream(stream) + return cpp_stream_compaction.unique_count( - source.view(), null_handling, nan_handling + source.view(), null_handling, nan_handling, stream.view() ) cpdef size_type distinct_count( Column source, null_policy null_handling, - nan_policy nan_handling + nan_policy nan_handling, + Stream stream=None ): """Returns the number of distinct elements in the input column. @@ -324,8 +361,10 @@ cpdef size_type distinct_count( size_type The number of distinct elements in the input column. """ + stream = _get_stream(stream) + return cpp_stream_compaction.distinct_count( - source.view(), null_handling, nan_handling + source.view(), null_handling, nan_handling, stream.view() ) DuplicateKeepOption.__str__ = DuplicateKeepOption.__repr__ From edf638edfb0c7592af261f00592569696e5691a1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:15:04 -0700 Subject: [PATCH 098/366] Move some test_datetime.py tests to new cudf classic test directory structure (#19505) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19505 --- .../tests/dataframe/indexing/test_setitem.py | 12 + .../tests/dataframe/methods/test_dropna.py | 34 + .../dataframe/methods/test_sort_values.py | 26 + .../tests/dataframe/methods/test_to_arrow.py | 10 + .../datetime => dateoffset}/__init__.py | 0 .../tests/dateoffset/test_constructors.py | 10 + .../general_functions/test_date_range.py | 238 ++ .../general_functions/test_to_datetime.py | 312 +++ .../tests/indexes/datetime/test_components.py | 1 - .../indexes/datetime/test_constructing.py | 1 - .../tests/indexes/datetime/test_conversion.py | 1 - .../indexes/datetimeindex/methods/__init__.py | 0 .../datetimeindex/methods/test_isocalendar.py | 30 + .../datetimeindex/methods/test_strftime.py | 24 + .../datetimeindex/methods/test_tz_convert.py | 16 + .../methods/test_tz_localize.py} | 11 +- .../indexes/datetimeindex/test_attributes.py | 106 +- .../indexes/datetimeindex/test_constructor.py | 87 + .../test_getitem.py} | 0 .../cudf/tests/series/accessors/test_dt.py | 478 ++++ .../tests/series/indexing/test_getitem.py | 5 + .../cudf/tests/series/methods/test_astype.py | 183 ++ .../cudf/tests/series/methods/test_fillna.py | 39 + .../tests/series/methods/test_first_last.py | 126 + .../cudf/tests/series/methods/test_nunique.py | 27 + .../cudf/tests/series/methods/test_query.py | 49 + .../tests/series/methods/test_to_pandas.py | 18 + .../cudf/tests/series/methods/test_unique.py | 38 + .../cudf/cudf/tests/series/test_attributes.py | 52 + .../cudf/tests/series/test_constructors.py | 99 + python/cudf/cudf/tests/test_datetime.py | 2020 +---------------- 31 files changed, 2020 insertions(+), 2033 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/indexing/test_setitem.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_dropna.py rename python/cudf/cudf/tests/{indexes/datetime => dateoffset}/__init__.py (100%) create mode 100644 python/cudf/cudf/tests/dateoffset/test_constructors.py create mode 100644 python/cudf/cudf/tests/general_functions/test_date_range.py create mode 100644 python/cudf/cudf/tests/general_functions/test_to_datetime.py delete mode 100644 python/cudf/cudf/tests/indexes/datetime/test_components.py 
delete mode 100644 python/cudf/cudf/tests/indexes/datetime/test_constructing.py delete mode 100644 python/cudf/cudf/tests/indexes/datetime/test_conversion.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_isocalendar.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_strftime.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py rename python/cudf/cudf/tests/indexes/{datetime/test_time_specific.py => datetimeindex/methods/test_tz_localize.py} (62%) create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py rename python/cudf/cudf/tests/indexes/{datetime/test_indexing.py => datetimeindex/test_getitem.py} (100%) create mode 100644 python/cudf/cudf/tests/series/methods/test_first_last.py create mode 100644 python/cudf/cudf/tests/series/methods/test_query.py create mode 100644 python/cudf/cudf/tests/series/methods/test_unique.py diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py new file mode 100644 index 00000000000..73f4632fafd --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd + +from cudf import DataFrame + + +def test_setitem_datetime(): + df = DataFrame() + df["date"] = pd.date_range("20010101", "20010105").values + assert df.date.dtype.kind == "M" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_dropna.py b/python/cudf/cudf/tests/dataframe/methods/test_dropna.py new file mode 100644 index 00000000000..ec27503a0ef --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_dropna.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_datetime_dataframe(): + data = { + "timearray": np.array( + [0, 1, None, 2, 20, None, 897], dtype="datetime64[ms]" + ) + } + gdf = cudf.DataFrame(data) + pdf = pd.DataFrame(data) + + assert_eq(pdf, gdf) + + assert_eq(pdf.dropna(), gdf.dropna()) + + assert_eq(pdf.isnull(), gdf.isnull()) + + data = np.array([0, 1, None, 2, 20, None, 897], dtype="datetime64[ms]") + gs = cudf.Series(data) + ps = pd.Series(data) + + assert_eq(ps, gs) + + assert_eq(ps.dropna(), gs.dropna()) + + assert_eq(ps.isnull(), gs.isnull()) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py index 1c322ff67af..2a7b53d94cb 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py @@ -6,6 +6,7 @@ import pandas as pd import pytest +import cudf from cudf import DataFrame, option_context from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import assert_eq @@ -192,3 +193,28 @@ def test_sort_values_by_ambiguous(): lfunc_args_and_kwargs=(["a"], {}), rfunc_args_and_kwargs=(["a"], {}), ) + + +def test_sort_values_datetime(): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "date": np.array( + [ + np.datetime64("2016-11-20"), + np.datetime64("2020-11-20"), + np.datetime64("2019-11-20"), + np.datetime64("1918-11-20"), + np.datetime64("2118-11-20"), + ] + ), + "vals": rng.random(5), + } + ) + + gdf = cudf.from_pandas(df) + + s_df = df.sort_values(by="date") + s_gdf = gdf.sort_values(by="date") + + assert_eq(s_df, s_gdf) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py index 4bbed8fab9e..c033efed0b0 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py @@ -1,9 +1,11 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. +import pandas as pd import pyarrow as pa import pytest import cudf +from cudf.testing import assert_eq @pytest.mark.parametrize("preserve_index", [False, True, None]) @@ -13,3 +15,11 @@ def test_dataframe_to_arrow_preserve_index(preserve_index): expect = pa.Table.from_pandas(pf, preserve_index=preserve_index).schema got = df.to_arrow(preserve_index=preserve_index).schema assert expect == got + + +def test_datetime_to_arrow(datetime_types_as_str): + data = pd.date_range("2000-01-01", "2000-01-02", freq="3600s") + gdf = cudf.DataFrame({"timestamp": data.astype(datetime_types_as_str)}) + assert_eq( + gdf, cudf.DataFrame.from_arrow(gdf.to_arrow(preserve_index=False)) + ) diff --git a/python/cudf/cudf/tests/indexes/datetime/__init__.py b/python/cudf/cudf/tests/dateoffset/__init__.py similarity index 100% rename from python/cudf/cudf/tests/indexes/datetime/__init__.py rename to python/cudf/cudf/tests/dateoffset/__init__.py diff --git a/python/cudf/cudf/tests/dateoffset/test_constructors.py b/python/cudf/cudf/tests/dateoffset/test_constructors.py new file mode 100644 index 00000000000..56338f44773 --- /dev/null +++ b/python/cudf/cudf/tests/dateoffset/test_constructors.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd + +import cudf + + +def test_dateoffset_instance_subclass_check(): + assert not issubclass(pd.DateOffset, cudf.DateOffset) + assert not isinstance(pd.DateOffset(), cudf.DateOffset) diff --git a/python/cudf/cudf/tests/general_functions/test_date_range.py b/python/cudf/cudf/tests/general_functions/test_date_range.py new file mode 100644 index 00000000000..199ebca8eeb --- /dev/null +++ b/python/cudf/cudf/tests/general_functions/test_date_range.py @@ -0,0 +1,238 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +def test_date_range_freq_default(): + result = pd.date_range("2020-01-01", periods=2, name="foo") + expected = cudf.date_range("2020-01-01", periods=2, name="foo") + assert_eq(result, expected) + + +def test_date_range_tz(): + result = pd.date_range("2020-01-01", periods=2, tz="UTC") + expected = cudf.date_range("2020-01-01", periods=2, tz="UTC") + assert_eq(result, expected) + + result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") + expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") + assert_eq(result, expected) + + +def test_date_range_start_end_divisible_by_freq(): + result = cudf.date_range("2011-01-01", "2011-01-02", freq="h") + expected = pd.date_range("2011-01-01", "2011-01-02", freq="h") + assert_eq(result, expected) + + +@pytest.fixture( + params=[ + "2000-02-13 08:41:06", + "1996-11-21 04:05:30", + "1970-01-01 00:00:00", + "1831-05-08 15:23:21", + ], + ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], +) +def start(request): + return request.param + + +@pytest.fixture( + params=[ + "2000-02-13 08:41:06", + "1996-11-21 04:05:30", + "1970-01-01 00:00:00", + "1831-05-08 15:23:21", + ], + ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], +) +def end(request): + return request.param + + +@pytest.fixture(params=[1, 10]) +def periods(request): + return request.param + + +@pytest.fixture( + params=[ + {"months": 3, "years": 1}, + {"hours": 10, "days": 57, "nanoseconds": 3}, + "83D", + "17h", + "-680min", + "110546s", + "110546789ms", + "110546789248us", + ] +) +def freq(request): + return request.param + + +def test_date_range_start_end_periods(start, end, periods): + expect = pd.date_range(start=start, end=end, periods=periods, name="a") + got = cudf.date_range(start=start, end=end, periods=periods, name="a") + + np.testing.assert_allclose( + expect.to_numpy().astype("int64"), + got.to_pandas().to_numpy().astype("int64"), + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/46877", +) +def test_date_range_end_freq_periods(end, freq, periods): + if isinstance(freq, str): + _gfreq = _pfreq = freq + else: + _gfreq = cudf.DateOffset(**freq) + _pfreq = pd.DateOffset(**freq) + + expect = pd.date_range(end=end, periods=periods, freq=_pfreq, name="a") + got = cudf.date_range(end=end, periods=periods, freq=_gfreq, name="a") + + np.testing.assert_allclose( + expect.to_numpy().astype("int64"), + got.to_pandas().to_numpy().astype("int64"), + ) + + +def test_date_range_freq_does_not_divide_range(): + expect = pd.date_range( + "2001-01-01 00:00:00.000000", "2001-01-01 00:00:00.000010", freq="3us" + ) + got = cudf.date_range( + 
"2001-01-01 00:00:00.000000", "2001-01-01 00:00:00.000010", freq="3us" + ) + np.testing.assert_allclose( + expect.to_numpy().astype("int64"), + got.to_pandas().to_numpy().astype("int64"), + ) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"nanoseconds": 1}, + {"months": 1}, + ], +) +def test_date_range_raise_overflow(kwargs): + start = np.datetime64(np.iinfo("int64").max, "ns") + periods = 2 + freq = cudf.DateOffset(**kwargs) + with pytest.raises(pd.errors.OutOfBoundsDatetime): + cudf.date_range(start=start, periods=periods, freq=freq) + + +@pytest.mark.parametrize( + "freqstr_unsupported", + [ + "1ME", + "2SME", + "3MS", + "4BME", + "5CBME", + "6SMS", + "7BMS", + "8CBMS", + "QE", + "2BQE", + "3BQS", + "10YE", + "9BYE", + "8YS", + "7BYS", + "bh", + "B", + ], +) +def test_date_range_raise_unsupported(freqstr_unsupported): + if not PANDAS_GE_220 and freqstr_unsupported.endswith("E"): + pytest.skip(reason="YE, etc. support was added in pandas 2.2") + + s, e = "2001-01-01", "2008-01-31" + pd.date_range(start=s, end=e, freq=freqstr_unsupported) + with pytest.raises(ValueError, match="does not yet support"): + cudf.date_range(start=s, end=e, freq=freqstr_unsupported) + + # We also check that these values are unsupported when using lowercase + # characters. We exclude the value 3MS (every 3 month starts) because 3ms + # is a valid frequency for every 3 milliseconds. + if freqstr_unsupported != "3MS": + freqstr_unsupported = freqstr_unsupported.lower() + with pytest.raises(ValueError, match="does not yet support"): + with expect_warning_if( + PANDAS_GE_220 and freqstr_unsupported not in {"b", "bh"} + ): + cudf.date_range(start=s, end=e, freq=freqstr_unsupported) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_date_range_start_freq_periods(start, freq, periods): + if isinstance(freq, str): + _gfreq = _pfreq = freq + else: + _gfreq = cudf.DateOffset(**freq) + _pfreq = pd.DateOffset(**freq) + + expect = pd.date_range(start=start, periods=periods, freq=_pfreq, name="a") + got = cudf.date_range(start=start, periods=periods, freq=_gfreq, name="a") + + np.testing.assert_allclose( + expect.to_numpy().astype("int64"), + got.to_pandas().to_numpy().astype("int64"), + ) + + +def test_daterange_pandas_compatibility(): + with cudf.option_context("mode.pandas_compatible", True): + expected = pd.date_range( + "2010-01-01", "2010-02-01", periods=10, name="times" + ) + actual = cudf.date_range( + "2010-01-01", "2010-02-01", periods=10, name="times" + ) + assert_eq(expected, actual) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_date_range_start_end_freq(start, end, freq): + if isinstance(freq, str): + _gfreq = _pfreq = freq + else: + _gfreq = cudf.DateOffset(**freq) + _pfreq = pd.DateOffset(**freq) + + expect = pd.date_range(start=start, end=end, freq=_pfreq, name="a") + got = cudf.date_range(start=start, end=end, freq=_gfreq, name="a") + + np.testing.assert_allclose( + expect.to_numpy().astype("int64"), + got.to_pandas().to_numpy().astype("int64"), + ) diff --git a/python/cudf/cudf/tests/general_functions/test_to_datetime.py b/python/cudf/cudf/tests/general_functions/test_to_datetime.py new file mode 100644 index 00000000000..e8ebff29e54 --- /dev/null +++ b/python/cudf/cudf/tests/general_functions/test_to_datetime.py @@ -0,0 +1,312 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +@pytest.mark.parametrize( + "data", + [ + None, + [], + pd.Series([], dtype="float64"), + pd.Index([]), + pd.Series([1, 2, 3]), + pd.Series([0, 1, -1]), + pd.Series([0, 1, -1, 100.3, 200, 47637289]), + pd.Series(["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"]), + [1, 2, 3, 100, -123, -1, 0, 1000000000000679367], + pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}), + pd.DataFrame( + {"year": ["2015", "2016"], "month": ["2", "3"], "day": [4, 5]} + ), + pd.DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "minute": [1, 100], + "second": [90, 10], + "hour": [1, 0.5], + }, + index=["a", "b"], + ), + pd.DataFrame( + { + "year": [], + "month": [], + "day": [], + "minute": [], + "second": [], + "hour": [], + }, + ), + ["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], + pd.Index([1, 2, 3, 4]), + pd.DatetimeIndex( + ["1970-01-01 00:00:00.000000001", "1970-01-01 00:00:00.000000002"], + dtype="datetime64[ns]", + freq=None, + ), + pd.DatetimeIndex( + [], + dtype="datetime64[ns]", + freq=None, + ), + pd.Series([1, 2, 3]).astype("datetime64[ns]"), + pd.Series([1, 2, 3]).astype("datetime64[us]"), + pd.Series([1, 2, 3]).astype("datetime64[ms]"), + pd.Series([1, 2, 3]).astype("datetime64[s]"), + pd.Series([1, 2, 3]).astype("datetime64[D]"), + 1, + 100, + 17, + 53.638435454, + np.array([1, 10, 15, 478925, 2327623467]), + np.array([0.3474673, -10, 15, 478925.34345, 2327623467]), + ], +) +@pytest.mark.parametrize("dayfirst", [True, False]) +def test_cudf_to_datetime(data, dayfirst): + pd_data = data + if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): + gd_data = cudf.from_pandas(pd_data) + else: + if type(pd_data).__module__ == np.__name__: + gd_data = cp.array(pd_data) + else: + gd_data = pd_data + + expected = pd.to_datetime(pd_data, dayfirst=dayfirst) + actual = cudf.to_datetime(gd_data, dayfirst=dayfirst) + + if isinstance(expected, pd.Series): + assert_eq(actual, expected, check_dtype=False) + else: + assert_eq(actual, expected, check_exact=False) + + +@pytest.mark.parametrize( + "data", + [ + "2", + ["1", "2", "3"], + ["1/1/1", "2/2/2", "1"], + pd.Series([1, 2, 3], dtype="timedelta64[ns]"), + pd.DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "minute": [1, 100], + "second": [90, 10], + "hour": [1, 0], + "blablacol": [1, 1], + } + ), + pd.DataFrame( + { + "month": [2, 3], + "day": [4, 5], + "minute": [1, 100], + "second": [90, 10], + "hour": [1, 0], + } + ), + ], +) +@pytest.mark.filterwarnings("ignore:Could not infer format:UserWarning") +def test_to_datetime_errors(data): + pd_data = data + if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): + gd_data = cudf.from_pandas(pd_data) + else: + gd_data = pd_data + + assert_exceptions_equal( + pd.to_datetime, + cudf.to_datetime, + ([pd_data],), + ([gd_data],), + ) + + +def test_to_datetime_not_implemented(): + with pytest.raises(NotImplementedError): + cudf.to_datetime([], exact=False) + + with pytest.raises(NotImplementedError): + cudf.to_datetime([], origin="julian") + + with pytest.raises(NotImplementedError): + cudf.to_datetime([], yearfirst=True) + + +@pytest.mark.parametrize( + "data", + [ + 1, + [], + pd.Series([], dtype="float64"), + pd.Index([]), + pd.Series([1, 2, 3]), + pd.Series([1, 2.4, 3]), + pd.Series([0, 1, -1]), + 
pd.Series([0, 1, -1, 100, 200, 47637]), + [10, 12, 1200, 15003], + pd.DatetimeIndex( + [], + dtype="datetime64[ns]", + freq=None, + ), + pd.Index([1, 2, 3, 4]), + ], +) +@pytest.mark.parametrize("unit", ["D", "s", "ms", "us", "ns"]) +def test_to_datetime_units(data, unit): + pd_data = data + if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): + gd_data = cudf.from_pandas(pd_data) + else: + gd_data = pd_data + + expected = pd.to_datetime(pd_data, unit=unit) + actual = cudf.to_datetime(gd_data, unit=unit) + + if isinstance(expected, pd.Series): + assert_eq(actual, expected, check_dtype=False) + else: + assert_eq(actual, expected, exact=False, check_exact=False) + + +@pytest.mark.parametrize( + "data,format", + [ + ("2012-10-11", None), + ("2012-10-11", "%Y-%m-%d"), + ("2012-10-11", "%Y-%d-%m"), + (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], None), + (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], "%Y-%m-%d"), + (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], "%Y-%d-%m"), + (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], "%m-%d-%Y"), + (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], "%d-%m-%Y"), + (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], None), + (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], None), + (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], "%Y/%m/%d"), + (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], "%Y/%d/%m"), + (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], "%m/%d/%Y"), + (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], "%d/%m/%Y"), + (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], None), + (["2021-04-13 12:30:04.123456789"], "%Y-%m-%d %H:%M:%S.%f"), + (pd.Series([2015, 2020, 2021]), "%Y"), + pytest.param( + pd.Series(["1", "2", "1"]), + "%m", + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/6109" + "https://github.com/pandas-dev/pandas/issues/35934" + ), + ), + pytest.param( + pd.Series(["14", "20", "10"]), + "%d", + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/6109" + "https://github.com/pandas-dev/pandas/issues/35934" + ), + ), + (pd.Series([2015, 2020.0, 2021.2]), "%Y"), + ], +) +@pytest.mark.parametrize("infer_datetime_format", [True, False]) +def test_to_datetime_format(data, format, infer_datetime_format): + pd_data = data + if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): + gd_data = cudf.from_pandas(pd_data) + else: + gd_data = pd_data + + with expect_warning_if(True, UserWarning): + expected = pd.to_datetime( + pd_data, format=format, infer_datetime_format=infer_datetime_format + ) + with expect_warning_if(not infer_datetime_format): + actual = cudf.to_datetime( + gd_data, format=format, infer_datetime_format=infer_datetime_format + ) + + if isinstance(expected, pd.Series): + assert_eq(actual, expected, check_dtype=False) + else: + assert_eq(actual, expected, check_exact=False) + + +def test_to_datetime_data_out_of_range_for_format(): + with pytest.raises(ValueError): + cudf.to_datetime("2015-02-99", format="%Y-%m-%d") + + +def test_to_datetime_different_formats_notimplemented(): + with pytest.raises(NotImplementedError): + cudf.to_datetime(["2015-02-01", "2015-02-01 10:10:10"]) + + +def test_datetime_to_datetime_error(): + assert_exceptions_equal( + lfunc=pd.to_datetime, + rfunc=cudf.to_datetime, + lfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"],), + rfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"],), + 
check_exception_type=False, + ) + + +@pytest.mark.parametrize("code", ["z", "Z"]) +def test_format_timezone_not_implemented(code): + with pytest.raises(NotImplementedError): + cudf.to_datetime( + ["2020-01-01 00:00:00 UTC"], format=f"%Y-%m-%d %H:%M:%S %{code}" + ) + + +@pytest.mark.parametrize("tz", ["UTC-3", "+01:00"]) +def test_utc_offset_not_implemented(tz): + with pytest.raises((NotImplementedError, ValueError)): + cudf.to_datetime([f"2020-01-01 00:00:00{tz}"]) + + +def test_Z_utc_offset(): + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.to_datetime(["2020-01-01 00:00:00Z"]) + + result = cudf.to_datetime(["2020-01-01 00:00:00Z"]) + expected = cudf.to_datetime(["2020-01-01 00:00:00"]) + assert_eq(result, expected) + + +@pytest.mark.parametrize("arg", [True, False]) +def test_args_not_datetime_typerror(arg): + with pytest.raises(TypeError): + cudf.to_datetime([arg]) + + +@pytest.mark.parametrize("errors", ["coerce", "ignore"]) +def test_to_datetime_errors_non_scalar_not_implemented(errors): + with pytest.raises(NotImplementedError): + cudf.to_datetime([1, ""], unit="s", errors=errors) + + +def test_to_datetime_errors_ignore_deprecated(): + with pytest.warns(FutureWarning): + cudf.to_datetime("2001-01-01 00:04:45", errors="ignore") diff --git a/python/cudf/cudf/tests/indexes/datetime/test_components.py b/python/cudf/cudf/tests/indexes/datetime/test_components.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/datetime/test_components.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/datetime/test_constructing.py b/python/cudf/cudf/tests/indexes/datetime/test_constructing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/datetime/test_constructing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/datetime/test_conversion.py b/python/cudf/cudf/tests/indexes/datetime/test_conversion.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/datetime/test_conversion.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/__init__.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_isocalendar.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_isocalendar.py new file mode 100644 index 00000000000..e166a66f81c --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_isocalendar.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [], + [None, None], + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + ], + ["2100-03-14 07:30:00"], + ], +) +def test_isocalendar_index(data): + ps = pd.DatetimeIndex(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.isocalendar() + got = gs.isocalendar() + + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_strftime.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_strftime.py new file mode 100644 index 00000000000..2c621f460a6 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_strftime.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "method, kwargs", + [ + ["to_pydatetime", {}], + ["to_period", {"freq": "D"}], + ["strftime", {"date_format": "%Y-%m-%d"}], + ], +) +def test_dti_methods(method, kwargs): + pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") + cudf_dti = cudf.from_pandas(pd_dti) + + result = getattr(cudf_dti, method)(**kwargs) + expected = getattr(pd_dti, method)(**kwargs) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py new file mode 100644 index 00000000000..1c026224da3 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import zoneinfo + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_tz_convert(): + tz = zoneinfo.ZoneInfo("America/New_York") + pidx = pd.date_range("2023-01-01", periods=3, freq="h") + idx = cudf.from_pandas(pidx) + pidx = pidx.tz_localize("UTC") + idx = idx.tz_localize("UTC") + assert_eq(pidx.tz_convert(tz), idx.tz_convert(tz)) diff --git a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_localize.py similarity index 62% rename from python/cudf/cudf/tests/indexes/datetime/test_time_specific.py rename to python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_localize.py index 7cc629270b1..7c07b11fa97 100644 --- a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_localize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. 
import zoneinfo import pandas as pd @@ -16,15 +16,6 @@ def test_tz_localize(): assert_eq(pidx.tz_localize(tz), idx.tz_localize(tz)) -def test_tz_convert(): - tz = zoneinfo.ZoneInfo("America/New_York") - pidx = pd.date_range("2023-01-01", periods=3, freq="h") - idx = cudf.from_pandas(pidx) - pidx = pidx.tz_localize("UTC") - idx = idx.tz_localize("UTC") - assert_eq(pidx.tz_convert(tz), idx.tz_convert(tz)) - - def test_delocalize_naive(): pidx = pd.date_range("2023-01-01", periods=3, freq="h") idx = cudf.from_pandas(pidx) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py index 5fb8c2fb647..8d230451886 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py @@ -1,9 +1,111 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import numpy as np import pandas as pd +import pandas._testing as tm import pytest -from cudf.core.index import DatetimeIndex +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "weekday", + "dayofweek", + "dayofyear", + "day_of_year", + ], +) +def test_dt_index(field): + data = pd.DatetimeIndex( + [pd.Timestamp(2020, 1, 2, 3, 4, 5, 6, nanosecond=7)] + ) + gdf_data = cudf.DatetimeIndex(data) + assert_eq(getattr(gdf_data, field), getattr(data, field), exact=False) + + +@pytest.mark.parametrize( + "attr", + [ + "is_month_start", + "is_month_end", + "is_quarter_end", + "is_quarter_start", + "is_year_end", + "is_year_start", + "days_in_month", + "timetz", + "time", + "date", + ], +) +def test_dti_datetime_attributes(attr): + data = [ + "2020-01-01", + "2020-01-31", + "2020-03-01", + "2020-03-31", + "2020-03-31", + "2020-12-31", + None, + ] + pd_dti = pd.DatetimeIndex(data, name="foo") + cudf_dti = cudf.from_pandas(pd_dti) + + result = getattr(cudf_dti, attr) + expected = getattr(pd_dti, attr) + if isinstance(result, np.ndarray): + # numpy doesn't assert object arrays with NaT correctly + tm.assert_numpy_array_equal(result, expected) + else: + assert_eq(result, expected) + + +@pytest.mark.parametrize("attr", ["freq", "unit"]) +def test_dti_properties(attr): + pd_dti = pd.DatetimeIndex( + ["2020-01-01", "2020-01-02"], dtype="datetime64[ns]" + ) + cudf_dti = cudf.DatetimeIndex( + ["2020-01-01", "2020-01-02"], dtype="datetime64[ns]" + ) + + result = getattr(cudf_dti, attr) + expected = getattr(pd_dti, attr) + assert result == expected + + +def test_writable_numpy_array(): + gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]") + expected_flags = pd.Index( + [1, 2, 3], dtype="datetime64[ns]" + )._data._ndarray.flags + + actual_flags = gi.to_pandas()._data._ndarray.flags + assert expected_flags.c_contiguous == actual_flags.c_contiguous + assert expected_flags.f_contiguous == actual_flags.f_contiguous + assert expected_flags.writeable == actual_flags.writeable + assert expected_flags.aligned == actual_flags.aligned + assert expected_flags.writebackifcopy == actual_flags.writebackifcopy + + +def test_dti_asi8(): + pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") + cudf_dti = cudf.from_pandas(pd_dti) + + result = pd_dti.asi8 + expected = cudf_dti.asi8 + assert_eq(result, expected) @pytest.mark.parametrize( @@ -37,7 +139,7 @@ ], ) def test_datetime_index_is_unique_monotonic(testlist): - index = DatetimeIndex(testlist) + index = cudf.DatetimeIndex(testlist) 
index_pd = pd.DatetimeIndex(testlist) assert index.is_unique == index_pd.is_unique diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py new file mode 100644 index 00000000000..bae161521a4 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py @@ -0,0 +1,87 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. + + +import pandas as pd +import pytest + +import cudf +from cudf.core.index import DatetimeIndex +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.mark.parametrize( + "data,dtype,freq", + [ + ([10], "datetime64[ns]", "2ns"), + ([10, 12, 14, 16], "datetime64[ns]", "2ns"), + ([10, 11, 12, 13], "datetime64[ns]", "1ns"), + ([100, 200, 300, 400], "datetime64[s]", "100s"), + ([101, 201, 301, 401], "datetime64[ms]", "100ms"), + ], +) +def test_datetime_index_with_freq(data, dtype, freq): + actual = cudf.DatetimeIndex(data, dtype=dtype, freq=freq) + expected = pd.DatetimeIndex(data, dtype=dtype, freq=freq) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "data,dtype,freq", + [ + ([10, 1232, 13244, 13426], "datetime64[ns]", "2ns"), + ([10, 11, 12, 13], "datetime64[ns]", "1s"), + ([10000, 200, 300, 400], "datetime64[s]", "100s"), + ([107871, 201, 301, 401], "datetime64[ms]", "100ns"), + ], +) +def test_datetime_index_freq_error(data, dtype, freq): + assert_exceptions_equal( + pd.DatetimeIndex, + cudf.DatetimeIndex, + ([data], {"dtype": dtype, "freq": freq}), + ([data], {"dtype": dtype, "freq": freq}), + ) + + +def test_strings_with_utc_offset_not_implemented(): + with pytest.raises(NotImplementedError): + DatetimeIndex(["2022-07-22 00:00:00+02:00"]) + + +def test_dateimeindex_from_noniso_string(): + data = ["20160920", "20160925"] + gdti = cudf.DatetimeIndex(data) + pdti = pd.DatetimeIndex(data) + + assert_eq(gdti, pdti) + + +@pytest.mark.parametrize( + "data", + [ + [ + "2020-01-31", + "2020-02-15", + "2020-02-29", + "2020-03-15", + "2020-03-31", + "2020-04-15", + "2020-04-30", + ], + [43534, 43543, 37897, 2000], + ], +) +@pytest.mark.parametrize("dtype", [None, "datetime64[ns]"]) +def test_datetime_constructor(data, dtype): + expected = pd.DatetimeIndex(data=data, dtype=dtype) + actual = cudf.DatetimeIndex(data=data, dtype=dtype) + + assert_eq(expected, actual) + + expected = pd.DatetimeIndex(data=pd.Series(data), dtype=dtype) + actual = cudf.DatetimeIndex(data=cudf.Series(data), dtype=dtype) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/datetime/test_indexing.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_getitem.py similarity index 100% rename from python/cudf/cudf/tests/indexes/datetime/test_indexing.py rename to python/cudf/cudf/tests/indexes/datetimeindex/test_getitem.py diff --git a/python/cudf/cudf/tests/series/accessors/test_dt.py b/python/cudf/cudf/tests/series/accessors/test_dt.py index 40b604bc043..401f89aed65 100644 --- a/python/cudf/cudf/tests/series/accessors/test_dt.py +++ b/python/cudf/cudf/tests/series/accessors/test_dt.py @@ -2,9 +2,14 @@ import cupy as cp import numpy as np +import pandas as pd import pytest import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) from cudf.testing import assert_eq @@ -93,3 +98,476 @@ def test_timedelta_series_total_seconds( expected = psr.dt.total_seconds() actual = gsr.dt.total_seconds() assert_eq(expected, actual) + + +@pytest.mark.parametrize("meth", 
["day_name", "month_name"]) +@pytest.mark.parametrize("klass", [cudf.Series, cudf.DatetimeIndex]) +def test_day_month_name_locale_not_implemented(meth, klass): + obj = klass(cudf.date_range("2020-01-01", periods=7)) + if klass is cudf.Series: + obj = obj.dt + with pytest.raises(NotImplementedError): + getattr(obj, meth)(locale="pt_BR.utf8") + + +@pytest.mark.parametrize("meth", ["day_name", "month_name"]) +@pytest.mark.parametrize("klass", [pd.Series, pd.DatetimeIndex]) +def test_day_month_name(meth, klass): + data = [ + "2020-05-31 08:00:00", + None, + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + None, + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + + p_obj = klass(data, dtype="datetime64[s]") + g_obj = cudf.from_pandas(p_obj) + + if klass is pd.Series: + p_obj = p_obj.dt + g_obj = g_obj.dt + + expect = getattr(p_obj, meth)() + got = getattr(g_obj, meth)() + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "freqstr", + [ + "H", + "N", + "T", + "L", + "U", + "S", + ], +) +def test_datetime_ceil_raise_warning(freqstr): + t = cudf.Series( + ["2001-01-01 00:04:45", "2001-01-01 00:04:58", "2001-01-01 00:05:04"], + dtype="datetime64[ns]", + ) + with pytest.warns(FutureWarning): + t.dt.ceil(freqstr) + + +@pytest.mark.parametrize( + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] +) +def test_round(datetime_types_as_str, resolution): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + gs = cudf.Series(data, dtype=datetime_types_as_str) + ps = gs.to_pandas() + + expect = ps.dt.round(resolution) + got = gs.dt.round(resolution) + assert_eq(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52761", +) +@pytest.mark.parametrize( + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] +) +def test_floor(datetime_types_as_str, resolution): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + gs = cudf.Series(data, dtype=datetime_types_as_str) + ps = gs.to_pandas() + + expect = ps.dt.floor(resolution) + got = gs.dt.floor(resolution) + assert_eq(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52761", +) +@pytest.mark.parametrize( + "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] +) +def test_ceil(datetime_types_as_str, resolution): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + gs = cudf.Series(data, dtype=datetime_types_as_str) + ps = gs.to_pandas() + + expect = ps.dt.ceil(resolution) + got = gs.dt.ceil(resolution) + assert_eq(expect, got) + + +def test_days_in_months(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + assert_eq(ps.dt.days_in_month, gs.dt.days_in_month) + + +def test_is_month_start(): + data = 
[ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_month_start + got = gs.dt.is_month_start + + assert_eq(expect, got) + + +def test_is_month_end(): + data = [ + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_month_end + got = gs.dt.is_month_end + + assert_eq(expect, got) + + +def test_is_year_start(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-01-01", + "1800-03-14", + "2100-03-10", + "1970-01-01", + "1969-12-11", + "2017-12-30", + "2017-12-31", + "2018-01-01", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_year_start + got = gs.dt.is_year_start + + assert_eq(expect, got) + + +def test_is_year_end(): + data = [ + "2020-05-31", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-12-31", + "1800-03-14", + "2017-12-30", + "2017-12-31", + "2020-12-31 08:00:00", + None, + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + None, + "1800-12-14 07:30:00", + "2100-12-14 07:30:00", + "2020-05-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_year_end + got = gs.dt.is_year_end + + assert_eq(expect, got) + + +def test_is_quarter_start(): + data = [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_quarter_start + got = gs.dt.is_quarter_start + + assert_eq(expect, got) + + +def test_is_quarter_end(): + data = [ + "2020-05-01", + "2020-05-31", + "2020-02-29", + None, + "1999-12-01", + "2000-12-21", + None, + "1900-02-28", + "1800-03-14", + "2100-03-10", + "1970-04-1", + "1970-01-01", + "1969-12-11", + "2020-12-31", + ] + ps = pd.Series(data, dtype="datetime64[ns]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_quarter_end + got = gs.dt.is_quarter_end + + assert_eq(expect, got) + + +def test_is_leap_year(): + data = [ + "2020-05-31 08:00:00", + None, + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + None, + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + + # Series + ps = pd.Series(data, dtype="datetime64[s]") + gs = cudf.from_pandas(ps) + + expect = ps.dt.is_leap_year + got = gs.dt.is_leap_year + + assert_eq(expect, got) + + # DatetimeIndex + pIndex = pd.DatetimeIndex(data) + gIndex = cudf.from_pandas(pIndex) + + expect2 = pIndex.is_leap_year + got2 = gIndex.is_leap_year + + assert_eq(expect2, got2) + + +def test_quarter(): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + dtype = "datetime64[s]" + + # Series + ps = pd.Series(data, dtype=dtype) + gs = cudf.from_pandas(ps) + + expect = ps.dt.quarter + got = gs.dt.quarter + + assert_eq(expect, got, check_dtype=False) + + # DatetimeIndex + pIndex = pd.DatetimeIndex(data) + gIndex = 
cudf.from_pandas(pIndex) + + expect2 = pIndex.quarter + got2 = gIndex.quarter + + assert_eq(expect2.values, got2.values) + + +@pytest.mark.parametrize( + "data", + [ + pd.Series([], dtype="datetime64[ns]"), + pd.Series(pd.date_range("2010-01-01", "2010-02-01")), + pd.Series([None, None], dtype="datetime64[ns]"), + pd.Series("2020-05-31 08:00:00", dtype="datetime64[s]"), + pd.Series( + pd.date_range(start="2021-07-25", end="2021-07-30"), + index=["a", "b", "c", "d", "e", "f"], + ), + ], +) +def test_isocalendar_series(data): + ps = data.copy() + gs = cudf.from_pandas(ps) + + expect = ps.dt.isocalendar() + got = gs.dt.isocalendar() + + assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "data", [[1, 2, 3, None], [], [100121, 1221312, 321312321, 1232131223]] +) +@pytest.mark.parametrize( + "date_format", + [ + "%d - %m", + "%y/%H", + "%Y", + "%I - %M / %S", + "%f", + "%j", + "%p", + "%w", + "%U", + "%W", + "%G", + "%u", + "%V", + "%b", + "%B", + "%a", + "%A", + "%U_", + "_%b", + "%B*", + "%a ", + "%A1", + ], +) +def test_datetime_strftime(data, datetime_types_as_str, date_format): + gsr = cudf.Series(data, dtype=datetime_types_as_str) + psr = gsr.to_pandas() + + expected = psr.dt.strftime(date_format=date_format) + actual = gsr.dt.strftime(date_format=date_format) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("date_format", ["%c", "%x", "%X"]) +def test_datetime_strftime_not_implemented_formats(date_format): + gsr = cudf.Series([1, 2, 3], dtype="datetime64[ms]") + + with pytest.raises(NotImplementedError): + gsr.dt.strftime(date_format=date_format) + + +@pytest.mark.parametrize( + "data", + [ + pd.date_range("20010101", "20020215", freq="400h", name="times"), + pd.date_range( + "20010101", freq="243434324423423234ns", name="times", periods=10 + ), + ], +) +@pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "weekday", + "dayofweek", + "dayofyear", + "day_of_year", + ], +) +def test_dt_series_datetime_fields(data, field): + pd_data = pd.Series(data) + gdf_data = cudf.Series(pd_data) + base = getattr(pd_data.dt, field) + test = getattr(gdf_data.dt, field) + assert_eq(base, test, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/indexing/test_getitem.py b/python/cudf/cudf/tests/series/indexing/test_getitem.py index 568d6761cdd..3ed2ef57d9d 100644 --- a/python/cudf/cudf/tests/series/indexing/test_getitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_getitem.py @@ -145,6 +145,11 @@ def test_struct_getitem(series, expected): assert sr[0] == expected +def test_datetime_getitem_na(): + s = cudf.Series([1, 2, None, 3], dtype="datetime64[ns]") + assert s[2] is cudf.NaT + + def test_timedelta_getitem_na(): s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]") assert s[2] is cudf.NaT diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 8373b173815..30b4fcbdc4e 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -314,3 +314,186 @@ def test_timedelta_str_roundtrip(sr_data, sr_dtype, exp_data, exp_dtype): assert_eq(expected_series, actual_series) assert_eq(gsr, actual_series.astype(gsr.dtype)) + + +def test_typecast_from_datetime(numeric_types_as_str): + data = pd.date_range( + "2019-07-16 00:00:00", + "2019-07-16 00:00:01", + freq="5555us", + name="times", + ) + pd_data = pd.Series(data) + np_data = 
np.array(pd_data) + gdf_data = cudf.Series(pd_data) + + np_casted = np_data.astype(numeric_types_as_str) + gdf_casted = gdf_data.astype(numeric_types_as_str) + + np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) + + +def test_typecast_from_datetime_to_int64_to_datetime(datetime_types_as_str): + data = pd.date_range( + "2019-07-16 00:00:00", + "2019-07-16 00:00:01", + freq="5555us", + name="times", + ) + pd_data = pd.Series(data) + np_data = np.array(pd_data) + gdf_data = cudf.Series(pd_data) + + np_casted = np_data.astype(np.int64).astype(datetime_types_as_str) + gdf_casted = gdf_data.astype(np.int64).astype(datetime_types_as_str) + + np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) + + +def test_typecast_to_different_datetime_resolutions(datetime_types_as_str): + data = pd.date_range( + "2019-07-16 00:00:00", + "2019-07-16 00:00:01", + freq="5555us", + name="times", + ) + pd_data = pd.Series(data) + np_data = np.array(pd_data).astype(datetime_types_as_str) + gdf_series = cudf.Series(pd_data).astype(datetime_types_as_str) + np.testing.assert_equal(np_data, gdf_series.to_numpy()) + + +@pytest.mark.parametrize( + "data", + [ + [ + "2019-07-16 00:00:00.333", + "2019-07-16 00:00:00.666", + "2019-07-16 00:00:00.888", + ], + [ + "2019-07-16 00:00:00.333333", + "2019-07-16 00:00:00.666666", + "2019-07-16 00:00:00.888888", + ], + [ + "2019-07-16 00:00:00.333333333", + "2019-07-16 00:00:00.666666666", + "2019-07-16 00:00:00.888888888", + ], + ], + ids=["ms_data", "us_data", "ns_data"], +) +def test_string_timstamp_typecast_to_different_datetime_resolutions( + data, datetime_types_as_str +): + pd_sr = pd.Series(data) + gdf_sr = cudf.Series.from_pandas(pd_sr) + + expect = pd_sr.values.astype(datetime_types_as_str) + got = gdf_sr.astype(datetime_types_as_str).values_host + + np.testing.assert_equal(expect, got) + + +def test_typecast_to_datetime(numeric_types_as_str, datetime_types_as_str): + data = np.arange(1, 10) + np_data = data.astype(numeric_types_as_str) + gdf_data = cudf.Series(np_data) + + np_casted = np_data.astype(datetime_types_as_str) + gdf_casted = gdf_data.astype(datetime_types_as_str) + + np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) + + +def test_typecast_to_from_datetime( + numeric_types_as_str, datetime_types_as_str +): + data = np.arange(1, 10) + np_data = data.astype(numeric_types_as_str) + gdf_data = cudf.Series(np_data) + + np_casted = np_data.astype(datetime_types_as_str).astype( + numeric_types_as_str + ) + gdf_casted = gdf_data.astype(datetime_types_as_str).astype( + numeric_types_as_str + ) + + np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) + + +@pytest.fixture +def datetime_types_as_str2(datetime_types_as_str): + return datetime_types_as_str + + +def test_typecast_from_datetime_to_datetime( + datetime_types_as_str, datetime_types_as_str2 +): + data = np.arange(1, 10) + np_data = data.astype(datetime_types_as_str) + ser = cudf.Series(np_data) + + np_casted = np_data.astype(datetime_types_as_str2) + ser_casted = ser.astype(datetime_types_as_str2) + + np.testing.assert_equal(np_casted, ser_casted.to_numpy()) + + +@pytest.mark.parametrize( + "data", + [ + ["2001-01-01", "2002-02-02", "2000-01-05", "NaT"], + ["2001-01-01", "2002-02-02", "2000-01-05", None], + [None, None, None, None, None], + ], +) +def test_str_null_to_datetime(data, datetime_types_as_str): + psr = pd.Series(data) + gsr = cudf.Series(data) + + assert_eq( + psr.astype(datetime_types_as_str), gsr.astype(datetime_types_as_str) + ) + + +def test_str_to_datetime_error(): + psr 
= pd.Series(["2001-01-01", "2002-02-02", "2000-01-05", "None"]) + gsr = cudf.Series(["2001-01-01", "2002-02-02", "2000-01-05", "None"]) + + assert_exceptions_equal( + lfunc=psr.astype, + rfunc=gsr.astype, + lfunc_args_and_kwargs=(["datetime64[s]"],), + rfunc_args_and_kwargs=(["datetime64[s]"],), + check_exception_type=False, + ) + + +@pytest.mark.parametrize("timezone", ["", "Z"]) +@pytest.mark.parametrize( + "data", + [ + "2002-10-27T04:30", + "2002-10-27T04:30:00", + "2002-10-27T04:30:00.000", + "2002-10-27T04:30:00.000000", + "2002-10-27T04:30:00.000000000", + ], +) +def test_datetime_infer_format(data, timezone, datetime_types_as_str): + ts_data = [data + timezone] + sr = cudf.Series(ts_data) + psr = pd.Series(ts_data) + if not timezone: + expected = psr.astype(datetime_types_as_str) + actual = sr.astype(datetime_types_as_str) + + assert_eq(expected, actual) + else: + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + # pandas doesn't allow parsing "Z" to naive type + sr.astype(datetime_types_as_str) diff --git a/python/cudf/cudf/tests/series/methods/test_fillna.py b/python/cudf/cudf/tests/series/methods/test_fillna.py index 094b27c4fff..5dac20da52c 100644 --- a/python/cudf/cudf/tests/series/methods/test_fillna.py +++ b/python/cudf/cudf/tests/series/methods/test_fillna.py @@ -48,6 +48,45 @@ def test_fillna_categorical_with_different_categories_raises(): ser.fillna(cudf.Series([1, 2]), dtype="category") +@pytest.mark.parametrize( + "data", + [ + [], + [1, 2], + [None, 1], + [None, None], + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + np.datetime64("2005-02"), + np.datetime64("2005-02-25"), + np.datetime64("2005-02-25T03:30"), + np.datetime64("nat"), + "NaT", + ], +) +def test_datetime_fillna(data, datetime_types_as_str, fill_value): + sr = cudf.Series(data, dtype=datetime_types_as_str) + psr = sr.to_pandas() + + expected = psr.dropna() + actual = sr.dropna() + + assert_eq(expected, actual) + + expected = psr.fillna(fill_value) + actual = sr.fillna(fill_value) + + assert_eq(expected, actual) + + expected = expected.dropna() + actual = actual.dropna() + + assert_eq(expected, actual) + + @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/series/methods/test_first_last.py b/python/cudf/cudf/tests/series/methods/test_first_last.py new file mode 100644 index 00000000000..ffee8b06977 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_first_last.py @@ -0,0 +1,126 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "idx", + [ + pd.DatetimeIndex([]), + pd.DatetimeIndex(["2010-05-31"]), + pd.date_range("2000-01-01", "2000-12-31", periods=21), + ], +) +@pytest.mark.parametrize( + "offset", + [ + "10Y", + "6M", + "M", + "31D", + "0H", + "44640T", + "44640min", + "2678000S", + "2678000000L", + "2678000000ms", + "2678000000000U", + "2678000000000us", + "2678000000000000N", + "2678000000000000ns", + ], +) +def test_first(idx, offset): + p = pd.Series(range(len(idx)), dtype="int64", index=idx) + g = cudf.from_pandas(p) + + with pytest.warns(FutureWarning): + expect = p.first(offset=offset) + with pytest.warns(FutureWarning): + got = g.first(offset=offset) + + assert_eq(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_first_start_at_end_of_month(): + idx = pd.DatetimeIndex( + [ + "2020-01-31", + "2020-02-15", + "2020-02-29", + "2020-03-15", + "2020-03-31", + "2020-04-15", + "2020-04-30", + ] + ) + offset = "3M" + p = pd.Series(range(len(idx)), index=idx) + g = cudf.from_pandas(p) + + with pytest.warns(FutureWarning): + expect = p.first(offset=offset) + with pytest.warns(FutureWarning): + got = g.first(offset=offset) + + assert_eq(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "idx", + [ + pd.DatetimeIndex([]), + pd.DatetimeIndex(["2010-05-31"]), + pd.date_range("2000-01-01", "2000-12-31", periods=21), + ], +) +@pytest.mark.parametrize( + "offset", + [ + "10Y", + "6M", + "M", + "31D", + "0H", + "44640T", + "44640min", + "2678000S", + "2678000000L", + "2678000000ms", + "2678000000000U", + "2678000000000us", + "2678000000000000N", + "2678000000000000ns", + ], +) +def test_last(idx, offset): + p = pd.Series(range(len(idx)), dtype="int64", index=idx) + g = cudf.from_pandas(p) + + with pytest.warns(FutureWarning): + expect = p.last(offset=offset) + with pytest.warns(FutureWarning): + got = g.last(offset=offset) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_nunique.py b/python/cudf/cudf/tests/series/methods/test_nunique.py index 19ff910e316..c645fa401f4 100644 --- a/python/cudf/cudf/tests/series/methods/test_nunique.py +++ b/python/cudf/cudf/tests/series/methods/test_nunique.py @@ -1,6 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np import pandas as pd +import pytest import cudf from cudf.testing import assert_eq @@ -23,3 +26,27 @@ def test_series_nunique(): expected = pd_s.nunique() assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + pd.Series([], dtype="datetime64[ns]"), + pd.Series(pd.date_range("2010-01-01", "2010-02-01")), + pd.Series([None, None], dtype="datetime64[ns]"), + ], +) +@pytest.mark.parametrize("nulls", ["none", "some"]) +def test_datetime_nunique(data, nulls): + psr = data.copy() + rng = np.random.default_rng(seed=0) + + if len(data) > 0: + if nulls == "some": + p = rng.integers(0, len(data), 2) + psr[p] = None + + gsr = cudf.from_pandas(psr) + expected = psr.nunique() + got = gsr.nunique() + assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/series/methods/test_query.py b/python/cudf/cudf/tests/series/methods/test_query.py new file mode 100644 index 00000000000..b6b69cf8ebb --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_query.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import datetime + +import numpy as np +import pandas as pd + +from cudf import DataFrame +from cudf.testing import assert_eq + + +def test_issue_165(): + df_pandas = pd.DataFrame() + start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d") + data = [(start_date + datetime.timedelta(days=x)) for x in range(6)] + df_pandas["dates"] = data + df_pandas["num"] = [1, 2, 3, 4, 5, 6] + df_cudf = DataFrame.from_pandas(df_pandas) + + base = df_pandas.query("dates==@start_date") + test = df_cudf.query("dates==@start_date") + assert_eq(base, test) + assert len(test) > 0 + + mask = df_cudf.dates == start_date + base_mask = df_pandas.dates == start_date + assert_eq(mask, base_mask, check_names=False) + assert mask.to_pandas().sum() > 0 + + start_date_ts = pd.Timestamp(start_date) + test = df_cudf.query("dates==@start_date_ts") + base = df_pandas.query("dates==@start_date_ts") + assert_eq(base, test) + assert len(test) > 0 + + mask = df_cudf.dates == start_date_ts + base_mask = df_pandas.dates == start_date_ts + assert_eq(mask, base_mask, check_names=False) + assert mask.to_pandas().sum() > 0 + + start_date_np = np.datetime64(start_date_ts, "ns") + test = df_cudf.query("dates==@start_date_np") + base = df_pandas.query("dates==@start_date_np") + assert_eq(base, test) + assert len(test) > 0 + + mask = df_cudf.dates == start_date_np + base_mask = df_pandas.dates == start_date_np + assert_eq(mask, base_mask, check_names=False) + assert mask.to_pandas().sum() > 0 diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py index bc78a8c7871..1768d6ccc0e 100644 --- a/python/cudf/cudf/tests/series/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -195,3 +195,21 @@ def test_writable_numpy_array_timedelta(): assert expected_flags.writeable == actual_flags.writeable assert expected_flags.aligned == actual_flags.aligned assert expected_flags.writebackifcopy == actual_flags.writebackifcopy + + +@pytest.mark.parametrize("nulls", ["some", "all"]) +def test_to_from_pandas_nulls(nulls): + data = np.arange(1, 10) + pd_data = pd.Series(data.astype("datetime64[ns]")) + if nulls == "some": + # Fill half the values with NaT + pd_data[list(range(0, len(pd_data), 2))] = np.datetime64("nat", "ns") + elif nulls == "all": + # Fill all the values with NaT + pd_data[:] = np.datetime64("nat", "ns") + gdf_data = cudf.Series.from_pandas(pd_data) + + expect = pd_data + got = 
gdf_data.to_pandas() + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_unique.py b/python/cudf/cudf/tests/series/methods/test_unique.py new file mode 100644 index 00000000000..2aa4b78b039 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_unique.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + pd.Series([], dtype="datetime64[ns]"), + pd.Series(pd.date_range("2010-01-01", "2010-02-01")), + pd.Series([None, None], dtype="datetime64[ns]"), + ], +) +@pytest.mark.parametrize("nulls", ["none", "some"]) +def test_datetime_unique(data, nulls): + rng = np.random.default_rng(seed=0) + psr = data.copy() + + if len(data) > 0: + if nulls == "some": + p = rng.integers(0, len(data), 2) + psr[p] = None + + gsr = cudf.from_pandas(psr) + expected = psr.unique() + got = gsr.unique() + + # Unique does not provide a guarantee on ordering. + assert_eq( + pd.Series(expected).sort_values(ignore_index=True), + got.sort_values(ignore_index=True).to_pandas(), + ) diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index aedbf696009..0e99893f530 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -241,3 +242,54 @@ def test_timedelta_contains(data, timedelta_types_as_str, scalar): actual = scalar in psr assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data, expected", + [ + [["2018-01-01", None, "2019-01-31", None, "2018-01-01"], True], + [ + [ + "2018-01-01", + "2018-01-02", + "2019-01-31", + "2018-03-01", + "2018-01-01", + ], + False, + ], + [ + np.array( + ["2018-01-01", None, "2019-12-30"], dtype="datetime64[ms]" + ), + True, + ], + ], +) +def test_datetime_has_null_test(data, expected): + data = cudf.Series(data, dtype="datetime64[ms]") + pd_data = data.to_pandas() + count = pd_data.notna().value_counts() + expected_count = 0 + if False in count.keys(): + expected_count = count[False] + + assert expected is data.has_nulls + assert expected_count == data.null_count + + +def test_datetime_has_null_test_pyarrow(): + data = cudf.Series( + pa.array( + [0, np.iinfo("int64").min, np.iinfo("int64").max, None], + type=pa.timestamp("ns"), + ) + ) + assert data.has_nulls is True + assert data.null_count == 1 + + +def test_error_values_datetime(): + s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + with pytest.raises(NotImplementedError, match="cupy does not support"): + s.values diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index d5a9ea50317..e7bdf6d415f 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -1,4 +1,5 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. 
+import datetime import decimal import cupy as cp @@ -8,6 +9,10 @@ import pytest import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) from cudf.core.column.column import as_column from cudf.errors import MixedTypeError from cudf.testing import assert_eq @@ -757,3 +762,97 @@ def test_create_struct_series(data): expect = pd.Series(data) got = cudf.Series(data) assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + pd.date_range("20010101", "20020215", freq="400h", name="times"), + pd.date_range( + "20010101", freq="243434324423423234ns", name="times", periods=10 + ), + ], +) +def test_series_from_pandas_datetime_index(data): + pd_data = pd.Series(data) + gdf_data = cudf.Series(pd_data) + assert_eq(pd_data, gdf_data) + + +@pytest.mark.parametrize( + "dtype", + ["datetime64[D]", "datetime64[W]", "datetime64[M]", "datetime64[Y]"], +) +def test_datetime_array_timeunit_cast(dtype): + testdata = np.array( + [ + np.datetime64("2016-11-20"), + np.datetime64("2020-11-20"), + np.datetime64("2019-11-20"), + np.datetime64("1918-11-20"), + np.datetime64("2118-11-20"), + ], + dtype=dtype, + ) + + gs = cudf.Series(testdata) + ps = pd.Series(testdata) + + assert_eq(ps, gs) + + gdf = cudf.DataFrame() + gdf["a"] = np.arange(5) + gdf["b"] = testdata + + pdf = pd.DataFrame() + pdf["a"] = np.arange(5) + pdf["b"] = testdata + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("timeunit", ["D", "W", "M", "Y"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_datetime_scalar_timeunit_cast(timeunit): + testscalar = np.datetime64("2016-11-20", timeunit) + + gs = cudf.Series(testscalar) + ps = pd.Series(testscalar) + + assert_eq(ps, gs, check_dtype=False) + + gdf = cudf.DataFrame() + gdf["a"] = np.arange(5) + gdf["b"] = testscalar + + pdf = pd.DataFrame() + pdf["a"] = np.arange(5) + pdf["b"] = testscalar + + assert gdf["b"].dtype == np.dtype("datetime64[s]") + assert_eq(pdf, gdf, check_dtype=True) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_datetime_string_to_datetime_resolution_loss_raises(): + data = ["2020-01-01 00:00:00.00001"] + dtype = "datetime64[s]" + with pytest.raises(ValueError): + cudf.Series(data, dtype=dtype) + with pytest.raises(ValueError): + pd.Series(data, dtype=dtype) + + +def test_timezone_pyarrow_array(): + pa_array = pa.array( + [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)], + type=pa.timestamp("ns", "UTC"), + ) + result = cudf.Series(pa_array) + expected = pa_array.to_pandas() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 94fca7a2a6b..1bc6fe19d02 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2,29 +2,22 @@ import datetime import operator -import warnings import cupy as cp import numpy as np import pandas as pd -import pandas._testing as tm -import pyarrow as pa import pytest import cudf -import cudf.testing.dataset_generator as dataset_generator -from cudf import DataFrame, Series +from cudf import Series from cudf.core._compat import ( PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, PANDAS_GE_230, PANDAS_VERSION, ) -from cudf.core.index import DatetimeIndex from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, - NUMERIC_TYPES, assert_exceptions_equal, 
expect_warning_if, ) @@ -56,32 +49,6 @@ def data(request): return request.param -@pytest.fixture( - params=[ - "year", - "month", - "day", - "hour", - "minute", - "second", - "microsecond", - "nanosecond", - "weekday", - "dayofweek", - "dayofyear", - "day_of_year", - ] -) -def field(request): - return request.param - - -def test_series(data): - pd_data = pd.Series(data) - gdf_data = Series(pd_data) - assert_eq(pd_data, gdf_data) - - @pytest.mark.parametrize( "lhs_dtype", ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], @@ -157,633 +124,6 @@ def test_dt_ops(data): assert_eq(pd_data > pd_data, gdf_data > gdf_data) -# libcudf doesn't respect timezones -def test_dt_series(data, field): - pd_data = pd.Series(data) - gdf_data = Series(pd_data) - base = getattr(pd_data.dt, field) - test = getattr(gdf_data.dt, field) - assert_eq(base, test, check_dtype=False) - - -def test_dt_index(data, field): - gdf_data = DatetimeIndex(data) - assert_eq(getattr(gdf_data, field), getattr(data, field), exact=False) - - -def test_setitem_datetime(): - df = DataFrame() - df["date"] = pd.date_range("20010101", "20010105").values - assert df.date.dtype.kind == "M" - - -def test_sort_datetime(): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "date": np.array( - [ - np.datetime64("2016-11-20"), - np.datetime64("2020-11-20"), - np.datetime64("2019-11-20"), - np.datetime64("1918-11-20"), - np.datetime64("2118-11-20"), - ] - ), - "vals": rng.random(5), - } - ) - - gdf = cudf.from_pandas(df) - - s_df = df.sort_values(by="date") - s_gdf = gdf.sort_values(by="date") - - assert_eq(s_df, s_gdf) - - -def test_issue_165(): - df_pandas = pd.DataFrame() - start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d") - data = [(start_date + datetime.timedelta(days=x)) for x in range(6)] - df_pandas["dates"] = data - df_pandas["num"] = [1, 2, 3, 4, 5, 6] - df_cudf = DataFrame.from_pandas(df_pandas) - - base = df_pandas.query("dates==@start_date") - test = df_cudf.query("dates==@start_date") - assert_eq(base, test) - assert len(test) > 0 - - mask = df_cudf.dates == start_date - base_mask = df_pandas.dates == start_date - assert_eq(mask, base_mask, check_names=False) - assert mask.to_pandas().sum() > 0 - - start_date_ts = pd.Timestamp(start_date) - test = df_cudf.query("dates==@start_date_ts") - base = df_pandas.query("dates==@start_date_ts") - assert_eq(base, test) - assert len(test) > 0 - - mask = df_cudf.dates == start_date_ts - base_mask = df_pandas.dates == start_date_ts - assert_eq(mask, base_mask, check_names=False) - assert mask.to_pandas().sum() > 0 - - start_date_np = np.datetime64(start_date_ts, "ns") - test = df_cudf.query("dates==@start_date_np") - base = df_pandas.query("dates==@start_date_np") - assert_eq(base, test) - assert len(test) > 0 - - mask = df_cudf.dates == start_date_np - base_mask = df_pandas.dates == start_date_np - assert_eq(mask, base_mask, check_names=False) - assert mask.to_pandas().sum() > 0 - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -def test_typecast_from_datetime(data, dtype): - pd_data = pd.Series(data) - np_data = np.array(pd_data) - gdf_data = Series(pd_data) - - np_casted = np_data.astype(dtype) - gdf_casted = gdf_data.astype(dtype) - - np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) - - -@pytest.mark.parametrize( - "dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_from_datetime_to_int64_to_datetime(data, dtype): - pd_data = pd.Series(data) - np_data = np.array(pd_data) - 
gdf_data = Series(pd_data) - - np_casted = np_data.astype(np.int64).astype(dtype) - gdf_casted = gdf_data.astype(np.int64).astype(dtype) - - np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) - - -@pytest.mark.parametrize( - "dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_to_different_datetime_resolutions(data, dtype): - data = pd.date_range( - "2019-07-16 00:00:00", - "2019-07-16 00:00:01", - freq="5555us", - name="times", - ) - pd_data = pd.Series(data) - np_data = np.array(pd_data).astype(dtype) - gdf_series = Series(pd_data).astype(dtype) - np.testing.assert_equal(np_data, gdf_series.to_numpy()) - - -@pytest.mark.parametrize( - "data", - [ - [ - "2019-07-16 00:00:00.333", - "2019-07-16 00:00:00.666", - "2019-07-16 00:00:00.888", - ], - [ - "2019-07-16 00:00:00.333333", - "2019-07-16 00:00:00.666666", - "2019-07-16 00:00:00.888888", - ], - [ - "2019-07-16 00:00:00.333333333", - "2019-07-16 00:00:00.666666666", - "2019-07-16 00:00:00.888888888", - ], - ], - ids=["ms_data", "us_data", "ns_data"], -) -@pytest.mark.parametrize( - "dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_string_timstamp_typecast_to_different_datetime_resolutions( - data, dtype -): - pd_sr = pd.Series(data) - gdf_sr = cudf.Series.from_pandas(pd_sr) - - expect = pd_sr.values.astype(dtype) - got = gdf_sr.astype(dtype).values_host - - np.testing.assert_equal(expect, got) - - -@pytest.mark.parametrize("from_dtype", NUMERIC_TYPES) -@pytest.mark.parametrize( - "to_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_to_datetime(from_dtype, to_dtype): - data = np.arange(1, 10) - np_data = data.astype(from_dtype) - gdf_data = Series(np_data) - - np_casted = np_data.astype(to_dtype) - gdf_casted = gdf_data.astype(to_dtype) - - np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) - - -@pytest.mark.parametrize("from_dtype", NUMERIC_TYPES) -@pytest.mark.parametrize( - "to_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_to_from_datetime(from_dtype, to_dtype): - data = np.arange(1, 10) - np_data = data.astype(from_dtype) - gdf_data = Series(np_data) - - np_casted = np_data.astype(to_dtype).astype(from_dtype) - gdf_casted = gdf_data.astype(to_dtype).astype(from_dtype) - - np.testing.assert_equal(np_casted, gdf_casted.to_numpy()) - - -@pytest.mark.parametrize( - "from_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -@pytest.mark.parametrize( - "to_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_typecast_from_datetime_to_datetime(from_dtype, to_dtype): - data = np.arange(1, 10) - np_data = data.astype(from_dtype) - ser = Series(np_data) - - np_casted = np_data.astype(to_dtype) - ser_casted = ser.astype(to_dtype) - - np.testing.assert_equal(np_casted, ser_casted.to_numpy()) - - -@pytest.mark.parametrize("nulls", ["some", "all"]) -def test_to_from_pandas_nulls(data, nulls): - data = np.arange(1, 10) - pd_data = pd.Series(data.astype("datetime64[ns]")) - if nulls == "some": - # Fill half the values with NaT - pd_data[list(range(0, len(pd_data), 2))] = np.datetime64("nat", "ns") - elif nulls == "all": - # Fill all the values with NaT - pd_data[:] = np.datetime64("nat", "ns") - gdf_data = Series.from_pandas(pd_data) - - expect = pd_data - got = gdf_data.to_pandas() - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "dtype", 
- ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_datetime_to_arrow(dtype): - timestamp = ( - cudf.datasets.timeseries( - start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={} - ) - .reset_index()["timestamp"] - .reset_index(drop=True) - ) - gdf = DataFrame({"timestamp": timestamp.astype(dtype)}) - assert_eq(gdf, DataFrame.from_arrow(gdf.to_arrow(preserve_index=False))) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([], dtype="datetime64[ns]"), - pd.Series(pd.date_range("2010-01-01", "2010-02-01")), - pd.Series([None, None], dtype="datetime64[ns]"), - ], -) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_datetime_unique(data, nulls): - rng = np.random.default_rng(seed=0) - psr = data.copy() - - if len(data) > 0: - if nulls == "some": - p = rng.integers(0, len(data), 2) - psr[p] = None - - gsr = cudf.from_pandas(psr) - expected = psr.unique() - got = gsr.unique() - - # Unique does not provide a guarantee on ordering. - assert_eq( - pd.Series(expected).sort_values(ignore_index=True), - got.sort_values(ignore_index=True).to_pandas(), - ) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([], dtype="datetime64[ns]"), - pd.Series(pd.date_range("2010-01-01", "2010-02-01")), - pd.Series([None, None], dtype="datetime64[ns]"), - ], -) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_datetime_nunique(data, nulls): - psr = data.copy() - rng = np.random.default_rng(seed=0) - - if len(data) > 0: - if nulls == "some": - p = rng.integers(0, len(data), 2) - psr[p] = None - - gsr = cudf.from_pandas(psr) - expected = psr.nunique() - got = gsr.nunique() - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data, expected", - [ - [["2018-01-01", None, "2019-01-31", None, "2018-01-01"], True], - [ - [ - "2018-01-01", - "2018-01-02", - "2019-01-31", - "2018-03-01", - "2018-01-01", - ], - False, - ], - [ - np.array( - ["2018-01-01", None, "2019-12-30"], dtype="datetime64[ms]" - ), - True, - ], - ], -) -def test_datetime_has_null_test(data, expected): - data = Series(data, dtype="datetime64[ms]") - pd_data = data.to_pandas() - count = pd_data.notna().value_counts() - expected_count = 0 - if False in count.keys(): - expected_count = count[False] - - assert_eq(expected, data.has_nulls) - assert_eq(expected_count, data.null_count) - - -def test_datetime_has_null_test_pyarrow(): - data = Series( - pa.array( - [0, np.iinfo("int64").min, np.iinfo("int64").max, None], - type=pa.timestamp("ns"), - ) - ) - expected = True - expected_count = 1 - - assert_eq(expected, data.has_nulls) - assert_eq(expected_count, data.null_count) - - -def test_datetime_dataframe(): - data = { - "timearray": np.array( - [0, 1, None, 2, 20, None, 897], dtype="datetime64[ms]" - ) - } - gdf = cudf.DataFrame(data) - pdf = pd.DataFrame(data) - - assert_eq(pdf, gdf) - - assert_eq(pdf.dropna(), gdf.dropna()) - - assert_eq(pdf.isnull(), gdf.isnull()) - - data = np.array([0, 1, None, 2, 20, None, 897], dtype="datetime64[ms]") - gs = cudf.Series(data) - ps = pd.Series(data) - - assert_eq(ps, gs) - - assert_eq(ps.dropna(), gs.dropna()) - - assert_eq(ps.isnull(), gs.isnull()) - - -@pytest.mark.parametrize( - "data", - [ - None, - [], - pd.Series([], dtype="float64"), - pd.Index([]), - pd.Series([1, 2, 3]), - pd.Series([0, 1, -1]), - pd.Series([0, 1, -1, 100.3, 200, 47637289]), - pd.Series(["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"]), - [1, 2, 3, 100, -123, -1, 0, 1000000000000679367], - pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": 
[4, 5]}), - pd.DataFrame( - {"year": ["2015", "2016"], "month": ["2", "3"], "day": [4, 5]} - ), - pd.DataFrame( - { - "year": [2015, 2016], - "month": [2, 3], - "day": [4, 5], - "minute": [1, 100], - "second": [90, 10], - "hour": [1, 0.5], - }, - index=["a", "b"], - ), - pd.DataFrame( - { - "year": [], - "month": [], - "day": [], - "minute": [], - "second": [], - "hour": [], - }, - ), - ["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], - pd.Index([1, 2, 3, 4]), - pd.DatetimeIndex( - ["1970-01-01 00:00:00.000000001", "1970-01-01 00:00:00.000000002"], - dtype="datetime64[ns]", - freq=None, - ), - pd.DatetimeIndex( - [], - dtype="datetime64[ns]", - freq=None, - ), - pd.Series([1, 2, 3]).astype("datetime64[ns]"), - pd.Series([1, 2, 3]).astype("datetime64[us]"), - pd.Series([1, 2, 3]).astype("datetime64[ms]"), - pd.Series([1, 2, 3]).astype("datetime64[s]"), - pd.Series([1, 2, 3]).astype("datetime64[D]"), - 1, - 100, - 17, - 53.638435454, - np.array([1, 10, 15, 478925, 2327623467]), - np.array([0.3474673, -10, 15, 478925.34345, 2327623467]), - ], -) -@pytest.mark.parametrize("dayfirst", [True, False]) -def test_cudf_to_datetime(data, dayfirst): - pd_data = data - if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): - gd_data = cudf.from_pandas(pd_data) - else: - if type(pd_data).__module__ == np.__name__: - gd_data = cp.array(pd_data) - else: - gd_data = pd_data - - expected = pd.to_datetime(pd_data, dayfirst=dayfirst) - actual = cudf.to_datetime(gd_data, dayfirst=dayfirst) - - if isinstance(expected, pd.Series): - assert_eq(actual, expected, check_dtype=False) - else: - assert_eq(actual, expected, check_exact=False) - - -@pytest.mark.parametrize( - "data", - [ - "2", - ["1", "2", "3"], - ["1/1/1", "2/2/2", "1"], - pd.Series([1, 2, 3], dtype="timedelta64[ns]"), - pd.DataFrame( - { - "year": [2015, 2016], - "month": [2, 3], - "day": [4, 5], - "minute": [1, 100], - "second": [90, 10], - "hour": [1, 0], - "blablacol": [1, 1], - } - ), - pd.DataFrame( - { - "month": [2, 3], - "day": [4, 5], - "minute": [1, 100], - "second": [90, 10], - "hour": [1, 0], - } - ), - ], -) -def test_to_datetime_errors(data): - pd_data = data - if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): - gd_data = cudf.from_pandas(pd_data) - else: - gd_data = pd_data - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - assert_exceptions_equal( - pd.to_datetime, - cudf.to_datetime, - ([pd_data],), - ([gd_data],), - ) - - -def test_to_datetime_not_implemented(): - with pytest.raises(NotImplementedError): - cudf.to_datetime([], exact=False) - - with pytest.raises(NotImplementedError): - cudf.to_datetime([], origin="julian") - - with pytest.raises(NotImplementedError): - cudf.to_datetime([], yearfirst=True) - - -@pytest.mark.parametrize( - "data", - [ - 1, - [], - pd.Series([], dtype="float64"), - pd.Index([]), - pd.Series([1, 2, 3]), - pd.Series([1, 2.4, 3]), - pd.Series([0, 1, -1]), - pd.Series([0, 1, -1, 100, 200, 47637]), - [10, 12, 1200, 15003], - pd.DatetimeIndex( - [], - dtype="datetime64[ns]", - freq=None, - ), - pd.Index([1, 2, 3, 4]), - ], -) -@pytest.mark.parametrize("unit", ["D", "s", "ms", "us", "ns"]) -def test_to_datetime_units(data, unit): - pd_data = data - if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): - gd_data = cudf.from_pandas(pd_data) - else: - gd_data = pd_data - - expected = pd.to_datetime(pd_data, unit=unit) - actual = cudf.to_datetime(gd_data, unit=unit) - - if isinstance(expected, pd.Series): - assert_eq(actual, expected, check_dtype=False) - else: 
- assert_eq(actual, expected, exact=False, check_exact=False) - - -@pytest.mark.parametrize( - "data,format", - [ - ("2012-10-11", None), - ("2012-10-11", "%Y-%m-%d"), - ("2012-10-11", "%Y-%d-%m"), - (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], None), - (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], "%Y-%m-%d"), - (["2012-10-11", "2010-01-01", "2016-07-07", "2014-02-02"], "%Y-%d-%m"), - (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], "%m-%d-%Y"), - (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], "%d-%m-%Y"), - (["10-11-2012", "01-01-2010", "07-07-2016", "02-02-2014"], None), - (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], None), - (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], "%Y/%m/%d"), - (["2012/10/11", "2010/01/01", "2016/07/07", "2014/02/02"], "%Y/%d/%m"), - (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], "%m/%d/%Y"), - (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], "%d/%m/%Y"), - (["10/11/2012", "01/01/2010", "07/07/2016", "02/02/2014"], None), - (["2021-04-13 12:30:04.123456789"], "%Y-%m-%d %H:%M:%S.%f"), - (pd.Series([2015, 2020, 2021]), "%Y"), - pytest.param( - pd.Series(["1", "2", "1"]), - "%m", - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/6109" - "https://github.com/pandas-dev/pandas/issues/35934" - ), - ), - pytest.param( - pd.Series(["14", "20", "10"]), - "%d", - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/6109" - "https://github.com/pandas-dev/pandas/issues/35934" - ), - ), - (pd.Series([2015, 2020.0, 2021.2]), "%Y"), - ], -) -@pytest.mark.parametrize("infer_datetime_format", [True, False]) -def test_to_datetime_format(data, format, infer_datetime_format): - pd_data = data - if isinstance(pd_data, (pd.Series, pd.DataFrame, pd.Index)): - gd_data = cudf.from_pandas(pd_data) - else: - gd_data = pd_data - - with expect_warning_if(True, UserWarning): - expected = pd.to_datetime( - pd_data, format=format, infer_datetime_format=infer_datetime_format - ) - with expect_warning_if(not infer_datetime_format): - actual = cudf.to_datetime( - gd_data, format=format, infer_datetime_format=infer_datetime_format - ) - - if isinstance(expected, pd.Series): - assert_eq(actual, expected, check_dtype=False) - else: - assert_eq(actual, expected, check_exact=False) - - -def test_to_datetime_data_out_of_range_for_format(): - with pytest.raises(ValueError): - cudf.to_datetime("2015-02-99", format="%Y-%m-%d") - - -def test_to_datetime_different_formats_notimplemented(): - with pytest.raises(NotImplementedError): - cudf.to_datetime(["2015-02-01", "2015-02-01 10:10:10"]) - - @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, reason="Fails in older versions of pandas.", @@ -801,92 +141,6 @@ def test_datetime_can_cast_safely(): assert sr._column.can_cast_safely(np.dtype("datetime64[ns]")) is False -# Cudf autocasts unsupported time_units -@pytest.mark.parametrize( - "dtype", - ["datetime64[D]", "datetime64[W]", "datetime64[M]", "datetime64[Y]"], -) -def test_datetime_array_timeunit_cast(dtype): - testdata = np.array( - [ - np.datetime64("2016-11-20"), - np.datetime64("2020-11-20"), - np.datetime64("2019-11-20"), - np.datetime64("1918-11-20"), - np.datetime64("2118-11-20"), - ], - dtype=dtype, - ) - - gs = Series(testdata) - ps = pd.Series(testdata) - - assert_eq(ps, gs) - - gdf = DataFrame() - gdf["a"] = np.arange(5) - gdf["b"] = testdata - - pdf = pd.DataFrame() - pdf["a"] = np.arange(5) - pdf["b"] = testdata - assert_eq(pdf, 
gdf) - - -@pytest.mark.parametrize("timeunit", ["D", "W", "M", "Y"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_datetime_scalar_timeunit_cast(timeunit): - testscalar = np.datetime64("2016-11-20", timeunit) - - gs = Series(testscalar) - ps = pd.Series(testscalar) - - assert_eq(ps, gs, check_dtype=False) - - gdf = DataFrame() - gdf["a"] = np.arange(5) - gdf["b"] = testscalar - - pdf = pd.DataFrame() - pdf["a"] = np.arange(5) - pdf["b"] = testscalar - - assert gdf["b"].dtype == cudf.dtype("datetime64[s]") - assert_eq(pdf, gdf, check_dtype=True) - - -@pytest.mark.parametrize( - "data", - [ - ["2001-01-01", "2002-02-02", "2000-01-05", "NaT"], - ["2001-01-01", "2002-02-02", "2000-01-05", None], - [None, None, None, None, None], - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -def test_str_null_to_datetime(data, dtype): - psr = pd.Series(data) - gsr = Series(data) - - assert_eq(psr.astype(dtype), gsr.astype(dtype)) - - -def test_str_to_datetime_error(): - psr = pd.Series(["2001-01-01", "2002-02-02", "2000-01-05", "None"]) - gsr = Series(["2001-01-01", "2002-02-02", "2000-01-05", "None"]) - - assert_exceptions_equal( - lfunc=psr.astype, - rfunc=gsr.astype, - lfunc_args_and_kwargs=(["datetime64[s]"],), - rfunc_args_and_kwargs=(["datetime64[s]"],), - check_exception_type=False, - ) - - @pytest.mark.parametrize( "data", [ @@ -1130,97 +384,7 @@ def test_datetime_invalid_ops(): ) -@pytest.mark.parametrize( - "data", - [ - [], - [1, 2, 3], - [None, 1, 10, 11, None], - [None, None, None, None, None], - [None], - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -@pytest.mark.parametrize( - "fill_value", - [ - np.datetime64("2005-02"), - np.datetime64("2005-02-25"), - np.datetime64("2005-02-25T03:30"), - np.datetime64("nat"), - "NaT", - ], -) -def test_datetime_fillna(data, dtype, fill_value): - sr = cudf.Series(data, dtype=dtype) - psr = sr.to_pandas() - - expected = psr.dropna() - actual = sr.dropna() - - assert_eq(expected, actual) - - expected = psr.fillna(fill_value) - actual = sr.fillna(fill_value) - - assert_eq(expected, actual) - - expected = expected.dropna() - actual = actual.dropna() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", [[1, 2, 3, None], [], [100121, 1221312, 321312321, 1232131223]] -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -@pytest.mark.parametrize( - "date_format", - [ - "%d - %m", - "%y/%H", - "%Y", - "%I - %M / %S", - "%f", - "%j", - "%p", - "%w", - "%U", - "%W", - "%G", - "%u", - "%V", - "%b", - "%B", - "%a", - "%A", - "%U_", - "_%b", - "%B*", - "%a ", - "%A1", - ], -) -def test_datetime_strftime(data, dtype, date_format): - gsr = cudf.Series(data, dtype=dtype) - psr = gsr.to_pandas() - - expected = psr.dt.strftime(date_format=date_format) - actual = gsr.dt.strftime(date_format=date_format) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("date_format", ["%c", "%x", "%X"]) -def test_datetime_strftime_not_implemented_formats(date_format): - gsr = cudf.Series([1, 2, 3], dtype="datetime64[ms]") - - with pytest.raises(NotImplementedError): - gsr.dt.strftime(date_format=date_format) - - -@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) +@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) @pytest.mark.parametrize("dtype", DATETIME_TYPES) @pytest.mark.parametrize("stat", ["mean", "quantile"]) def test_datetime_stats(data, dtype, stat): @@ -1274,764 +438,6 @@ def 
test_datetime_reductions(data, op, dtype): assert_eq(expected, actual) -@pytest.mark.parametrize("timezone", ["", "Z"]) -@pytest.mark.parametrize( - "data", - [ - "2002-10-27T04:30", - "2002-10-27T04:30:00", - "2002-10-27T04:30:00.000", - "2002-10-27T04:30:00.000000", - "2002-10-27T04:30:00.000000000", - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -def test_datetime_infer_format(data, timezone, dtype): - ts_data = [data + timezone] - sr = cudf.Series(ts_data) - psr = pd.Series(ts_data) - if not timezone: - expected = psr.astype(dtype) - actual = sr.astype(dtype) - - assert_eq(expected, actual) - else: - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - # pandas doesn't allow parsing "Z" to naive type - sr.astype(dtype) - - -def test_dateoffset_instance_subclass_check(): - assert not issubclass(pd.DateOffset, cudf.DateOffset) - assert not isinstance(pd.DateOffset(), cudf.DateOffset) - - -def test_datetime_to_datetime_error(): - assert_exceptions_equal( - lfunc=pd.to_datetime, - rfunc=cudf.to_datetime, - lfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"],), - rfunc_args_and_kwargs=(["02-Oct-2017 09:30", "%d-%B-%Y %H:%M"],), - check_exception_type=False, - ) - - -def test_is_leap_year(): - data = [ - "2020-05-31 08:00:00", - None, - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - None, - "1900-02-28 07:00:00", - "1800-03-14 07:30:00", - "2100-03-14 07:30:00", - "1970-01-01 00:00:00", - "1969-12-31 12:59:00", - ] - - # Series - ps = pd.Series(data, dtype="datetime64[s]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_leap_year - got = gs.dt.is_leap_year - - assert_eq(expect, got) - - # DatetimeIndex - pIndex = pd.DatetimeIndex(data) - gIndex = cudf.from_pandas(pIndex) - - expect2 = pIndex.is_leap_year - got2 = gIndex.is_leap_year - - assert_eq(expect2, got2) - - -def test_quarter(): - data = [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - "1900-02-28 07:00:00", - "1800-03-14 07:30:00", - "2100-03-14 07:30:00", - "1970-01-01 00:00:00", - "1969-12-31 12:59:00", - ] - dtype = "datetime64[s]" - - # Series - ps = pd.Series(data, dtype=dtype) - gs = cudf.from_pandas(ps) - - expect = ps.dt.quarter - got = gs.dt.quarter - - assert_eq(expect, got, check_dtype=False) - - # DatetimeIndex - pIndex = pd.DatetimeIndex(data) - gIndex = cudf.from_pandas(pIndex) - - expect2 = pIndex.quarter - got2 = gIndex.quarter - - assert_eq(expect2.values, got2.values) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([], dtype="datetime64[ns]"), - pd.Series(pd.date_range("2010-01-01", "2010-02-01")), - pd.Series([None, None], dtype="datetime64[ns]"), - pd.Series("2020-05-31 08:00:00", dtype="datetime64[s]"), - pd.Series( - pd.date_range(start="2021-07-25", end="2021-07-30"), - index=["a", "b", "c", "d", "e", "f"], - ), - ], -) -def test_isocalendar_series(data): - ps = data.copy() - gs = cudf.from_pandas(ps) - - expect = ps.dt.isocalendar() - got = gs.dt.isocalendar() - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - [], - [None, None], - [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - ], - ["2100-03-14 07:30:00"], - ], -) -def test_isocalendar_index(data): - ps = pd.DatetimeIndex(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.isocalendar() - got = gs.isocalendar() - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -def test_days_in_months(dtype): - nrows 
= 1000 - - data = dataset_generator.rand_dataframe( - dtypes_meta=[ - {"dtype": dtype, "null_frequency": 0.4, "cardinality": nrows} - ], - rows=nrows, - use_threads=False, - seed=23, - ) - - ps = data.to_pandas()["0"] - gs = cudf.from_pandas(ps) - - assert_eq(ps.dt.days_in_month, gs.dt.days_in_month) - - -def test_is_month_start(): - data = [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_month_start - got = gs.dt.is_month_start - - assert_eq(expect, got) - - -################################################################## -# Date Range Tests # -################################################################## - -date_range_test_freq = [ - {"months": 3, "years": 1}, - {"hours": 10, "days": 57, "nanoseconds": 3}, - "83D", - "17h", - "-680min", - "110546s", - "110546789ms", - "110546789248us", -] - - -@pytest.fixture( - params=[ - "2000-02-13 08:41:06", - "1996-11-21 04:05:30", - "1970-01-01 00:00:00", - "1831-05-08 15:23:21", - ], - ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], -) -def start(request): - return request.param - - -@pytest.fixture( - params=[ - "2000-02-13 08:41:06", - "1996-11-21 04:05:30", - "1970-01-01 00:00:00", - "1831-05-08 15:23:21", - ], - ids=["leap_year", "non_leap_year", "unix_epoch_time_0", "random_date"], -) -def end(request): - return request.param - - -@pytest.fixture(params=[1, 10]) -def periods(request): - return request.param - - -@pytest.fixture( - params=[ - {"months": 3, "years": 1}, - {"hours": 10, "days": 57, "nanoseconds": 3}, - "83D", - "17h", - "-680min", - "110546s", - "110546789ms", - "110546789248us", - ] -) -def freq(request): - return request.param - - -def test_date_range_start_end_periods(start, end, periods): - expect = pd.date_range(start=start, end=end, periods=periods, name="a") - got = cudf.date_range(start=start, end=end, periods=periods, name="a") - - np.testing.assert_allclose( - expect.to_numpy().astype("int64"), - got.to_pandas().to_numpy().astype("int64"), - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_date_range_start_end_freq(start, end, freq): - if isinstance(freq, str): - _gfreq = _pfreq = freq - else: - _gfreq = cudf.DateOffset(**freq) - _pfreq = pd.DateOffset(**freq) - - expect = pd.date_range(start=start, end=end, freq=_pfreq, name="a") - got = cudf.date_range(start=start, end=end, freq=_gfreq, name="a") - - np.testing.assert_allclose( - expect.to_numpy().astype("int64"), - got.to_pandas().to_numpy().astype("int64"), - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_date_range_start_freq_periods(start, freq, periods): - if isinstance(freq, str): - _gfreq = _pfreq = freq - else: - _gfreq = cudf.DateOffset(**freq) - _pfreq = pd.DateOffset(**freq) - - expect = pd.date_range(start=start, periods=periods, freq=_pfreq, name="a") - got = cudf.date_range(start=start, periods=periods, freq=_gfreq, name="a") - - np.testing.assert_allclose( - expect.to_numpy().astype("int64"), - got.to_pandas().to_numpy().astype("int64"), - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/46877", -) -def test_date_range_end_freq_periods(end, freq, periods): - if 
isinstance(freq, str): - _gfreq = _pfreq = freq - else: - _gfreq = cudf.DateOffset(**freq) - _pfreq = pd.DateOffset(**freq) - - expect = pd.date_range(end=end, periods=periods, freq=_pfreq, name="a") - got = cudf.date_range(end=end, periods=periods, freq=_gfreq, name="a") - - np.testing.assert_allclose( - expect.to_numpy().astype("int64"), - got.to_pandas().to_numpy().astype("int64"), - ) - - -def test_date_range_freq_does_not_divide_range(): - expect = pd.date_range( - "2001-01-01 00:00:00.000000", "2001-01-01 00:00:00.000010", freq="3us" - ) - got = cudf.date_range( - "2001-01-01 00:00:00.000000", "2001-01-01 00:00:00.000010", freq="3us" - ) - np.testing.assert_allclose( - expect.to_numpy().astype("int64"), - got.to_pandas().to_numpy().astype("int64"), - ) - - -@pytest.mark.parametrize( - "kwargs", - [ - {"nanoseconds": 1}, - {"months": 1}, - ], -) -def test_date_range_raise_overflow(kwargs): - start = np.datetime64(np.iinfo("int64").max, "ns") - periods = 2 - freq = cudf.DateOffset(**kwargs) - with pytest.raises(pd.errors.OutOfBoundsDatetime): - cudf.date_range(start=start, periods=periods, freq=freq) - - -@pytest.mark.parametrize( - "freqstr_unsupported", - [ - "1ME", - "2SME", - "3MS", - "4BME", - "5CBME", - "6SMS", - "7BMS", - "8CBMS", - "QE", - "2BQE", - "3BQS", - "10YE", - "9BYE", - "8YS", - "7BYS", - "bh", - "B", - ], -) -def test_date_range_raise_unsupported(freqstr_unsupported): - if not PANDAS_GE_220 and freqstr_unsupported.endswith("E"): - pytest.skip(reason="YE, etc. support was added in pandas 2.2") - - s, e = "2001-01-01", "2008-01-31" - pd.date_range(start=s, end=e, freq=freqstr_unsupported) - with pytest.raises(ValueError, match="does not yet support"): - cudf.date_range(start=s, end=e, freq=freqstr_unsupported) - - # We also check that these values are unsupported when using lowercase - # characters. We exclude the value 3MS (every 3 month starts) because 3ms - # is a valid frequency for every 3 milliseconds. 
- if freqstr_unsupported != "3MS": - freqstr_unsupported = freqstr_unsupported.lower() - with pytest.raises(ValueError, match="does not yet support"): - with expect_warning_if( - PANDAS_GE_220 and freqstr_unsupported not in {"b", "bh"} - ): - cudf.date_range(start=s, end=e, freq=freqstr_unsupported) - - -################################################################## -# End of Date Range Test # -################################################################## - - -def test_is_month_end(): - data = [ - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_month_end - got = gs.dt.is_month_end - - assert_eq(expect, got) - - -def test_is_year_start(): - data = [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-01-01", - "1800-03-14", - "2100-03-10", - "1970-01-01", - "1969-12-11", - "2017-12-30", - "2017-12-31", - "2018-01-01", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_year_start - got = gs.dt.is_year_start - - assert_eq(expect, got) - - -def test_is_year_end(): - data = [ - "2020-05-31", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-12-31", - "1800-03-14", - "2017-12-30", - "2017-12-31", - "2020-12-31 08:00:00", - None, - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - None, - "1800-12-14 07:30:00", - "2100-12-14 07:30:00", - "2020-05-31", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_year_end - got = gs.dt.is_year_end - - assert_eq(expect, got) - - -def test_is_quarter_start(): - data = [ - "2020-05-01", - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-04-1", - "1970-01-01", - "1969-12-11", - "2020-12-31", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_quarter_start - got = gs.dt.is_quarter_start - - assert_eq(expect, got) - - -def test_is_quarter_end(): - data = [ - "2020-05-01", - "2020-05-31", - "2020-02-29", - None, - "1999-12-01", - "2000-12-21", - None, - "1900-02-28", - "1800-03-14", - "2100-03-10", - "1970-04-1", - "1970-01-01", - "1969-12-11", - "2020-12-31", - ] - ps = pd.Series(data, dtype="datetime64[ns]") - gs = cudf.from_pandas(ps) - - expect = ps.dt.is_quarter_end - got = gs.dt.is_quarter_end - - assert_eq(expect, got) - - -def test_error_values(): - s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - with pytest.raises(NotImplementedError, match="cupy does not support"): - s.values - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/52761", -) -@pytest.mark.parametrize("time_type", DATETIME_TYPES) -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] -) -def test_ceil(data, time_type, resolution): - data = [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:10", - "2000-12-31 04:00:05", - "1900-02-28 07:00:06", - "1800-03-14 07:30:20", - "2100-03-14 07:30:20", - "1970-01-01 00:00:09", - "1969-12-31 12:59:10", - ] - gs = cudf.Series(data, dtype=time_type) - ps = gs.to_pandas() - - expect = ps.dt.ceil(resolution) - got = gs.dt.ceil(resolution) - assert_eq(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - 
reason="https://github.com/pandas-dev/pandas/issues/52761", -) -@pytest.mark.parametrize("time_type", DATETIME_TYPES) -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] -) -def test_floor(time_type, resolution): - data = [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:10", - "2000-12-31 04:00:05", - "1900-02-28 07:00:06", - "1800-03-14 07:30:20", - "2100-03-14 07:30:20", - "1970-01-01 00:00:09", - "1969-12-31 12:59:10", - ] - gs = cudf.Series(data, dtype=time_type) - ps = gs.to_pandas() - - expect = ps.dt.floor(resolution) - got = gs.dt.floor(resolution) - assert_eq(expect, got) - - -@pytest.mark.parametrize("time_type", DATETIME_TYPES) -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "min", "s", "ms", "us", "ns"] -) -def test_round(time_type, resolution): - data = [ - "2020-05-31 08:00:00", - "1999-12-31 18:40:10", - "2000-12-31 04:00:05", - "1900-02-28 07:00:06", - "1800-03-14 07:30:20", - "2100-03-14 07:30:20", - "1970-01-01 00:00:09", - "1969-12-31 12:59:10", - ] - gs = cudf.Series(data, dtype=time_type) - ps = gs.to_pandas() - - expect = ps.dt.round(resolution) - got = gs.dt.round(resolution) - assert_eq(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "idx", - [ - pd.DatetimeIndex([]), - pd.DatetimeIndex(["2010-05-31"]), - pd.date_range("2000-01-01", "2000-12-31", periods=21), - ], -) -@pytest.mark.parametrize( - "offset", - [ - "10Y", - "6M", - "M", - "31D", - "0H", - "44640T", - "44640min", - "2678000S", - "2678000000L", - "2678000000ms", - "2678000000000U", - "2678000000000us", - "2678000000000000N", - "2678000000000000ns", - ], -) -def test_first(idx, offset): - p = pd.Series(range(len(idx)), dtype="int64", index=idx) - g = cudf.from_pandas(p) - - with pytest.warns(FutureWarning): - expect = p.first(offset=offset) - with pytest.warns(FutureWarning): - got = g.first(offset=offset) - - assert_eq(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -def test_first_start_at_end_of_month(): - idx = pd.DatetimeIndex( - [ - "2020-01-31", - "2020-02-15", - "2020-02-29", - "2020-03-15", - "2020-03-31", - "2020-04-15", - "2020-04-30", - ] - ) - offset = "3M" - p = pd.Series(range(len(idx)), index=idx) - g = cudf.from_pandas(p) - - with pytest.warns(FutureWarning): - expect = p.first(offset=offset) - with pytest.warns(FutureWarning): - got = g.first(offset=offset) - - assert_eq(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "idx", - [ - pd.DatetimeIndex([]), - pd.DatetimeIndex(["2010-05-31"]), - pd.date_range("2000-01-01", "2000-12-31", periods=21), - ], -) -@pytest.mark.parametrize( - "offset", - [ - "10Y", - "6M", - "M", - "31D", - "0H", - "44640T", - "44640min", - "2678000S", - "2678000000L", - "2678000000ms", - "2678000000000U", - "2678000000000us", - "2678000000000000N", - "2678000000000000ns", - ], -) -def test_last(idx, offset): - p = pd.Series(range(len(idx)), dtype="int64", index=idx) - g = cudf.from_pandas(p) - - with pytest.warns(FutureWarning): - expect = p.last(offset=offset) - with pytest.warns(FutureWarning): - got = g.last(offset=offset) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - [ - "2020-01-31", - "2020-02-15", - "2020-02-29", - 
"2020-03-15", - "2020-03-31", - "2020-04-15", - "2020-04-30", - ], - [43534, 43543, 37897, 2000], - ], -) -@pytest.mark.parametrize("dtype", [None, "datetime64[ns]"]) -def test_datetime_constructor(data, dtype): - expected = pd.DatetimeIndex(data=data, dtype=dtype) - actual = cudf.DatetimeIndex(data=data, dtype=dtype) - - assert_eq(expected, actual) - - expected = pd.DatetimeIndex(data=pd.Series(data), dtype=dtype) - actual = cudf.DatetimeIndex(data=cudf.Series(data), dtype=dtype) - - assert_eq(expected, actual) - - def test_datetime_binop_tz_timestamp(op): s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") @@ -2059,391 +465,6 @@ def test_datetime_series_cmpops_pandas_compatibility(op): assert_eq(expect, got) -def test_datetime_getitem_na(): - s = cudf.Series([1, 2, None, 3], dtype="datetime64[ns]") - assert s[2] is cudf.NaT - - -def test_daterange_pandas_compatibility(): - with cudf.option_context("mode.pandas_compatible", True): - expected = pd.date_range( - "2010-01-01", "2010-02-01", periods=10, name="times" - ) - actual = cudf.date_range( - "2010-01-01", "2010-02-01", periods=10, name="times" - ) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,dtype,freq", - [ - ([10], "datetime64[ns]", "2ns"), - ([10, 12, 14, 16], "datetime64[ns]", "2ns"), - ([10, 11, 12, 13], "datetime64[ns]", "1ns"), - ([100, 200, 300, 400], "datetime64[s]", "100s"), - ([101, 201, 301, 401], "datetime64[ms]", "100ms"), - ], -) -def test_datetime_index_with_freq(data, dtype, freq): - actual = cudf.DatetimeIndex(data, dtype=dtype, freq=freq) - expected = pd.DatetimeIndex(data, dtype=dtype, freq=freq) - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "data,dtype,freq", - [ - ([10, 1232, 13244, 13426], "datetime64[ns]", "2ns"), - ([10, 11, 12, 13], "datetime64[ns]", "1s"), - ([10000, 200, 300, 400], "datetime64[s]", "100s"), - ([107871, 201, 301, 401], "datetime64[ms]", "100ns"), - ], -) -def test_datetime_index_freq_error(data, dtype, freq): - assert_exceptions_equal( - pd.DatetimeIndex, - cudf.DatetimeIndex, - ([data], {"dtype": dtype, "freq": freq}), - ([data], {"dtype": dtype, "freq": freq}), - ) - - -def test_strings_with_utc_offset_not_implemented(): - with pytest.raises(NotImplementedError): - DatetimeIndex(["2022-07-22 00:00:00+02:00"]) - - -@pytest.mark.parametrize("code", ["z", "Z"]) -def test_format_timezone_not_implemented(code): - with pytest.raises(NotImplementedError): - cudf.to_datetime( - ["2020-01-01 00:00:00 UTC"], format=f"%Y-%m-%d %H:%M:%S %{code}" - ) - - -@pytest.mark.parametrize("tz", ["UTC-3", "+01:00"]) -def test_utc_offset_not_implemented(tz): - with pytest.raises((NotImplementedError, ValueError)): - cudf.to_datetime([f"2020-01-01 00:00:00{tz}"]) - - -def test_Z_utc_offset(): - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - cudf.to_datetime(["2020-01-01 00:00:00Z"]) - - result = cudf.to_datetime(["2020-01-01 00:00:00Z"]) - expected = cudf.to_datetime(["2020-01-01 00:00:00"]) - assert_eq(result, expected) - - -@pytest.mark.parametrize("arg", [True, False]) -def test_args_not_datetime_typerror(arg): - with pytest.raises(TypeError): - cudf.to_datetime([arg]) - - -@pytest.mark.parametrize( - "data, dtype", - [ - [ - [ - "2000-01-01 00:00:00.000000000", - "2000-01-01 00:00:00.000000000", - "2000-01-01 00:00:00.000000000", - ], - "datetime64[s]", - ], - [ - [ - "2000-01-01 00:00:00.000000000", - None, - "2000-01-01 00:00:00.000000000", 
- ], - "datetime64[s]", - ], - [ - [ - "2000-01-01 00:00:00.001000000", - "2000-01-01 00:00:00.000000000", - "2000-01-01 00:00:00.000000000", - ], - "datetime64[us]", - ], - [ - [ - "2000-01-01 00:00:00.010000000", - "2000-01-01 00:00:00.020000000", - "2000-01-01 00:00:00.030000000", - ], - "datetime64[ms]", - ], - [ - [ - "2000-01-01 00:00:00.010000000", - "2000-01-01 00:00:00.020000000", - None, - ], - "datetime64[ms]", - ], - [ - [ - "2000-01-01 00:00:00.000001000", - "2000-01-01 00:00:00.000000000", - "2000-01-01 00:00:00.000004000", - ], - "datetime64[us]", - ], - [ - [ - None, - "2000-01-01 00:00:00.000000000", - "2000-01-01 00:00:00.000004000", - ], - "datetime64[us]", - ], - [ - [ - "2000-01-01 00:00:00.000000010", - "2000-01-01 00:00:00.000000002", - "2000-01-01 00:00:00.000000000", - ], - "datetime64[ns]", - ], - [ - [ - "2000-01-01 00:00:00.000000010", - None, - "2000-01-01 00:00:00.000000000", - ], - "datetime64[ns]", - ], - [ - [ - "2000-01-01 00:00:01.000000000", - "2000-01-01 00:00:40.000000000", - "2000-01-01 00:00:59.000000000", - ], - "datetime64[s]", - ], - [ - [ - "2000-01-01 00:10:00.000000000", - "2000-01-01 00:30:40.000000000", - "2000-01-01 00:59:00.000000000", - ], - "datetime64[s]", - ], - [ - [ - "2000-01-01 07:00:00.000000000", - "2000-01-01 08:00:00.000000000", - None, - ], - "datetime64[s]", - ], - [[None, None, None], "datetime64[s]"], - [[], "datetime64[s]"], - [ - [ - "2000-01-01 00:10:00.123456789", - "2000-01-01 00:30:40.123123456", - "2000-01-01 00:59:00.675347634", - ], - "datetime64[ns]", - ], - ], -) -def test_datetime_to_str(data, dtype): - gs = cudf.Series(data, dtype=dtype) - ps = gs.to_pandas() - - with cudf.option_context("mode.pandas_compatible", True): - actual = gs.astype("str") - - expected = ps.astype("string") - - assert_eq(actual.to_pandas(nullable=True), expected) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_datetime_string_to_datetime_resolution_loss_raises(): - data = ["2020-01-01 00:00:00.00001"] - dtype = "datetime64[s]" - with pytest.raises(ValueError): - cudf.Series(data, dtype=dtype) - with pytest.raises(ValueError): - pd.Series(data, dtype=dtype) - - -def test_dateimeindex_from_noniso_string(): - data = ["20160920", "20160925"] - gdti = cudf.DatetimeIndex(data) - pdti = pd.DatetimeIndex(data) - - assert_eq(gdti, pdti) - - -@pytest.mark.parametrize("errors", ["coerce", "ignore"]) -def test_to_datetime_errors_non_scalar_not_implemented(errors): - with pytest.raises(NotImplementedError): - cudf.to_datetime([1, ""], unit="s", errors=errors) - - -@pytest.mark.parametrize( - "freqstr", - [ - "H", - "N", - "T", - "L", - "U", - "S", - ], -) -def test_datetime_raise_warning(freqstr): - t = cudf.Series( - ["2001-01-01 00:04:45", "2001-01-01 00:04:58", "2001-01-01 00:05:04"], - dtype="datetime64[ns]", - ) - with pytest.warns(FutureWarning): - t.dt.ceil(freqstr) - - -def test_timezone_pyarrow_array(): - pa_array = pa.array( - [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)], - type=pa.timestamp("ns", "UTC"), - ) - result = cudf.Series(pa_array) - expected = pa_array.to_pandas() - assert_eq(result, expected) - - -def test_to_datetime_errors_ignore_deprecated(): - with pytest.warns(FutureWarning): - cudf.to_datetime("2001-01-01 00:04:45", errors="ignore") - - -def test_date_range_freq_default(): - result = pd.date_range("2020-01-01", periods=2, name="foo") - expected = cudf.date_range("2020-01-01", periods=2, name="foo") - assert_eq(result, 
expected) - - -def test_date_range_tz(): - result = pd.date_range("2020-01-01", periods=2, tz="UTC") - expected = cudf.date_range("2020-01-01", periods=2, tz="UTC") - assert_eq(result, expected) - - result = pd.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") - expected = cudf.date_range("2020-01-01", "2020-01-02", periods=2, tz="UTC") - assert_eq(result, expected) - - -@pytest.mark.parametrize("meth", ["day_name", "month_name"]) -@pytest.mark.parametrize("klass", [pd.Series, pd.DatetimeIndex]) -def test_day_month_name(meth, klass): - data = [ - "2020-05-31 08:00:00", - None, - "1999-12-31 18:40:00", - "2000-12-31 04:00:00", - None, - "1900-02-28 07:00:00", - "1800-03-14 07:30:00", - "2100-03-14 07:30:00", - "1970-01-01 00:00:00", - "1969-12-31 12:59:00", - ] - - p_obj = klass(data, dtype="datetime64[s]") - g_obj = cudf.from_pandas(p_obj) - - if klass is pd.Series: - p_obj = p_obj.dt - g_obj = g_obj.dt - - expect = getattr(p_obj, meth)() - got = getattr(g_obj, meth)() - - assert_eq(expect, got) - - -@pytest.mark.parametrize("meth", ["day_name", "month_name"]) -@pytest.mark.parametrize("klass", [cudf.Series, cudf.DatetimeIndex]) -def test_day_month_name_locale_not_implemented(meth, klass): - obj = klass(cudf.date_range("2020-01-01", periods=7)) - if klass is cudf.Series: - obj = obj.dt - with pytest.raises(NotImplementedError): - getattr(obj, meth)(locale="pt_BR.utf8") - - -@pytest.mark.parametrize( - "attr", - [ - "is_month_start", - "is_month_end", - "is_quarter_end", - "is_quarter_start", - "is_year_end", - "is_year_start", - "days_in_month", - "timetz", - "time", - "date", - ], -) -def test_dti_datetime_attributes(attr): - data = [ - "2020-01-01", - "2020-01-31", - "2020-03-01", - "2020-03-31", - "2020-03-31", - "2020-12-31", - None, - ] - pd_dti = pd.DatetimeIndex(data, name="foo") - cudf_dti = cudf.from_pandas(pd_dti) - - result = getattr(cudf_dti, attr) - expected = getattr(pd_dti, attr) - if isinstance(result, np.ndarray): - # numpy doesn't assert object arrays with NaT correctly - tm.assert_numpy_array_equal(result, expected) - else: - assert_eq(result, expected) - - -@pytest.mark.parametrize("attr", ["freq", "unit"]) -def test_dti_properties(attr): - pd_dti = pd.DatetimeIndex( - ["2020-01-01", "2020-01-02"], dtype="datetime64[ns]" - ) - cudf_dti = cudf.DatetimeIndex( - ["2020-01-01", "2020-01-02"], dtype="datetime64[ns]" - ) - - result = getattr(cudf_dti, attr) - expected = getattr(pd_dti, attr) - assert result == expected - - -def test_dti_asi8(): - pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") - cudf_dti = cudf.from_pandas(pd_dti) - - result = pd_dti.asi8 - expected = cudf_dti.asi8 - assert_eq(result, expected) - - @pytest.mark.parametrize( "method, kwargs", [["mean", {}], ["std", {}], ["std", {"ddof": 0}]], @@ -2455,40 +476,3 @@ def test_dti_reduction(method, kwargs): result = getattr(cudf_dti, method)(**kwargs) expected = getattr(pd_dti, method)(**kwargs) assert result == expected - - -@pytest.mark.parametrize( - "method, kwargs", - [ - ["to_pydatetime", {}], - ["to_period", {"freq": "D"}], - ["strftime", {"date_format": "%Y-%m-%d"}], - ], -) -def test_dti_methods(method, kwargs): - pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") - cudf_dti = cudf.from_pandas(pd_dti) - - result = getattr(cudf_dti, method)(**kwargs) - expected = getattr(pd_dti, method)(**kwargs) - assert_eq(result, expected) - - -def test_date_range_start_end_divisible_by_freq(): - result = cudf.date_range("2011-01-01", "2011-01-02", freq="h") - expected = 
pd.date_range("2011-01-01", "2011-01-02", freq="h") - assert_eq(result, expected) - - -def test_writable_numpy_array(): - gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]") - expected_flags = pd.Index( - [1, 2, 3], dtype="datetime64[ns]" - )._data._ndarray.flags - - actual_flags = gi.to_pandas()._data._ndarray.flags - assert expected_flags.c_contiguous == actual_flags.c_contiguous - assert expected_flags.f_contiguous == actual_flags.f_contiguous - assert expected_flags.writeable == actual_flags.writeable - assert expected_flags.aligned == actual_flags.aligned - assert expected_flags.writebackifcopy == actual_flags.writebackifcopy From bc9d4bb91b7200578980461b70db53f3109a2958 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 11 Aug 2025 19:25:59 -0400 Subject: [PATCH 099/366] Skip managed memory test if managed memory not supported in cudf-polars (#19653) Skips the test: `test_cudf_polars_enable_disable_managed_memory` if managed memory is not supported. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19653 --- python/cudf_polars/tests/test_config.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 52651fbe5c8..3de18e0a4e4 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -10,6 +10,7 @@ import polars as pl from polars.testing.asserts import assert_frame_equal +import pylibcudf as plc import rmm import cudf_polars.utils.config @@ -83,17 +84,21 @@ def test_invalid_memory_resource_raises(mr): q.collect(engine=pl.GPUEngine(memory_resource=mr)) -@pytest.mark.parametrize("disable_managed_memory", ["1", "0"]) -def test_cudf_polars_enable_disable_managed_memory(monkeypatch, disable_managed_memory): +@pytest.mark.skipif( + not plc.utils._is_concurrent_managed_access_supported(), + reason="managed memory not supported", +) +@pytest.mark.parametrize("enable_managed_memory", ["1", "0"]) +def test_cudf_polars_enable_disable_managed_memory(monkeypatch, enable_managed_memory): q = pl.LazyFrame({"a": [1, 2, 3]}) with monkeypatch.context() as monkeycontext: monkeycontext.setenv( - "POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", disable_managed_memory + "POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", enable_managed_memory ) result = q.collect(engine=pl.GPUEngine()) - mr = default_memory_resource(0, bool(disable_managed_memory == "1")) - if disable_managed_memory == "1": + mr = default_memory_resource(0, bool(enable_managed_memory == "1")) + if enable_managed_memory == "1": assert isinstance(mr, rmm.mr.PrefetchResourceAdaptor) assert isinstance(mr.upstream_mr, rmm.mr.PoolMemoryResource) else: From 1d6aaf12bcc5a57bff3c0cd882c2019425b63279 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 11 Aug 2025 17:02:13 -0700 Subject: [PATCH 100/366] Add streams support to datetime APIs (#19654) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19654 --- python/pylibcudf/pylibcudf/datetime.pxd | 24 +++-- python/pylibcudf/pylibcudf/datetime.pyi | 30 ++++--- python/pylibcudf/pylibcudf/datetime.pyx | 87 ++++++++++++------- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 34 +++++--- 4 files changed, 116 insertions(+), 59 deletions(-) diff --git 
a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index ce295990d26..7e92acaaff8 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -3,6 +3,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency from pylibcudf.scalar cimport Scalar +from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -10,35 +11,40 @@ ctypedef fused ColumnOrScalar: cpdef Column extract_datetime_component( Column input, - datetime_component component + datetime_component component, + Stream stream = * ) cpdef Column ceil_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream = * ) cpdef Column floor_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream = * ) cpdef Column round_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream = * ) cpdef Column add_calendrical_months( Column timestamps, ColumnOrScalar months, + Stream stream = * ) -cpdef Column day_of_year(Column input) +cpdef Column day_of_year(Column input, Stream stream = *) -cpdef Column is_leap_year(Column input) +cpdef Column is_leap_year(Column input, Stream stream = *) -cpdef Column last_day_of_month(Column input) +cpdef Column last_day_of_month(Column input, Stream stream = *) -cpdef Column extract_quarter(Column input) +cpdef Column extract_quarter(Column input, Stream stream = *) -cpdef Column days_in_month(Column input) +cpdef Column days_in_month(Column input, Stream stream = *) diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi index 8eedaeefe61..3b464d3bf11 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -27,16 +29,24 @@ class RoundingFrequency(IntEnum): NANOSECOND = ... def extract_datetime_component( - input: Column, component: DatetimeComponent + input: Column, component: DatetimeComponent, stream: Stream | None = None +) -> Column: ... +def ceil_datetimes( + input: Column, freq: RoundingFrequency, stream: Stream | None = None +) -> Column: ... +def floor_datetimes( + input: Column, freq: RoundingFrequency, stream: Stream | None = None +) -> Column: ... +def round_datetimes( + input: Column, freq: RoundingFrequency, stream: Stream | None = None ) -> Column: ... -def ceil_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... -def floor_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... -def round_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... def add_calendrical_months( - input: Column, months: Column | Scalar + input: Column, months: Column | Scalar, stream: Stream | None = None +) -> Column: ... +def day_of_year(input: Column, stream: Stream | None = None) -> Column: ... +def is_leap_year(input: Column, stream: Stream | None = None) -> Column: ... +def last_day_of_month( + input: Column, stream: Stream | None = None ) -> Column: ... -def day_of_year(input: Column) -> Column: ... -def is_leap_year(input: Column) -> Column: ... -def last_day_of_month(input: Column) -> Column: ... -def extract_quarter(input: Column) -> Column: ... -def days_in_month(input: Column) -> Column: ... +def extract_quarter(input: Column, stream: Stream | None = None) -> Column: ... 
+def days_in_month(input: Column, stream: Stream | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index da736755848..0aa13917c97 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -23,8 +23,11 @@ from pylibcudf.libcudf.datetime import \ rounding_frequency as RoundingFrequency # no-cython-lint from cython.operator cimport dereference +from rmm.pylibrmm.stream cimport Stream from .column cimport Column +from .scalar cimport Scalar +from .utils cimport _get_stream __all__ = [ "DatetimeComponent", @@ -43,7 +46,8 @@ __all__ = [ cpdef Column extract_datetime_component( Column input, - datetime_component component + datetime_component component, + Stream stream=None ): """ Extract a datetime component from a datetime column. @@ -64,13 +68,16 @@ cpdef Column extract_datetime_component( """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_extract_datetime_component(input.view(), component) - return Column.from_libcudf(move(result)) + result = cpp_extract_datetime_component(input.view(), component, stream.view()) + return Column.from_libcudf(move(result), stream) cpdef Column ceil_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream=None ): """ Round datetimes up to the nearest multiple of the given frequency. @@ -91,13 +98,16 @@ cpdef Column ceil_datetimes( """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_ceil_datetimes(input.view(), freq) - return Column.from_libcudf(move(result)) + result = cpp_ceil_datetimes(input.view(), freq, stream.view()) + return Column.from_libcudf(move(result), stream) cpdef Column floor_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream=None ): """ Round datetimes down to the nearest multiple of the given frequency. @@ -118,13 +128,16 @@ cpdef Column floor_datetimes( """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_floor_datetimes(input.view(), freq) - return Column.from_libcudf(move(result)) + result = cpp_floor_datetimes(input.view(), freq, stream.view()) + return Column.from_libcudf(move(result), stream) cpdef Column round_datetimes( Column input, - rounding_frequency freq + rounding_frequency freq, + Stream stream=None ): """ Round datetimes to the nearest multiple of the given frequency. 
@@ -145,13 +158,16 @@ cpdef Column round_datetimes( """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_round_datetimes(input.view(), freq) - return Column.from_libcudf(move(result)) + result = cpp_round_datetimes(input.view(), freq, stream.view()) + return Column.from_libcudf(move(result), stream) cpdef Column add_calendrical_months( Column input, ColumnOrScalar months, + Stream stream=None ): """ Adds or subtracts a number of months from the datetime @@ -177,15 +193,18 @@ cpdef Column add_calendrical_months( cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: result = cpp_add_calendrical_months( input.view(), months.view() if ColumnOrScalar is Column else - dereference(months.get()) + dereference(months.get()), + stream.view() ) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column day_of_year(Column input): +cpdef Column day_of_year(Column input, Stream stream=None): """ Computes the day number since the start of the year from the datetime. The value is between @@ -205,11 +224,13 @@ cpdef Column day_of_year(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_day_of_year(input.view()) - return Column.from_libcudf(move(result)) + result = cpp_day_of_year(input.view(), stream.view()) + return Column.from_libcudf(move(result), stream) -cpdef Column is_leap_year(Column input): +cpdef Column is_leap_year(Column input, Stream stream=None): """ Check if the year of the given date is a leap year. @@ -228,11 +249,13 @@ cpdef Column is_leap_year(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_is_leap_year(input.view()) - return Column.from_libcudf(move(result)) + result = cpp_is_leap_year(input.view(), stream.view()) + return Column.from_libcudf(move(result), stream) -cpdef Column last_day_of_month(Column input): +cpdef Column last_day_of_month(Column input, Stream stream=None): """ Computes the last day of the month. @@ -251,11 +274,13 @@ cpdef Column last_day_of_month(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_last_day_of_month(input.view()) - return Column.from_libcudf(move(result)) + result = cpp_last_day_of_month(input.view(), stream.view()) + return Column.from_libcudf(move(result), stream) -cpdef Column extract_quarter(Column input): +cpdef Column extract_quarter(Column input, Stream stream=None): """ Returns the quarter (ie. a value from {1, 2, 3, 4}) that the date is in. @@ -274,11 +299,13 @@ cpdef Column extract_quarter(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_extract_quarter(input.view()) - return Column.from_libcudf(move(result)) + result = cpp_extract_quarter(input.view(), stream.view()) + return Column.from_libcudf(move(result), stream) -cpdef Column days_in_month(Column input): +cpdef Column days_in_month(Column input, Stream stream=None): """ Extract the number of days in the month. 
@@ -296,9 +323,11 @@ cpdef Column days_in_month(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_days_in_month(input.view()) - return Column.from_libcudf(move(result)) + result = cpp_days_in_month(input.view(), stream.view()) + return Column.from_libcudf(move(result), stream) DatetimeComponent.__str__ = DatetimeComponent.__repr__ RoundingFrequency.__str__ = RoundingFrequency.__repr__ diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index 7dacab668b6..25ee571044a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -6,6 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: @@ -23,7 +24,8 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_datetime_component( const column_view& column, - datetime_component component + datetime_component component, + cuda_stream_view stream ) except +libcudf_exception_handler cpdef enum class rounding_frequency(int32_t): @@ -36,35 +38,45 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: NANOSECOND cdef unique_ptr[column] ceil_datetimes( - const column_view& column, rounding_frequency freq + const column_view& column, rounding_frequency freq, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] floor_datetimes( - const column_view& column, rounding_frequency freq + const column_view& column, rounding_frequency freq, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] round_datetimes( - const column_view& column, rounding_frequency freq + const column_view& column, rounding_frequency freq, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, - const column_view& months + const column_view& months, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, - const scalar& months + const scalar& months, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] day_of_year( - const column_view& column + const column_view& column, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] is_leap_year( - const column_view& column + const column_view& column, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] last_day_of_month( - const column_view& column + const column_view& column, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] extract_quarter( - const column_view& column + const column_view& column, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] days_in_month( - const column_view& column + const column_view& column, + cuda_stream_view stream ) except +libcudf_exception_handler From e7482c1f5a19f2783103de323be0ef148070bb3b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 11 Aug 2025 20:20:06 -0400 Subject: [PATCH 101/366] Use rapids_cuda_enable_fatbin_compression (#19650) Standardize 
compression flags via rapids_cuda_enable_fatbin_compression Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/19650 --- cpp/cmake/Modules/ConfigureCUDA.cmake | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake index a6987803c3b..e0c5cedf2ee 100644 --- a/cpp/cmake/Modules/ConfigureCUDA.cmake +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -37,13 +37,8 @@ if(DISABLE_DEPRECATION_WARNINGS) endif() # make sure we produce smallest binary size -list(APPEND CUDF_CUDA_FLAGS -Xfatbin=-compress-all) -if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" - AND (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION - VERSION_LESS 13.0) -) - list(APPEND CUDF_CUDA_FLAGS -Xfatbin=--compress-level=3) -endif() +include(${rapids-cmake-dir}/cuda/enable_fatbin_compression.cmake) +rapids_cuda_enable_fatbin_compression(VARIABLE CUDF_CUDA_FLAGS TUNE_FOR rapids) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking From c867802d8c640e91756309fe0419212955259077 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 11 Aug 2025 17:40:28 -0700 Subject: [PATCH 102/366] Add streams to transform and unary (#19613) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19613 --- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 29 ++++-- python/pylibcudf/pylibcudf/libcudf/unary.pxd | 19 ++-- python/pylibcudf/pylibcudf/transform.pxd | 21 +++-- python/pylibcudf/pylibcudf/transform.pyi | 27 ++++-- python/pylibcudf/pylibcudf/transform.pyx | 93 ++++++++++++++----- python/pylibcudf/pylibcudf/unary.pxd | 15 +-- python/pylibcudf/pylibcudf/unary.pyi | 18 ++-- python/pylibcudf/pylibcudf/unary.pyx | 50 ++++++---- 8 files changed, 191 insertions(+), 81 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 9be137a077e..ce55a4a841a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.vector cimport vector @@ -13,43 +14,53 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask ( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] mask_to_bools ( - bitmask_type* bitmask, size_type begin_bit, size_type end_bit + bitmask_type* bitmask, size_type begin_bit, size_type end_bit, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[device_buffer], size_type] nans_to_nulls( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column( table_view table, - expression expr + expression expr, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] transform( const vector[column_view] & inputs, const string & transform_udf, data_type output_type, - bool is_ptx + bool is_ptx, + optional[void *] user_data, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[table], unique_ptr[column]] encode( - table_view input + table_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[unique_ptr[column], table_view] one_hot_encode( column_view input_column, - column_view categories - ) except + + column_view categories, + cuda_stream_view stream + ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column( const table_view table, - const expression& expr + const expression& expr, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd index 5027e528660..3a68b948fea 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd @@ -6,6 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/unary.hpp" namespace "cudf" nogil: @@ -38,21 +39,27 @@ cdef extern from "cudf/unary.hpp" namespace "cudf" nogil: cdef extern unique_ptr[column] unary_operation( column_view input, - unary_operator op) except +libcudf_exception_handler + unary_operator op, + cuda_stream_view stream) except +libcudf_exception_handler cdef extern unique_ptr[column] is_null( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_valid( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern unique_ptr[column] cast( column_view input, - data_type out_type) except +libcudf_exception_handler + data_type out_type, + cuda_stream_view stream) except +libcudf_exception_handler cdef extern bool is_supported_cast(data_type from_, data_type to) noexcept cdef extern unique_ptr[column] is_nan( - column_view input + column_view 
input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_not_nan( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index 45f79158055..09fc4cf72bd 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool from pylibcudf.libcudf.types cimport bitmask_type, data_type +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .expressions cimport Expression @@ -9,19 +10,25 @@ from .table cimport Table from .types cimport DataType -cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input) +cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input, Stream stream = *) -cpdef Column compute_column(Table input, Expression expr) +cpdef Column compute_column(Table input, Expression expr, Stream stream = *) -cpdef tuple[gpumemoryview, int] bools_to_mask(Column input) +cpdef tuple[gpumemoryview, int] bools_to_mask(Column input, Stream stream = *) -cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit) +cpdef Column mask_to_bools( + Py_ssize_t bitmask, + int begin_bit, + int end_bit, + Stream stream = *, +) cpdef Column transform(list[Column] inputs, str transform_udf, DataType output_type, - bool is_ptx) + bool is_ptx, + Stream stream = *) -cpdef tuple[Table, Column] encode(Table input) +cpdef tuple[Table, Column] encode(Table input, Stream stream = *) -cpdef Table one_hot_encode(Column input_column, Column categories) +cpdef Table one_hot_encode(Column input_column, Column categories, Stream stream = *) diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi index ff7c43115bd..b38d19d732a 100644 --- a/python/pylibcudf/pylibcudf/transform.pyi +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -1,19 +1,34 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.table import Table from pylibcudf.types import DataType -def nans_to_nulls(input: Column) -> tuple[gpumemoryview, int]: ... -def compute_column(input: Table, expr: Expression) -> Column: ... -def bools_to_mask(input: Column) -> tuple[gpumemoryview, int]: ... -def mask_to_bools(bitmask: int, begin_bit: int, end_bit: int) -> Column: ... +def nans_to_nulls( + input: Column, stream: Stream | None = None +) -> tuple[gpumemoryview, int]: ... +def compute_column( + input: Table, expr: Expression, stream: Stream | None = None +) -> Column: ... +def bools_to_mask( + input: Column, stream: Stream | None = None +) -> tuple[gpumemoryview, int]: ... +def mask_to_bools( + bitmask: int, begin_bit: int, end_bit: int, stream: Stream | None = None +) -> Column: ... def transform( inputs: list[Column], transform_udf: str, output_type: DataType, is_ptx: bool, + stream: Stream | None = None, ) -> Column: ... -def encode(input: Table) -> tuple[Table, Column]: ... -def one_hot_encode(input: Column, categories: Column) -> Table: ... +def encode( + input: Table, stream: Stream | None = None +) -> tuple[Table, Column]: ... +def one_hot_encode( + input: Column, categories: Column, stream: Stream | None = None +) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index a06e6e43dd8..0581bef2b19 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -3,6 +3,7 @@ from cython.operator cimport dereference from libcpp.memory cimport unique_ptr +from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector from libcpp.utility cimport move, pair @@ -16,10 +17,12 @@ from pylibcudf.libcudf.types cimport bitmask_type, size_type from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .gpumemoryview cimport gpumemoryview from .types cimport DataType +from .utils cimport _get_stream __all__ = [ "bools_to_mask", @@ -31,7 +34,10 @@ __all__ = [ "transform", ] -cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): +cpdef tuple[gpumemoryview, int] nans_to_nulls( + Column input, + Stream stream=None, +): """Create a null mask preserving existing nulls and converting nans to null. For details, see :cpp:func:`nans_to_nulls`. @@ -47,16 +53,18 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): """ cdef pair[unique_ptr[device_buffer], size_type] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_transform.nans_to_nulls(input.view()) + c_result = cpp_transform.nans_to_nulls(input.view(), stream.view()) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first))), + gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream)), c_result.second ) -cpdef Column compute_column(Table input, Expression expr): +cpdef Column compute_column(Table input, Expression expr, Stream stream=None): """Create a column by evaluating an expression on a table. For details see :cpp:func:`compute_column`. @@ -74,15 +82,20 @@ cpdef Column compute_column(Table input, Expression expr): """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: c_result = cpp_transform.compute_column( - input.view(), dereference(expr.c_obj.get()) + input.view(), dereference(expr.c_obj.get()), stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef tuple[gpumemoryview, int] bools_to_mask(Column input): +cpdef tuple[gpumemoryview, int] bools_to_mask( + Column input, + Stream stream=None, +): """Create a bitmask from a column of boolean elements Parameters @@ -97,16 +110,23 @@ cpdef tuple[gpumemoryview, int] bools_to_mask(Column input): """ cdef pair[unique_ptr[device_buffer], size_type] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_transform.bools_to_mask(input.view()) + c_result = cpp_transform.bools_to_mask(input.view(), stream.view()) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first))), + gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream)), c_result.second ) -cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit): +cpdef Column mask_to_bools( + Py_ssize_t bitmask, + int begin_bit, + int end_bit, + Stream stream=None, +): """Creates a boolean column from given bitmask. 
Parameters @@ -126,16 +146,24 @@ cpdef Column mask_to_bools(Py_ssize_t bitmask, int begin_bit, int end_bit): cdef unique_ptr[column] c_result cdef bitmask_type * bitmask_ptr = bitmask + stream = _get_stream(stream) + with nogil: - c_result = cpp_transform.mask_to_bools(bitmask_ptr, begin_bit, end_bit) + c_result = cpp_transform.mask_to_bools( + bitmask_ptr, + begin_bit, + end_bit, + stream.view(), + ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column transform(list[Column] inputs, str transform_udf, DataType output_type, - bool is_ptx): + bool is_ptx, + Stream stream=None): """Create a new column by applying a transform function against multiple input columns. @@ -160,18 +188,26 @@ cpdef Column transform(list[Column] inputs, cdef unique_ptr[column] c_result cdef string c_transform_udf = transform_udf.encode() cdef bool c_is_ptx = is_ptx + cdef optional[void *] user_data + + stream = _get_stream(stream) for input in inputs: c_inputs.push_back((input).view()) with nogil: c_result = cpp_transform.transform( - c_inputs, c_transform_udf, output_type.c_obj, c_is_ptx + c_inputs, + c_transform_udf, + output_type.c_obj, + c_is_ptx, + user_data, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef tuple[Table, Column] encode(Table input): +cpdef tuple[Table, Column] encode(Table input, Stream stream=None): """Encode the rows of the given table as integers. Parameters @@ -187,15 +223,21 @@ cpdef tuple[Table, Column] encode(Table input): """ cdef pair[unique_ptr[table], unique_ptr[column]] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_transform.encode(input.view()) + c_result = cpp_transform.encode(input.view(), stream.view()) return ( - Table.from_libcudf(move(c_result.first)), - Column.from_libcudf(move(c_result.second)) + Table.from_libcudf(move(c_result.first), stream), + Column.from_libcudf(move(c_result.second), stream) ) -cpdef Table one_hot_encode(Column input, Column categories): +cpdef Table one_hot_encode( + Column input, + Column categories, + Stream stream=None, +): """Encodes `input` by generating a new column for each value in `categories` indicating the presence of that value in `input`. @@ -215,11 +257,18 @@ cpdef Table one_hot_encode(Column input, Column categories): cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table + stream = _get_stream(stream) + with nogil: - c_result = cpp_transform.one_hot_encode(input.view(), categories.view()) + c_result = cpp_transform.one_hot_encode( + input.view(), + categories.view(), + stream.view(), + ) owner_table = Table( - [Column.from_libcudf(move(c_result.first))] * c_result.second.num_columns() + [Column.from_libcudf(move(c_result.first), stream)] + * c_result.second.num_columns() ) return Table.from_table_view(c_result.second, owner_table) diff --git a/python/pylibcudf/pylibcudf/unary.pxd b/python/pylibcudf/pylibcudf/unary.pxd index 9ee08653599..b2414458d1c 100644 --- a/python/pylibcudf/pylibcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/unary.pxd @@ -1,22 +1,23 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libcpp cimport bool from pylibcudf.libcudf.unary cimport unary_operator +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType -cpdef Column unary_operation(Column input, unary_operator op) +cpdef Column unary_operation(Column input, unary_operator op, Stream stream = *) -cpdef Column is_null(Column input) +cpdef Column is_null(Column input, Stream stream = *) -cpdef Column is_valid(Column input) +cpdef Column is_valid(Column input, Stream stream = *) -cpdef Column cast(Column input, DataType data_type) +cpdef Column cast(Column input, DataType data_type, Stream stream = *) -cpdef Column is_nan(Column input) +cpdef Column is_nan(Column input, Stream stream = *) -cpdef Column is_not_nan(Column input) +cpdef Column is_not_nan(Column input, Stream stream = *) cpdef bool is_supported_cast(DataType from_, DataType to) diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi index 4959e163125..28e0bb59327 100644 --- a/python/pylibcudf/pylibcudf/unary.pyi +++ b/python/pylibcudf/pylibcudf/unary.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.types import DataType @@ -31,10 +33,14 @@ class UnaryOperator(IntEnum): NOT = ... NEGATE = ... -def unary_operation(input: Column, op: UnaryOperator) -> Column: ... -def is_null(input: Column) -> Column: ... -def is_valid(input: Column) -> Column: ... -def cast(input: Column, data_type: DataType) -> Column: ... -def is_nan(input: Column) -> Column: ... -def is_not_nan(input: Column) -> Column: ... +def unary_operation( + input: Column, op: UnaryOperator, stream: Stream | None = None +) -> Column: ... +def is_null(input: Column, stream: Stream | None = None) -> Column: ... +def is_valid(input: Column, stream: Stream | None = None) -> Column: ... +def cast( + input: Column, data_type: DataType, stream: Stream | None = None +) -> Column: ... +def is_nan(input: Column, stream: Stream | None = None) -> Column: ... +def is_not_nan(input: Column, stream: Stream | None = None) -> Column: ... def is_supported_cast(from_: DataType, to: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx index 3915ed8274a..e68b780fdc1 100644 --- a/python/pylibcudf/pylibcudf/unary.pyx +++ b/python/pylibcudf/pylibcudf/unary.pyx @@ -6,12 +6,14 @@ from libcpp.utility cimport move from pylibcudf.libcudf cimport unary as cpp_unary from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.unary cimport unary_operator +from rmm.pylibrmm.stream cimport Stream from pylibcudf.libcudf.unary import \ unary_operator as UnaryOperator # no-cython-lint from .column cimport Column from .types cimport DataType +from .utils cimport _get_stream __all__ = [ "UnaryOperator", @@ -24,7 +26,7 @@ __all__ = [ "unary_operation", ] -cpdef Column unary_operation(Column input, unary_operator op): +cpdef Column unary_operation(Column input, unary_operator op, Stream stream=None): """Perform a unary operation on a column. For details, see :cpp:func:`unary_operation`. 
@@ -43,13 +45,15 @@ cpdef Column unary_operation(Column input, unary_operator op): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.unary_operation(input.view(), op) + result = cpp_unary.unary_operation(input.view(), op, stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column is_null(Column input): +cpdef Column is_null(Column input, Stream stream=None): """Check whether elements of a column are null. For details, see :cpp:func:`is_null`. @@ -66,13 +70,15 @@ cpdef Column is_null(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.is_null(input.view()) + result = cpp_unary.is_null(input.view(), stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column is_valid(Column input): +cpdef Column is_valid(Column input, Stream stream=None): """Check whether elements of a column are valid. For details, see :cpp:func:`is_valid`. @@ -89,13 +95,15 @@ cpdef Column is_valid(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.is_valid(input.view()) + result = cpp_unary.is_valid(input.view(), stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column cast(Column input, DataType data_type): +cpdef Column cast(Column input, DataType data_type, Stream stream=None): """Cast a column to a different data type. For details, see :cpp:func:`cast`. @@ -114,13 +122,15 @@ cpdef Column cast(Column input, DataType data_type): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.cast(input.view(), data_type.c_obj) + result = cpp_unary.cast(input.view(), data_type.c_obj, stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column is_nan(Column input): +cpdef Column is_nan(Column input, Stream stream=None): """Check whether elements of a column are nan. For details, see :cpp:func:`is_nan`. @@ -137,13 +147,15 @@ cpdef Column is_nan(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.is_nan(input.view()) + result = cpp_unary.is_nan(input.view(), stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) -cpdef Column is_not_nan(Column input): +cpdef Column is_not_nan(Column input, Stream stream=None): """Check whether elements of a column are not nan. For details, see :cpp:func:`is_not_nan`. @@ -160,10 +172,12 @@ cpdef Column is_not_nan(Column input): """ cdef unique_ptr[column] result + stream = _get_stream(stream) + with nogil: - result = cpp_unary.is_not_nan(input.view()) + result = cpp_unary.is_not_nan(input.view(), stream.view()) - return Column.from_libcudf(move(result)) + return Column.from_libcudf(move(result), stream) cpdef bool is_supported_cast(DataType from_, DataType to): """Check if a cast between datatypes is supported. From 91aeaa747401a89d9d9888effa78ab3951fd3b79 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 12 Aug 2025 08:13:26 -0400 Subject: [PATCH 103/366] Require `--scale` for PDS-DS benchmarks (due to nonlinear scaling) (#19631) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contributes to #19200. 
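
For context, the PDS-H inference that stays in place recovers the scale
factor from a single table's row count. A rough sketch of that approach
(illustrative path; the 10,000-rows-per-scale-factor constant is the TPC-H
supplier cardinality, not code from this PR):

```python
# Rough sketch of row-count-based scale inference; valid for PDS-H only.
import polars as pl

# TPC-H generates exactly 10,000 supplier rows per unit of scale factor,
# so one row count is enough to recover the scale factor.
supplier = pl.scan_parquet("/data/pdsh/scale-10/supplier.parquet")
num_rows = supplier.select(pl.len()).collect().item(0, 0)
scale_factor = num_rows / 10_000
```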
Unlike PDS-H, which scales linearly with row counts, PDS-DS tables scale nonlinearly, so we cannot infer the scale factor from a single table’s row count. In the future, we can relax this requirement by maintaining a map of scale factors to expected row counts for PDS-DS tables. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19631 --- .../experimental/benchmarks/pdsds.py | 1 + .../experimental/benchmarks/pdsh.py | 2 ++ .../experimental/benchmarks/utils.py | 26 +++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py index 904ae633d73..b9daef51ec4 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py @@ -72,6 +72,7 @@ class PDSDSQueries(metaclass=PDSDSQueriesMeta): """Base class for query loading.""" q_impl: str + name: str = "pdsds" class PDSDSPolarsQueries(PDSDSQueries): diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py index 4cb5911ecae..960d8449589 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py @@ -39,6 +39,8 @@ class PDSHQueries: """PDS-H query definitions.""" + name: str = "pdsh" + @staticmethod def q0(run_config: RunConfig) -> pl.LazyFrame: """Query 0.""" diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index 11e6442baab..c85fbf8319e 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -16,7 +16,6 @@ import time from collections import defaultdict from datetime import datetime, timezone -from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, assert_never import nvtx @@ -41,6 +40,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Sequence + from pathlib import Path ExecutorType = Literal["in-memory", "streaming", "cpu"] @@ -155,9 +155,7 @@ def collect(cls) -> HardwareInfo: return cls(gpus=gpus) -def _infer_scale_factor(path: str | Path, suffix: str) -> int | float: - name = Path(sys.argv[0]).name - +def _infer_scale_factor(name: str, path: str | Path, suffix: str) -> int | float: if "pdsh" in name: supplier = get_data(path, "supplier", suffix) num_rows = supplier.select(pl.len()).collect().item(0, 0) @@ -176,7 +174,7 @@ def _infer_scale_factor(path: str | Path, suffix: str) -> int | float: @dataclasses.dataclass(kw_only=True) class RunConfig: - """Results for a PDS-H query run.""" + """Results for a PDS-H or PDS-DS query run.""" queries: list[int] suffix: str @@ -203,6 +201,7 @@ class RunConfig: rapidsmpf_oom_protection: bool rapidsmpf_spill: bool spill_device: float + query_set: str @classmethod def from_args(cls, args: argparse.Namespace) -> RunConfig: @@ -214,12 +213,21 @@ def from_args(cls, args: argparse.Namespace) -> RunConfig: scheduler = None path = args.path - if (scale_factor := args.scale) is None: + name = args.query_set + scale_factor = args.scale + + if scale_factor is None: + if "pdsds" in name: + raise ValueError( + "--scale is required for PDS-DS benchmarks.\n" + "TODO: This will be inferred once we maintain a 
map of scale factors to row counts."
+                )
             if path is None:
                 raise ValueError(
                     "Must specify --root and --scale if --path is not specified."
                 )
-            scale_factor = _infer_scale_factor(path, args.suffix)
+            # For PDS-H, infer scale factor based on row count
+            scale_factor = _infer_scale_factor(name, path, args.suffix)
         if path is None:
             path = f"{args.root}/scale-{scale_factor}"
         try:
@@ -229,7 +237,7 @@ class RunConfig:
 
         if args.scale is not None:
             # Validate the user-supplied scale factor
-            sf_inf = _infer_scale_factor(path, args.suffix)
+            sf_inf = _infer_scale_factor(name, path, args.suffix)
             rel_error = abs((scale_factor - sf_inf) / sf_inf)
             if rel_error > 0.01:
                 raise ValueError(
@@ -255,6 +263,7 @@ class RunConfig:
             spill_device=args.spill_device,
             rapidsmpf_spill=args.rapidsmpf_spill,
             max_rows_per_partition=args.max_rows_per_partition,
+            query_set=args.query_set,
         )
 
     def serialize(self, engine: pl.GPUEngine | None) -> dict:
@@ -681,6 +690,7 @@ def run_polars(
 ) -> None:
     """Run the queries using the given benchmark and executor options."""
     args = parse_args(options, num_queries=num_queries)
+    vars(args).update({"query_set": benchmark.name})
     run_config = RunConfig.from_args(args)
 
     validation_failures: list[int] = []

From bf63a7d37cb2a1ee9900ebea4f281d9292b4a958 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 12 Aug 2025 10:37:00 -0400
Subject: [PATCH 104/366] Use public Arrow functions for TDigest in
 PercentileApproxInputTypesTests (#19648)

Changes the `PercentileApproxInputTypesTests` logic in
`percentile_approx_test.cpp` to use the public Arrow functions to compute
the tdigest values instead of `internal` or `detail` functions.
This required enabling `ARROW_COMPUTE=ON` in `get_arrow.cmake` so that
libarrow.so is built with the Arrow compute functions.
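
For reference, the same public `tdigest` kernel that the test now invokes
through `arrow::compute::CallFunction` is also reachable from Python via
pyarrow; a minimal sketch (values and percentiles are illustrative, not
taken from this change):

```python
# Minimal sketch of Arrow's public t-digest kernel via pyarrow.
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64())
# Approximate the 25th/50th/75th percentiles from the t-digest sketch.
quantiles = pc.tdigest(arr, q=[0.25, 0.5, 0.75])
print(quantiles.to_pylist())  # roughly [2.0, 3.0, 4.0]
```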
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/19648 --- cpp/cmake/thirdparty/get_arrow.cmake | 14 +++++++++-- .../quantiles/percentile_approx_test.cpp | 24 ++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index c519fa687c3..8293f96fb5b 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -82,8 +82,9 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P rapids_cpm_find( Arrow ${VERSION} - GLOBAL_TARGETS arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_static - parquet_static arrow_acero_static arrow_dataset_static + GLOBAL_TARGETS + arrow_shared parquet_shared arrow_acero_shared arrow_dataset_shared arrow_compute_shared + arrow_static parquet_static arrow_acero_static arrow_dataset_static arrow_compute_static CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} @@ -91,6 +92,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P EXCLUDE_FROM_ALL ${EXCLUDE_FROM_ALL} OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" "ARROW_ACERO ON" + "ARROW_COMPUTE ON" "ARROW_IPC ON" "ARROW_DATASET ON" "ARROW_WITH_BACKTRACE ON" @@ -145,6 +147,9 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P # us set(ArrowDataset_DIR "${Arrow_DIR}") find_package(ArrowDataset REQUIRED QUIET) + # Set this to enable `find_package(ArrowCompute)` + set(ArrowCompute_DIR "${Arrow_DIR}") + find_package(ArrowCompute REQUIRED QUIET) endif() # Arrow_ADDED: set if CPM downloaded Arrow from Github elseif(Arrow_ADDED) @@ -288,6 +293,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P NAMESPACE cudf:: FINAL_CODE_BLOCK arrow_dataset_code_string ) + set(parquet_code_string [=[ if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) @@ -320,6 +326,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P rapids_export_package(BUILD Parquet cudf-exports) rapids_export_package(BUILD ArrowDataset cudf-exports) endif() + rapids_export_package(BUILD ArrowCompute cudf-exports) include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root( @@ -335,6 +342,9 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P EXPORT_SET cudf-exports CONDITION ENABLE_PARQUET ) + rapids_export_find_package_root( + BUILD ArrowCompute [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) endif() set(ARROW_LIBRARIES diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index 086f97c03af..a65f4766159 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -31,7 +31,8 @@ #include #include -#include +#include +#include namespace { std::unique_ptr arrow_percentile_approx(cudf::column_view const& _values, @@ -59,17 +60,28 @@ std::unique_ptr arrow_percentile_approx(cudf::column_view const& _ } // generate the tdigest - arrow::internal::TDigest atd(delta, sorted_values.size() * 2); + arrow::DoubleBuilder builder; for (size_t idx = 0; idx < h_values.size(); idx++) { - if (sorted_values.null_mask() == nullptr || h_validity[idx]) { 
atd.Add(h_values[idx]); }
+    if (sorted_values.null_mask() == nullptr || h_validity[idx]) {
+      EXPECT_TRUE(builder.Append(h_values[idx]).ok());
+    }
   }
+  std::shared_ptr<arrow::Array> array;
+  EXPECT_TRUE(builder.Finish(&array).ok());
+
+  auto const udelta = static_cast<uint32_t>(delta);
+  auto const usize  = static_cast<uint32_t>(h_values.size()) * 2;
+  arrow::compute::TDigestOptions options{percentages, udelta, usize};
+
+  auto arrow_result = arrow::compute::CallFunction("tdigest", {array}, &options);
+  auto result_array = arrow_result.ValueOrDie().array_as<arrow::DoubleArray>();

-  // generate the percentiles and stuff them into a list column
+  // copy the percentiles and stuff them into a list column
   std::vector<double> h_result;
   h_result.reserve(percentages.size());
   std::transform(
-    percentages.begin(), percentages.end(), std::back_inserter(h_result), [&atd](double p) {
-      return atd.Quantile(p);
+    result_array->begin(), result_array->end(), std::back_inserter(h_result), [](auto p) {
+      return p.value();
     });
   cudf::test::fixed_width_column_wrapper<double> result(h_result.begin(), h_result.end());
   cudf::test::fixed_width_column_wrapper offsets{

From d6533a294e8fd3ce75a6e149c8c0aabd15d86f14 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 12 Aug 2025 10:57:34 -0400
Subject: [PATCH 105/366] Set scalar to valid in range_window_bounds
 unbounded/current_row (#19622)

Fixes the `cudf::range_window_bounds` `current_row()` and `unbounded()`
functions to return valid scalars.
Also adds a check for an invalid scalar parameter in the
`range_comparable_value` function.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)
  - MithunR (https://github.com/mythrocks)
  - Basit Ayantunde (https://github.com/lamarrr)

URL: https://github.com/rapidsai/cudf/pull/19622
---
 cpp/src/rolling/detail/range_window_bounds.hpp |  4 +++-
 cpp/src/rolling/range_window_bounds.cpp        | 10 +++++++---
 cpp/tests/rolling/range_window_bounds_test.cpp | 11 +++++------
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp
index 62e28cf47c4..0536b450209 100644
--- a/cpp/src/rolling/detail/range_window_bounds.hpp
+++ b/cpp/src/rolling/detail/range_window_bounds.hpp
@@ -139,7 +139,9 @@ range_rep_type range_comparable_value(range_window_bounds const& ra
   rmm::cuda_stream_view stream)
 {
   auto const& range_scalar = range_bounds.range_scalar();
-  using range_type = cudf::detail::range_type;
+  CUDF_EXPECTS(
+    range_scalar.is_valid(stream), "Range bounds scalar must be valid.", std::invalid_argument);
+  using range_type = cudf::detail::range_type;
   CUDF_EXPECTS(range_scalar.type().id() == cudf::type_to_id(),
                "Range bounds scalar must match the type of the orderby column.");

diff --git a/cpp/src/rolling/range_window_bounds.cpp b/cpp/src/rolling/range_window_bounds.cpp
index 7f698dfcd6b..2e4a26f720b 100644
--- a/cpp/src/rolling/range_window_bounds.cpp
+++ b/cpp/src/rolling/range_window_bounds.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License. 
@@ -78,12 +78,16 @@ range_window_bounds range_window_bounds::unbounded(data_type type, rmm::cuda_stream_view stream)
 {
-  return {extent_type::UNBOUNDED, make_default_constructed_scalar(type, stream), stream};
+  auto s = make_default_constructed_scalar(type, stream);
+  s->set_valid_async(true, stream);
+  return {extent_type::UNBOUNDED, std::move(s), stream};
 }
 
 range_window_bounds range_window_bounds::current_row(data_type type, rmm::cuda_stream_view stream)
 {
-  return {extent_type::CURRENT_ROW, make_default_constructed_scalar(type, stream), stream};
+  auto s = make_default_constructed_scalar(type, stream);
+  s->set_valid_async(true, stream);
+  return {extent_type::CURRENT_ROW, std::move(s), stream};
 }
 
 range_window_bounds range_window_bounds::get(scalar const& boundary, rmm::cuda_stream_view stream)
diff --git a/cpp/tests/rolling/range_window_bounds_test.cpp b/cpp/tests/rolling/range_window_bounds_test.cpp
index a67555280f4..df551f96e50 100644
--- a/cpp/tests/rolling/range_window_bounds_test.cpp
+++ b/cpp/tests/rolling/range_window_bounds_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -116,21 +116,20 @@ TYPED_TEST(NumericRangeWindowBoundsTest, BoundsConstruction)
   using range_type = cudf::detail::range_type<TypeParam>;
   using rep_type   = cudf::detail::range_rep_type<TypeParam>;
   auto const dtype = cudf::data_type{cudf::type_to_id<TypeParam>()};
+  auto const stream = cudf::get_default_stream();
 
   static_assert(std::is_integral_v<range_type>);
   auto range_3 = cudf::range_window_bounds::get(cudf::numeric_scalar<TypeParam>{3, true});
   EXPECT_FALSE(range_3.is_unbounded() &&
               "range_window_bounds constructed from scalar cannot be unbounded.");
-  EXPECT_EQ(
-    cudf::detail::range_comparable_value<TypeParam>(range_3, dtype, cudf::get_default_stream()),
-    rep_type{3});
+  EXPECT_EQ(cudf::detail::range_comparable_value<TypeParam>(range_3, dtype, stream), rep_type{3});
 
   auto range_unbounded =
     cudf::range_window_bounds::unbounded(cudf::data_type{cudf::type_to_id<TypeParam>()});
   EXPECT_TRUE(range_unbounded.is_unbounded() &&
              "range_window_bounds::unbounded() must return an unbounded range.");
-  EXPECT_EQ(cudf::detail::range_comparable_value<TypeParam>(
-              range_unbounded, dtype, cudf::get_default_stream()),
+  EXPECT_TRUE(range_unbounded.range_scalar().is_valid(stream));
+  EXPECT_EQ(cudf::detail::range_comparable_value<TypeParam>(range_unbounded, dtype, stream),
             rep_type{});
 }

From 4bb819bd0cb9b30227f7cf59dbbdc1b60183ad05 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 12 Aug 2025 09:18:30 -0700
Subject: [PATCH 106/366] Clean and move test_join_order/interpolate/onehot.py
 to new cudf classic test directory structure (#19662)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19662
---
 .../dataframe/methods/test_interpolate.py    |  64 ++++
 .../test_get_dummies.py}                     |  40 ++-
 python/cudf/cudf/tests/reshape/test_merge.py | 270 +++++++++++++++-
 .../{ => series/methods}/test_interpolate.py |  73 +----
 python/cudf/cudf/tests/test_join_order.py    | 287 ------------------
 5 files changed, 368 insertions(+), 366 deletions(-)
 create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_interpolate.py
rename python/cudf/cudf/tests/{test_onehot.py => general_functions/test_get_dummies.py} (82%) rename python/cudf/cudf/tests/{ => series/methods}/test_interpolate.py (50%) delete mode 100644 python/cudf/cudf/tests/test_join_order.py diff --git a/python/cudf/cudf/tests/dataframe/methods/test_interpolate.py b/python/cudf/cudf/tests/dataframe/methods/test_interpolate.py new file mode 100644 index 00000000000..506d0677178 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_interpolate.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "data", + [ + # basics + {"A": [1.0, 2.0, 3.0], "B": [4.0, 5.0, 6.0]}, + {"A": [1.0, None, 3.0], "B": [4.0, None, 6.0]}, + {"A": [None, 2.0, 3.0], "B": [4.0, 5.0, None]}, + ], +) +def test_interpolate_dataframe(data): + # Pandas interpolate methods do not seem to work + # with nullable dtypes yet, so this method treats + # NAs as NaNs + # https://github.com/pandas-dev/pandas/issues/40252 + axis = 0 + method = "linear" + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expect = pdf.interpolate(method=method, axis=axis) + got = gdf.interpolate(method=method, axis=axis) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data,kwargs", + [ + ( + {"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, + {"axis": 0, "method": "linear"}, + ), + ({"A": [1, 2, 3]}, {"method": "pad", "limit_direction": "forward"}), + ({"A": [1, 2, 3]}, {"method": "ffill", "limit_direction": "forward"}), + ({"A": [1, 2, 3]}, {"method": "bfill", "limit_direction": "backward"}), + ( + {"A": [1, 2, 3]}, + {"method": "backfill", "limit_direction": "backward"}, + ), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Does not fail on older versions of pandas", +) +def test_interpolate_dataframe_error_cases(data, kwargs): + gsr = cudf.DataFrame(data) + psr = gsr.to_pandas() + + assert_exceptions_equal( + lfunc=psr.interpolate, + rfunc=gsr.interpolate, + lfunc_args_and_kwargs=([], kwargs), + rfunc_args_and_kwargs=([], kwargs), + ) diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/general_functions/test_get_dummies.py similarity index 82% rename from python/cudf/cudf/tests/test_onehot.py rename to python/cudf/cudf/tests/general_functions/test_get_dummies.py index b85882a79f5..6108f4b7b7f 100644 --- a/python/cudf/cudf/tests/test_onehot.py +++ b/python/cudf/cudf/tests/general_functions/test_get_dummies.py @@ -22,13 +22,16 @@ (range(10), [1, 2, 3, 4, 5] * 2), ], ) -@pytest.mark.parametrize("dtype", ["bool", "uint8"]) -def test_get_dummies(data, index, dtype): +def test_get_dummies(data, index, numeric_and_bool_types_as_str): pdf = pd.DataFrame({"x": data}, index=index) gdf = cudf.from_pandas(pdf) - encoded_expected = pd.get_dummies(pdf, prefix="test", dtype=dtype) - encoded_actual = cudf.get_dummies(gdf, prefix="test", dtype=dtype) + encoded_expected = pd.get_dummies( + pdf, prefix="test", dtype=numeric_and_bool_types_as_str + ) + encoded_actual = cudf.get_dummies( + gdf, prefix="test", dtype=numeric_and_bool_types_as_str + ) assert_eq( encoded_expected, @@ -37,8 +40,8 @@ def test_get_dummies(data, index, dtype): ) -@pytest.mark.parametrize("n_cols", [5, 10, 20]) -def test_onehot_get_dummies_multicol(n_cols): +def test_onehot_get_dummies_multicol(): + n_cols = 5 n_categories 
= 5 data = dict( zip( @@ -57,9 +60,15 @@ def test_onehot_get_dummies_multicol(n_cols): assert_eq(encoded_expected, encoded_actual) -@pytest.mark.parametrize("nan_as_null", [True, False]) @pytest.mark.parametrize("dummy_na", [True, False]) -def test_onehost_get_dummies_dummy_na(nan_as_null, dummy_na): +def test_get_dummies_dummy_na(request, nan_as_null, dummy_na): + request.applymarker( + pytest.mark.xfail( + nan_as_null is None, + reason=f"Incorrect cuDF result with {nan_as_null=}", + ) + ) + df = cudf.DataFrame({"a": [0, 1, np.nan]}, nan_as_null=nan_as_null) pdf = df.to_pandas(nullable=nan_as_null) @@ -133,17 +142,24 @@ def test_get_dummies_with_nan(): ) @pytest.mark.parametrize("prefix_sep", ["-", "#"]) @pytest.mark.parametrize("prefix", [None, "hi"]) -@pytest.mark.parametrize("dtype", ["uint8", "int16"]) -def test_get_dummies_array_like(data, prefix_sep, prefix, dtype): +def test_get_dummies_array_like( + data, prefix_sep, prefix, numeric_and_bool_types_as_str +): data = data() pd_data = data.to_pandas() expected = pd.get_dummies( - pd_data, prefix=prefix, prefix_sep=prefix_sep, dtype=dtype + pd_data, + prefix=prefix, + prefix_sep=prefix_sep, + dtype=numeric_and_bool_types_as_str, ) actual = cudf.get_dummies( - data, prefix=prefix, prefix_sep=prefix_sep, dtype=dtype + data, + prefix=prefix, + prefix_sep=prefix_sep, + dtype=numeric_and_bool_types_as_str, ) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/reshape/test_merge.py b/python/cudf/cudf/tests/reshape/test_merge.py index f661eb4b587..cd11f36448b 100644 --- a/python/cudf/cudf/tests/reshape/test_merge.py +++ b/python/cudf/cudf/tests/reshape/test_merge.py @@ -1,11 +1,19 @@ # Copyright (c) 2025, NVIDIA CORPORATION. - +import itertools +import operator +import string +from collections import defaultdict import numpy as np import pandas as pd import pytest import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype from cudf.testing import assert_eq from cudf.testing._utils import ( @@ -31,6 +39,11 @@ def how(request): return request.param +@pytest.fixture(params=[False, True]) +def sort(request): + return request.param + + def assert_join_results_equal(expect, got, how, **kwargs): if how == "right": got = got[expect.columns] @@ -1348,3 +1361,258 @@ def test_merge_datetime_timedelta_error(temporal_types_as_str): with pytest.raises(TypeError): df1.merge(df2) + + +if PANDAS_GE_220: + # Behaviour in sort=False case didn't match documentation in many + # cases prior to https://github.com/pandas-dev/pandas/pull/54611 + # (released as part of pandas 2.2) + def expected(left, right, sort, *, how): + left = left.to_pandas() + right = right.to_pandas() + return left.merge(right, on="key", how=how, sort=sort) + +else: + + def expect_inner(left, right, sort): + left_key = left.key.values_host.tolist() + left_val = left.val.values_host.tolist() + right_key = right.key.values_host.tolist() + right_val = right.val.values_host.tolist() + + right_have = defaultdict(list) + for i, k in enumerate(right_key): + right_have[k].append(i) + keys = [] + val_x = [] + val_y = [] + for k, v in zip(left_key, left_val, strict=True): + if k not in right_have: + continue + for i in right_have[k]: + keys.append(k) + val_x.append(v) + val_y.append(right_val[i]) + + if sort: + # Python sort is stable, so this will preserve input order for + # equal items. 
+ keys, val_x, val_y = zip( + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, + ) + return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) + + def expect_left(left, right, sort): + left_key = left.key.values_host.tolist() + left_val = left.val.values_host.tolist() + right_key = right.key.values_host.tolist() + right_val = right.val.values_host.tolist() + + right_have = defaultdict(list) + for i, k in enumerate(right_key): + right_have[k].append(i) + keys = [] + val_x = [] + val_y = [] + for k, v in zip(left_key, left_val, strict=True): + if k not in right_have: + right_vals = [None] + else: + right_vals = [right_val[i] for i in right_have[k]] + + for rv in right_vals: + keys.append(k) + val_x.append(v) + val_y.append(rv) + + if sort: + # Python sort is stable, so this will preserve input order for + # equal items. + keys, val_x, val_y = zip( + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, + ) + return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) + + def expect_outer(left, right, sort): + left_key = left.key.values_host.tolist() + left_val = left.val.values_host.tolist() + right_key = right.key.values_host.tolist() + right_val = right.val.values_host.tolist() + right_have = defaultdict(list) + for i, k in enumerate(right_key): + right_have[k].append(i) + keys = [] + val_x = [] + val_y = [] + for k, v in zip(left_key, left_val, strict=True): + if k not in right_have: + right_vals = [None] + else: + right_vals = [right_val[i] for i in right_have[k]] + for rv in right_vals: + keys.append(k) + val_x.append(v) + val_y.append(rv) + left_have = set(left_key) + for k, v in zip(right_key, right_val, strict=True): + if k not in left_have: + keys.append(k) + val_x.append(None) + val_y.append(v) + + # Python sort is stable, so this will preserve input order for + # equal items. 
+ # outer joins are always sorted, but we test both sort values + keys, val_x, val_y = zip( + *sorted( + zip(keys, val_x, val_y, strict=True), + key=operator.itemgetter(0), + ), + strict=True, + ) + return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) + + def expected(left, right, sort, *, how): + if how == "inner": + return expect_inner(left, right, sort) + elif how == "outer": + return expect_outer(left, right, sort) + elif how == "left": + return expect_left(left, right, sort) + elif how == "right": + return expect_left(right, left, sort).rename( + {"val_x": "val_y", "val_y": "val_x"}, axis=1 + ) + else: + raise NotImplementedError() + + +def test_join_ordering_pandas_compat(request, sort, how): + if how in ["leftanti", "leftsemi", "cross"]: + pytest.skip(f"Test not applicable for {how}") + request.applymarker( + pytest.mark.xfail( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and how == "right", + reason="TODO: Result ordering of suffixed columns is incorrect", + ) + ) + left_key = [1, 3, 2, 1, 1, 2, 5, 1, 4, 5, 8, 12, 12312, 1] * 100 + left_val = range(len(left_key)) + left = cudf.DataFrame({"key": left_key, "val": left_val}) + right_key = [12312, 12312, 3, 2, 1, 1, 5, 7, 2] * 200 + right_val = list( + itertools.islice(itertools.cycle(string.ascii_letters), len(right_key)) + ) + right = cudf.DataFrame({"key": right_key, "val": right_val}) + with cudf.option_context("mode.pandas_compatible", True): + actual = left.merge(right, on="key", how=how, sort=sort) + expect = expected(left, right, sort, how=how) + assert_eq(expect, actual) + + +@pytest.mark.parametrize("on_index", [True, False]) +@pytest.mark.parametrize("left_unique", [True, False]) +@pytest.mark.parametrize("left_monotonic", [True, False]) +@pytest.mark.parametrize("right_unique", [True, False]) +@pytest.mark.parametrize("right_monotonic", [True, False]) +def test_merge_combinations( + request, + how, + sort, + on_index, + left_unique, + left_monotonic, + right_unique, + right_monotonic, +): + if how in ["leftanti", "leftsemi", "cross"]: + pytest.skip(f"Test not applicable for {how}") + request.applymarker( + pytest.mark.xfail( + condition=how == "outer" + and on_index + and left_unique + and not left_monotonic + and right_unique + and not right_monotonic, + reason="https://github.com/pandas-dev/pandas/issues/55992", + ) + ) + left = [2, 3] + if left_unique: + left.append(4 if left_monotonic else 1) + else: + left.append(3 if left_monotonic else 2) + + right = [2, 3] + if right_unique: + right.append(4 if right_monotonic else 1) + else: + right.append(3 if right_monotonic else 2) + + left = cudf.DataFrame({"key": left}) + right = cudf.DataFrame({"key": right}) + + if on_index: + left = left.set_index("key") + right = right.set_index("key") + on_kwargs = {"left_index": True, "right_index": True} + else: + on_kwargs = {"on": "key"} + + with cudf.option_context("mode.pandas_compatible", True): + result = cudf.merge(left, right, how=how, sort=sort, **on_kwargs) + if on_index: + left = left.reset_index() + right = right.reset_index() + + if how in ["left", "right", "inner"]: + if how in ["left", "inner"]: + expected, other, other_unique = left, right, right_unique + else: + expected, other, other_unique = right, left, left_unique + if how == "inner": + keep_values = set(left["key"].values_host).intersection( + right["key"].values_host + ) + keep_mask = expected["key"].isin(keep_values) + expected = expected[keep_mask] + if sort: + expected = expected.sort_values("key") + if not other_unique:
other_value_counts = other["key"].value_counts() + repeats = other_value_counts.reindex( + expected["key"].values, fill_value=1 + ) + repeats = repeats.astype(np.intp) + expected = expected["key"].repeat(repeats.values) + expected = expected.to_frame() + elif how == "outer": + if on_index and left_unique and left["key"].equals(right["key"]): + expected = cudf.DataFrame({"key": left["key"]}) + else: + left_counts = left["key"].value_counts() + right_counts = right["key"].value_counts() + expected_counts = left_counts.mul(right_counts, fill_value=1) + expected_counts = expected_counts.astype(np.intp) + expected = expected_counts.index.values_host.repeat( + expected_counts.values_host + ) + expected = cudf.DataFrame({"key": expected}) + expected = expected.sort_values("key") + + if on_index: + expected = expected.set_index("key") + else: + expected = expected.reset_index(drop=True) + + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/series/methods/test_interpolate.py similarity index 50% rename from python/cudf/cudf/tests/test_interpolate.py rename to python/cudf/cudf/tests/series/methods/test_interpolate.py index c76a49103e2..dd7acb2f3b7 100644 --- a/python/cudf/cudf/tests/test_interpolate.py +++ b/python/cudf/cudf/tests/series/methods/test_interpolate.py @@ -1,35 +1,11 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. import pytest import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if - - -@pytest.mark.parametrize( - "data", - [ - # basics - {"A": [1.0, 2.0, 3.0], "B": [4.0, 5.0, 6.0]}, - {"A": [1.0, None, 3.0], "B": [4.0, None, 6.0]}, - {"A": [None, 2.0, 3.0], "B": [4.0, 5.0, None]}, - ], -) -@pytest.mark.parametrize("method", ["linear"]) -@pytest.mark.parametrize("axis", [0]) -def test_interpolate_dataframe(data, method, axis): - # Pandas interpolate methods do not seem to work - # with nullable dtypes yet, so this method treats - # NAs as NaNs - # https://github.com/pandas-dev/pandas/issues/40252 - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - expect = pdf.interpolate(method=method, axis=axis) - got = gdf.interpolate(method=method, axis=axis) - assert_eq(expect, got) +from cudf.testing._utils import expect_warning_if @pytest.mark.skipif( @@ -49,9 +25,9 @@ def test_interpolate_dataframe(data, method, axis): [0.1, 0.2, 0.3], ], ) -@pytest.mark.parametrize("method", ["linear"]) -@pytest.mark.parametrize("axis", [0]) -def test_interpolate_series(data, method, axis): +def test_interpolate_series(data): + axis = 0 + method = "linear" gsr = cudf.Series(data) psr = gsr.to_pandas() @@ -64,11 +40,8 @@ def test_interpolate_series(data, method, axis): assert_eq(expect, got, check_dtype=psr.dtype != "object") -@pytest.mark.parametrize( - "data,index", [([2.0, None, 4.0, None, 2.0], [1, 2, 3, 2, 1])] -) -def test_interpolate_series_unsorted_index(data, index): - gsr = cudf.Series(data, index=index) +def test_interpolate_series_unsorted_index(): + gsr = cudf.Series([2.0, None, 4.0, None, 2.0], index=[1, 2, 3, 2, 1]) psr = gsr.to_pandas() expect = psr.interpolate(method="values") @@ -109,38 +82,6 @@ def test_interpolate_series_values_or_index(data, index, method): assert_eq(expect, got, check_dtype=psr.dtype != "object") -@pytest.mark.parametrize( - "data,kwargs", - [ - ( - {"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, - {"axis": 0, "method": 
"linear"}, - ), - ({"A": [1, 2, 3]}, {"method": "pad", "limit_direction": "forward"}), - ({"A": [1, 2, 3]}, {"method": "ffill", "limit_direction": "forward"}), - ({"A": [1, 2, 3]}, {"method": "bfill", "limit_direction": "backward"}), - ( - {"A": [1, 2, 3]}, - {"method": "backfill", "limit_direction": "backward"}, - ), - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Does not fail on older versions of pandas", -) -def test_interpolate_dataframe_error_cases(data, kwargs): - gsr = cudf.DataFrame(data) - psr = gsr.to_pandas() - - assert_exceptions_equal( - lfunc=psr.interpolate, - rfunc=gsr.interpolate, - lfunc_args_and_kwargs=([], kwargs), - rfunc_args_and_kwargs=([], kwargs), - ) - - def test_interpolate_noop_new_column(): ser = cudf.Series([1.0, 2.0, 3.0]) result = ser.interpolate() diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py deleted file mode 100644 index 60ec93f5040..00000000000 --- a/python/cudf/cudf/tests/test_join_order.py +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright (c) 2023-2025, NVIDIA CORPORATION. - -import itertools -import operator -import string -from collections import defaultdict - -import numpy as np -import pytest - -import cudf -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_VERSION, -) -from cudf.testing import assert_eq - - -@pytest.fixture(params=[False, True], ids=["unsorted", "sorted"]) -def sort(request): - return request.param - - -@pytest.fixture -def left(): - left_key = [1, 3, 2, 1, 1, 2, 5, 1, 4, 5, 8, 12, 12312, 1] * 100 - left_val = list(range(len(left_key))) - return cudf.DataFrame({"key": left_key, "val": left_val}) - - -@pytest.fixture -def right(): - right_key = [12312, 12312, 3, 2, 1, 1, 5, 7, 2] * 200 - right_val = list( - itertools.islice(itertools.cycle(string.ascii_letters), len(right_key)) - ) - return cudf.DataFrame({"key": right_key, "val": right_val}) - - -# Behaviour in sort=False case didn't match documentation in many -# cases prior to https://github.com/pandas-dev/pandas/pull/54611 -# (released as part of pandas 2.2) -if PANDAS_GE_220: - # Behaviour in sort=False case didn't match documentation in many - # cases prior to https://github.com/pandas-dev/pandas/pull/54611 - # (released as part of pandas 2.2) - def expected(left, right, sort, *, how): - left = left.to_pandas() - right = right.to_pandas() - return left.merge(right, on="key", how=how, sort=sort) - -else: - - def expect_inner(left, right, sort): - left_key = left.key.values_host.tolist() - left_val = left.val.values_host.tolist() - right_key = right.key.values_host.tolist() - right_val = right.val.values_host.tolist() - - right_have = defaultdict(list) - for i, k in enumerate(right_key): - right_have[k].append(i) - keys = [] - val_x = [] - val_y = [] - for k, v in zip(left_key, left_val, strict=True): - if k not in right_have: - continue - for i in right_have[k]: - keys.append(k) - val_x.append(v) - val_y.append(right_val[i]) - - if sort: - # Python sort is stable, so this will preserve input order for - # equal items. 
- keys, val_x, val_y = zip( - *sorted( - zip(keys, val_x, val_y, strict=True), - key=operator.itemgetter(0), - ), - strict=True, - ) - return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) - - def expect_left(left, right, sort): - left_key = left.key.values_host.tolist() - left_val = left.val.values_host.tolist() - right_key = right.key.values_host.tolist() - right_val = right.val.values_host.tolist() - - right_have = defaultdict(list) - for i, k in enumerate(right_key): - right_have[k].append(i) - keys = [] - val_x = [] - val_y = [] - for k, v in zip(left_key, left_val, strict=True): - if k not in right_have: - right_vals = [None] - else: - right_vals = [right_val[i] for i in right_have[k]] - - for rv in right_vals: - keys.append(k) - val_x.append(v) - val_y.append(rv) - - if sort: - # Python sort is stable, so this will preserve input order for - # equal items. - keys, val_x, val_y = zip( - *sorted( - zip(keys, val_x, val_y, strict=True), - key=operator.itemgetter(0), - ), - strict=True, - ) - return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) - - def expect_outer(left, right, sort): - left_key = left.key.values_host.tolist() - left_val = left.val.values_host.tolist() - right_key = right.key.values_host.tolist() - right_val = right.val.values_host.tolist() - right_have = defaultdict(list) - for i, k in enumerate(right_key): - right_have[k].append(i) - keys = [] - val_x = [] - val_y = [] - for k, v in zip(left_key, left_val, strict=True): - if k not in right_have: - right_vals = [None] - else: - right_vals = [right_val[i] for i in right_have[k]] - for rv in right_vals: - keys.append(k) - val_x.append(v) - val_y.append(rv) - left_have = set(left_key) - for k, v in zip(right_key, right_val, strict=True): - if k not in left_have: - keys.append(k) - val_x.append(None) - val_y.append(v) - - # Python sort is stable, so this will preserve input order for - # equal items. 
- # outer joins are always sorted, but we test both sort values - keys, val_x, val_y = zip( - *sorted( - zip(keys, val_x, val_y, strict=True), - key=operator.itemgetter(0), - ), - strict=True, - ) - return cudf.DataFrame({"key": keys, "val_x": val_x, "val_y": val_y}) - - def expected(left, right, sort, *, how): - if how == "inner": - return expect_inner(left, right, sort) - elif how == "outer": - return expect_outer(left, right, sort) - elif how == "left": - return expect_left(left, right, sort) - elif how == "right": - return expect_left(right, left, sort).rename( - {"val_x": "val_y", "val_y": "val_x"}, axis=1 - ) - else: - raise NotImplementedError() - - -@pytest.mark.parametrize("how", ["inner", "left", "right", "outer"]) -def test_join_ordering_pandas_compat(request, left, right, sort, how): - request.applymarker( - pytest.mark.xfail( - PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION - and how == "right", - reason="TODO: Result ording of suffix'ed columns is incorrect", - ) - ) - with cudf.option_context("mode.pandas_compatible", True): - actual = left.merge(right, on="key", how=how, sort=sort) - expect = expected(left, right, sort, how=how) - assert_eq(expect, actual) - - -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -@pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.parametrize("on_index", [True, False]) -@pytest.mark.parametrize("left_unique", [True, False]) -@pytest.mark.parametrize("left_monotonic", [True, False]) -@pytest.mark.parametrize("right_unique", [True, False]) -@pytest.mark.parametrize("right_monotonic", [True, False]) -def test_merge_combinations( - request, - how, - sort, - on_index, - left_unique, - left_monotonic, - right_unique, - right_monotonic, -): - request.applymarker( - pytest.mark.xfail( - condition=how == "outer" - and on_index - and left_unique - and not left_monotonic - and right_unique - and not right_monotonic, - reason="https://github.com/pandas-dev/pandas/issues/55992", - ) - ) - left = [2, 3] - if left_unique: - left.append(4 if left_monotonic else 1) - else: - left.append(3 if left_monotonic else 2) - - right = [2, 3] - if right_unique: - right.append(4 if right_monotonic else 1) - else: - right.append(3 if right_monotonic else 2) - - left = cudf.DataFrame({"key": left}) - right = cudf.DataFrame({"key": right}) - - if on_index: - left = left.set_index("key") - right = right.set_index("key") - on_kwargs = {"left_index": True, "right_index": True} - else: - on_kwargs = {"on": "key"} - - with cudf.option_context("mode.pandas_compatible", True): - result = cudf.merge(left, right, how=how, sort=sort, **on_kwargs) - if on_index: - left = left.reset_index() - right = right.reset_index() - - if how in ["left", "right", "inner"]: - if how in ["left", "inner"]: - expected, other, other_unique = left, right, right_unique - else: - expected, other, other_unique = right, left, left_unique - if how == "inner": - keep_values = set(left["key"].values_host).intersection( - right["key"].values_host - ) - keep_mask = expected["key"].isin(keep_values) - expected = expected[keep_mask] - if sort: - expected = expected.sort_values("key") - if not other_unique: - other_value_counts = other["key"].value_counts() - repeats = other_value_counts.reindex( - expected["key"].values, fill_value=1 - ) - repeats = repeats.astype(np.intp) - expected = expected["key"].repeat(repeats.values) - expected = expected.to_frame() - elif how == "outer": - if on_index and left_unique and left["key"].equals(right["key"]): - expected = cudf.DataFrame({"key": 
left["key"]}) - else: - left_counts = left["key"].value_counts() - right_counts = right["key"].value_counts() - expected_counts = left_counts.mul(right_counts, fill_value=1) - expected_counts = expected_counts.astype(np.intp) - expected = expected_counts.index.values_host.repeat( - expected_counts.values_host - ) - expected = cudf.DataFrame({"key": expected}) - expected = expected.sort_values("key") - - if on_index: - expected = expected.set_index("key") - else: - expected = expected.reset_index(drop=True) - - assert_eq(result, expected) From 52433f1745dd31d29c47b1d66531b2b3c8fd4794 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 12 Aug 2025 09:26:03 -0700 Subject: [PATCH 107/366] Add streams to hashing APIs (#19663) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19663 --- python/pylibcudf/pylibcudf/hashing.pxd | 25 +++-- python/pylibcudf/pylibcudf/hashing.pyi | 30 ++++-- python/pylibcudf/pylibcudf/hashing.pyx | 104 ++++++++++++++------ python/pylibcudf/pylibcudf/libcudf/hash.pxd | 31 ++++-- 4 files changed, 128 insertions(+), 62 deletions(-) diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd index fbd478f963f..c95c5a995bb 100644 --- a/python/pylibcudf/pylibcudf/hashing.pxd +++ b/python/pylibcudf/pylibcudf/hashing.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. from libc.stdint cimport uint32_t, uint64_t +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -8,27 +9,31 @@ from .table cimport Table cpdef Column murmurhash3_x86_32( Table input, - uint32_t seed=* + uint32_t seed=*, + Stream stream=* ) cpdef Table murmurhash3_x64_128( Table input, - uint64_t seed=* + uint64_t seed=*, + Stream stream=* ) cpdef Column xxhash_32( Table input, - uint32_t seed=* + uint32_t seed=*, + Stream stream=* ) cpdef Column xxhash_64( Table input, - uint64_t seed=* + uint64_t seed=*, + Stream stream=* ) -cpdef Column md5(Table input) -cpdef Column sha1(Table input) -cpdef Column sha224(Table input) -cpdef Column sha256(Table input) -cpdef Column sha384(Table input) -cpdef Column sha512(Table input) +cpdef Column md5(Table input, Stream stream=*) +cpdef Column sha1(Table input, Stream stream=*) +cpdef Column sha224(Table input, Stream stream=*) +cpdef Column sha256(Table input, Stream stream=*) +cpdef Column sha384(Table input, Stream stream=*) +cpdef Column sha512(Table input, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi index d535d842a18..5eb217146b9 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyi +++ b/python/pylibcudf/pylibcudf/hashing.pyi @@ -2,18 +2,28 @@ from typing import Final +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.table import Table LIBCUDF_DEFAULT_HASH_SEED: Final[int] -def murmurhash3_x86_32(input: Table, seed: int = ...) -> Column: ... -def murmurhash3_x64_128(input: Table, seed: int = ...) -> Table: ... -def xxhash_32(input: Table, seed: int = ...) -> Column: ... -def xxhash_64(input: Table, seed: int = ...) -> Column: ... -def md5(input: Table) -> Column: ... -def sha1(input: Table) -> Column: ... -def sha224(input: Table) -> Column: ... -def sha256(input: Table) -> Column: ... -def sha384(input: Table) -> Column: ... -def sha512(input: Table) -> Column: ... 
+def murmurhash3_x86_32( + input: Table, seed: int = ..., stream: Stream | None = None +) -> Column: ... +def murmurhash3_x64_128( + input: Table, seed: int = ..., stream: Stream | None = None +) -> Table: ... +def xxhash_32( + input: Table, seed: int = ..., stream: Stream | None = None +) -> Column: ... +def xxhash_64( + input: Table, seed: int = ..., stream: Stream | None = None +) -> Column: ... +def md5(input: Table, stream: Stream | None = None) -> Column: ... +def sha1(input: Table, stream: Stream | None = None) -> Column: ... +def sha224(input: Table, stream: Stream | None = None) -> Column: ... +def sha256(input: Table, stream: Stream | None = None) -> Column: ... +def sha384(input: Table, stream: Stream | None = None) -> Column: ... +def sha512(input: Table, stream: Stream | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx index 1f093b20c6b..fe4cc14f4c2 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyx +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -17,9 +17,11 @@ from pylibcudf.libcudf.hash cimport ( xxhash_64 as cpp_xxhash_64, ) from pylibcudf.libcudf.table.table cimport table +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = [ "LIBCUDF_DEFAULT_HASH_SEED", @@ -39,7 +41,8 @@ LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED cpdef Column murmurhash3_x86_32( Table input, - uint32_t seed=DEFAULT_HASH_SEED + uint32_t seed=DEFAULT_HASH_SEED, + Stream stream=None ): """Computes the MurmurHash3 32-bit hash value of each row in the given table. @@ -58,18 +61,23 @@ cpdef Column murmurhash3_x86_32( A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_murmurhash3_x86_32( input.view(), - seed + seed, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Table murmurhash3_x64_128( Table input, - uint64_t seed=DEFAULT_HASH_SEED + uint64_t seed=DEFAULT_HASH_SEED, + Stream stream=None ): """Computes the MurmurHash3 64-bit hash value of each row in the given table. @@ -88,18 +96,23 @@ cpdef Table murmurhash3_x64_128( A table of two UINT64 columns """ cdef unique_ptr[table] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_murmurhash3_x64_128( input.view(), - seed + seed, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Column xxhash_32( Table input, - uint32_t seed=DEFAULT_HASH_SEED + uint32_t seed=DEFAULT_HASH_SEED, + Stream stream=None ): """Computes the xxHash 32-bit hash value of each row in the given table. @@ -119,18 +132,23 @@ cpdef Column xxhash_32( """ cdef unique_ptr[column] c_result - with nogil: + + stream = _get_stream(stream) + + with nogil: c_result = cpp_xxhash_32( input.view(), - seed + seed, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column xxhash_64( Table input, - uint64_t seed=DEFAULT_HASH_SEED + uint64_t seed=DEFAULT_HASH_SEED, + Stream stream=None ): """Computes the xxHash 64-bit hash value of each row in the given table. 
@@ -150,16 +168,20 @@ cpdef Column xxhash_64( """ cdef unique_ptr[column] c_result - with nogil: + + stream = _get_stream(stream) + + with nogil: c_result = cpp_xxhash_64( input.view(), - seed + seed, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column md5(Table input): +cpdef Column md5(Table input, Stream stream=None): """Computes the MD5 hash value of each row in the given table. For details, see :cpp:func:`md5`. @@ -177,11 +199,14 @@ cpdef Column md5(Table input): """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_md5(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_md5(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sha1(Table input): +cpdef Column sha1(Table input, Stream stream=None): """Computes the SHA-1 hash value of each row in the given table. For details, see :cpp:func:`sha1`. @@ -197,12 +222,15 @@ cpdef Column sha1(Table input): A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_sha1(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_sha1(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sha224(Table input): +cpdef Column sha224(Table input, Stream stream=None): """Computes the SHA-224 hash value of each row in the given table. For details, see :cpp:func:`sha224`. @@ -218,12 +246,15 @@ cpdef Column sha224(Table input): A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_sha224(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_sha224(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sha256(Table input): +cpdef Column sha256(Table input, Stream stream=None): """Computes the SHA-256 hash value of each row in the given table. For details, see :cpp:func:`sha256`. @@ -239,12 +270,15 @@ cpdef Column sha256(Table input): A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_sha256(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_sha256(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sha384(Table input): +cpdef Column sha384(Table input, Stream stream=None): """Computes the SHA-384 hash value of each row in the given table. For details, see :cpp:func:`sha384`. @@ -260,12 +294,15 @@ cpdef Column sha384(Table input): A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_sha384(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_sha384(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sha512(Table input): +cpdef Column sha512(Table input, Stream stream=None): """Computes the SHA-512 hash value of each row in the given table. For details, see :cpp:func:`sha512`. 
@@ -281,6 +318,9 @@ cpdef Column sha512(Table input): A column where each row is the hash of a row from the input """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_sha512(input.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_sha512(input.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index 46fdf62cd6b..7a3dec20f24 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -6,52 +6,63 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil: cdef unique_ptr[column] murmurhash3_x86_32( const table_view& input, - const uint32_t seed + const uint32_t seed, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] murmurhash3_x64_128( const table_view& input, - const uint64_t seed + const uint64_t seed, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] md5( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sha1( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sha224( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sha256( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sha384( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sha512( - const table_view& input + const table_view& input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_32( const table_view& input, - const uint32_t seed + const uint32_t seed, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_64( const table_view& input, - const uint64_t seed + const uint64_t seed, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil: From aa0858be9214617607aabc00f1841419cc2e406b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 12 Aug 2025 09:31:46 -0700 Subject: [PATCH 108/366] Improve documentation around why we need no_gc_clear on pylibcudf Scalars (#19661) I decided there wasn't a great place in the developer docs for this information, but the existing comment didn't have much information so I expanded it substantially. 
Resolves #14249 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Lawrence Mitchell (https://github.com/wence-) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19661 --- python/pylibcudf/pylibcudf/scalar.pyx | 40 ++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 7ba769e6060..0d533c960a4 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -78,10 +78,42 @@ except ImportError as err: __all__ = ["Scalar"] -# The DeviceMemoryResource attribute could be released prematurely -# by the gc if the Scalar is in a reference cycle. Removing the tp_clear -# function with the no_gc_clear decoration prevents that. See -# https://github.com/rapidsai/rmm/pull/931 for details. +# The no_gc_clear decorator on this class is necessary for the following reason: +# +# The object underlying a Scalar is a libcudf scalar. The underlying storage +# type within the scalar depends on the scalar's data type, but regardless of +# the proximate storage class all of the types ultimately store their data in +# an rmm::device_buffer that has an associated rmm memory resource used for +# allocation and deallocation. That memory resource must therefore still be +# alive when the Scalar is destroyed. With the current architecture of cudf we +# do not know exactly what mr was used to construct the scalar, so until then +# the best we can do is to grab the current memory resource at the time of +# construction and keep it alive until the Scalar is destroyed (for potential +# problems with this approach, see https://github.com/rapidsai/rmm/issues/1515; +# the solution will be to address https://github.com/rapidsai/cudf/issues/15170 +# and also pass mrs all the way down to every rmm Python API to avoid its +# default mrs). This is done in the `__cinit__` method below. +# +# However, even in the most common case where this approach gives us the +# correct mr, we still have a problem. If a Scalar participates in a reference +# cycle, then when the garbage collector goes to clear that cycle its default +# behavior will be to clear all attributes of the object, including the mr +# attribute (see +# https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#dealloc-intro). +# This is fine in the immediate Cython code because Scalar does not define a +# `__dealloc__` method, so there is no need for the mr in the Cython code. +# However, if after the Scalar was created some other code called +# `set_current_device_resource`, then there may be no other references left to +# the mr used to create the scalar. In that case, the reference count of the +# Python DeviceMemoryResource will drop to zero and it will immediately be +# destroyed, resulting in the destruction of the underlying C++ memory resource +# as well (rmm::device_buffer only has a non-owning reference to it because all +# mrs in rmm are managed with unique_ptr semantics). That will result in a +# segmentation fault when the device_buffer goes to deallocate its memory using +# a freed memory resources. To prevent this, we use the `no_gc_clear` decorator +# to prevent the garbage collector from clearing the `mr` attribute when it +# clears the Scalar object as described in +# https://cython.readthedocs.io/en/latest/src/userguide/extension_types.html#disabling-cycle-breaking-tp-clear. 
@no_gc_clear cdef class Scalar: """A scalar value in device memory. From 1dd127ceb20b81969e245953bb01fd67b9b04f67 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 12 Aug 2025 09:44:21 -0700 Subject: [PATCH 109/366] Move test_resampling/query/pickling to new cudf classic directory structure (#19615) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19615 --- .../{ => dataframe/methods}/test_query.py | 65 ++++++++++--------- .../groupby/test_function_application.py | 1 - .../test_resample.py} | 0 python/cudf/cudf/tests/groupby/test_stats.py | 1 - .../tests/{ => input_output}/test_pickling.py | 14 ++-- 5 files changed, 39 insertions(+), 42 deletions(-) rename python/cudf/cudf/tests/{ => dataframe/methods}/test_query.py (82%) delete mode 100644 python/cudf/cudf/tests/groupby/test_function_application.py rename python/cudf/cudf/tests/{test_resampling.py => groupby/test_resample.py} (100%) delete mode 100644 python/cudf/cudf/tests/groupby/test_stats.py rename python/cudf/cudf/tests/{ => input_output}/test_pickling.py (93%) diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/dataframe/methods/test_query.py similarity index 82% rename from python/cudf/cudf/tests/test_query.py rename to python/cudf/cudf/tests/dataframe/methods/test_query.py index ddb8a7ffd37..a2776fb144e 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_query.py @@ -30,7 +30,6 @@ def test_query_parser(text, expect_args): assert tuple(argspec.args) == tuple(expect_args) -@pytest.mark.parametrize("nelem", [1, 10]) @pytest.mark.parametrize( "fn", [ @@ -40,20 +39,22 @@ def test_query_parser(text, expect_args): ], ) @pytest.mark.parametrize("nulls", [True, False]) -def test_query(nelem, fn, nulls): - # prepare +def test_query(fn, nulls): + n = 5 expect_fn, query_expr = fn rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame() - pdf["a"] = np.arange(nelem) - pdf["b"] = rng.random(nelem) * nelem + pdf = pd.DataFrame( + { + "a": np.arange(n), + "b": rng.random(n) * n, + } + ) if nulls: pdf.loc[::2, "a"] = None gdf = cudf.from_pandas(pdf) assert_eq(pdf.query(query_expr), gdf.query(query_expr)) -@pytest.mark.parametrize("nelem", [1, 10]) @pytest.mark.parametrize( "fn", [ @@ -64,13 +65,13 @@ def test_query(nelem, fn, nulls): ), ], ) -def test_query_ref_env(nelem, fn): - # prepare +def test_query_ref_env(fn): + n = 5 expect_fn, query_expr = fn rng = np.random.default_rng(seed=0) df = DataFrame() - df["a"] = aa = np.arange(nelem) - df["b"] = bb = rng.random(nelem) * nelem + df["a"] = aa = np.arange(n) + df["b"] = bb = rng.random(n) * n c = 2.3 d = 1.2 # udt @@ -104,15 +105,22 @@ def test_query_local_dict(): got = df.query(expr, local_dict={"val": 10}) np.testing.assert_array_equal(aa[aa < 10], got["a"].to_numpy()) - # test for datetime - df = DataFrame() - data = np.array(["2018-10-07", "2018-10-08"], dtype="datetime64") - df["datetimes"] = data + +def test_query_local_dict_datetime(): + df = DataFrame( + { + "datetimes": np.array( + ["2018-10-07", "2018-10-08"], dtype="datetime64" + ) + } + ) search_date = datetime.datetime.strptime("2018-10-08", "%Y-%m-%d") expr = "datetimes==@search_date" got = df.query(expr, local_dict={"search_date": search_date}) - np.testing.assert_array_equal(data[1], 
got["datetimes"].to_numpy()) + np.testing.assert_array_equal( + np.datetime64("2018-10-08"), got["datetimes"].to_numpy() + ) def test_query_global_dict(): @@ -166,7 +174,6 @@ def test_query_empty_frames(): assert_eq(got, expect) -@pytest.mark.parametrize(("a_val", "b_val", "c_val"), [(4, 3, 15)]) @pytest.mark.parametrize("index", ["a", ["a", "b"]]) @pytest.mark.parametrize( "query", @@ -176,7 +183,10 @@ def test_query_empty_frames(): "(a < @a_val and b >@b_val) or c >@c_val", ], ) -def test_query_with_index_name(index, query, a_val, b_val, c_val): +def test_query_with_index_name(index, query): + a_val = 4 # noqa: F841 + b_val = 3 # noqa: F841 + c_val = 15 # noqa: F841 pdf = pd.DataFrame( { "a": [1, None, 3, 4, 5], @@ -194,7 +204,6 @@ def test_query_with_index_name(index, query, a_val, b_val, c_val): assert_eq(out, expect) -@pytest.mark.parametrize(("a_val", "b_val", "c_val"), [(4, 3, 15)]) @pytest.mark.parametrize( "query", [ @@ -203,7 +212,10 @@ def test_query_with_index_name(index, query, a_val, b_val, c_val): "(index < @a_val and b >@b_val) or c >@c_val", ], ) -def test_query_with_index_keyword(query, a_val, b_val, c_val): +def test_query_with_index_keyword(query): + a_val = 4 # noqa: F841 + b_val = 3 # noqa: F841 + c_val = 15 # noqa: F841 pdf = pd.DataFrame( { "a": [1, None, 3, 4, 5], @@ -237,15 +249,6 @@ def test_query_unsupported_dtypes(): gdf.query(query) -@pytest.mark.parametrize( - "values", - [ - [0, 1.0, 2.0, None, np.nan, None, 3, 5], - [0, 1.0, 2.0, None, 3, np.nan, None, 4], - [0, 1.0, 2.0, None, 3, np.nan, None, 4, None, 9], - ], -) -@pytest.mark.parametrize("nan_as_null", [True, False]) @pytest.mark.parametrize( "query", [ @@ -260,8 +263,8 @@ def test_query_unsupported_dtypes(): "a >= 3", ], ) -def test_query_mask(values, nan_as_null, query): - data = {"a": values} +def test_query_mask(nan_as_null, query): + data = {"a": [0, 1.0, 2.0, None, 3, np.nan, None, 4]} pdf = pd.DataFrame(data) gdf = cudf.DataFrame(data, nan_as_null=nan_as_null) diff --git a/python/cudf/cudf/tests/groupby/test_function_application.py b/python/cudf/cudf/tests/groupby/test_function_application.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/groupby/test_function_application.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/groupby/test_resample.py similarity index 100% rename from python/cudf/cudf/tests/test_resampling.py rename to python/cudf/cudf/tests/groupby/test_resample.py diff --git a/python/cudf/cudf/tests/groupby/test_stats.py b/python/cudf/cudf/tests/groupby/test_stats.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/groupby/test_stats.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/input_output/test_pickling.py similarity index 93% rename from python/cudf/cudf/tests/test_pickling.py rename to python/cudf/cudf/tests/input_output/test_pickling.py index ac13056fa7c..ed3483f296b 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/input_output/test_pickling.py @@ -70,7 +70,7 @@ def test_pickle_index(): idx = Index(np.arange(nelem), name="a") pickled = pickle.dumps(idx) out = pickle.loads(pickled) - assert (idx == out).all() + assert_eq(idx, out) def test_pickle_buffer(): @@ -83,17 +83,13 @@ def test_pickle_buffer(): assert unpacked.size == arr.nbytes -@pytest.mark.parametrize("named", [True, False]) -def test_pickle_series(named): +@pytest.mark.parametrize("name", [None, "a"]) +def test_pickle_series(name): rng = np.random.default_rng(seed=0) - if named: - ser = Series(rng.random(10), name="a") - else: - ser = Series(rng.random(10)) - + ser = Series(rng.random(10), name=name) pickled = pickle.dumps(ser) out = pickle.loads(pickled) - assert (ser == out).all() + assert_eq(ser, out) @pytest.mark.parametrize( From 0fbd451151a3f5226624448a27de75a3d81b6e4e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 12 Aug 2025 10:09:44 -0700 Subject: [PATCH 110/366] Re-enable Disabled Join Tests (#19649) This PR re-enables previously disabled join tests following the recent cuco version bump, along with the necessary fixes. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/19649 --- cpp/tests/join/distinct_join_tests.cpp | 3 +-- cpp/tests/join/join_tests.cpp | 11 ++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index b5c02bd562e..ce27a3fbc0a 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -507,8 +507,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } -// Disabled for now, waiting on upstream cuco updates -TEST_F(DistinctJoinTest, DISABLED_InvalidLoadFactor) +TEST_F(DistinctJoinTest, InvalidLoadFactor) { column_wrapper col0_0{{3, 1, 2, 0, 3}}; strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"}); diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 3ea65232c81..1f371576d55 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -40,6 +40,8 @@ #include +#include + #include namespace { @@ -185,8 +187,7 @@ struct JoinTest : public cudf::test::BaseFixture { } }; -// Disabled for now, waiting on upstream cuco updates -TEST_F(JoinTest, DISABLED_InvalidLoadFactor) +TEST_F(JoinTest, InvalidLoadFactor) { column_wrapper col0_0{{3, 1, 2, 0, 3}}; strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"}); @@ -199,13 +200,13 @@ TEST_F(JoinTest, DISABLED_InvalidLoadFactor) // Test load factor of -0.1 EXPECT_THROW(cudf::hash_join(t0, cudf::nullable_join::NO, cudf::null_equality::EQUAL, -0.1), - std::invalid_argument); + cuco::logic_error); // Test load factor of 0 EXPECT_THROW(cudf::hash_join(t0, cudf::nullable_join::NO, cudf::null_equality::EQUAL, 0.0), - std::invalid_argument); + cuco::logic_error); // Test load factor > 1 EXPECT_THROW(cudf::hash_join(t0, cudf::nullable_join::NO, cudf::null_equality::EQUAL, 1.5), - 
std::invalid_argument); + cuco::logic_error); } struct JoinParameterizedTest : public JoinTest, public testing::WithParamInterface {}; From f82828f9163b384e5012ef334a3c97f62a605637 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 12 Aug 2025 15:42:45 -0400 Subject: [PATCH 111/366] Fix broken links in 10min notebook (#19665) Fixed some outdated links in the 10min to cuDF notebook. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19665 --- docs/cudf/source/user_guide/10min.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index 87782cd7fb5..23c947ecf95 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -15,11 +15,11 @@ "\n", "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n", "\n", - "[Dask cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.read_csv.html).\n", + "[Dask cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/api/cudf.read_csv.html).\n", "\n", "\n", "
\n", - "Note: This notebook uses the explicit Dask cuDF API (dask_cudf) for clarity. However, we strongly recommend that you use Dask's configuration infrastructure to set the \"dataframe.backend\" option to \"cudf\", and work with the Dask DataFrame API directly. Please see the Dask cuDF documentation for more information.\n", + "Note: This notebook uses the explicit Dask cuDF API (dask_cudf) for clarity. However, we strongly recommend that you use Dask's configuration infrastructure to set the \"dataframe.backend\" option to \"cudf\", and work with the Dask DataFrame API directly. Please see the Dask cuDF documentation for more information.\n", "
\n", "\n", "\n", @@ -2572,7 +2572,7 @@ "id": "fd3fc4f3", "metadata": {}, "source": [ - "Like pandas, cuDF provides string processing methods in the `str` attribute of `Series`. Full documentation of string methods is a work in progress. Please see the [cuDF API documentation](https://docs.rapids.ai/api/cudf/stable/api_docs/series.html#string-handling) for more information." + "Like pandas, cuDF provides string processing methods in the `str` attribute of `Series`. Full documentation of string methods is a work in progress. Please see the [cuDF API documentation](https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/series.html#string-handling) for more information." ] }, { @@ -2637,7 +2637,7 @@ "id": "44fe1243", "metadata": {}, "source": [ - "As well as simple manipulation, We can also match strings using [regular expressions](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.core.column.string.StringMethods.match.html)." + "As well as simple manipulation, We can also match strings using [regular expressions](https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/api/cudf.core.accessors.string.StringMethods.match.html)." ] }, { From 5cd9ea07bbafa36288d5939fdc29d7d2dbe1cae8 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 12 Aug 2025 21:23:35 +0100 Subject: [PATCH 112/366] [FEA] Remove excessive copies of JITIFY's ProgramData during JIT kernel launch (#19667) This MR removes the excessive copies of the programdata, which was being copied on every kernel launch. The program data contains compressed un-compiled C++ code of CUDF's headers, which is very large. This impacts both small and large columns/tables. Follows-up https://github.com/rapidsai/cudf/issues/19625 Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19667 --- cpp/src/jit/cache.cpp | 4 ++-- cpp/src/jit/cache.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/jit/cache.cpp b/cpp/src/jit/cache.cpp index 70cf5f3150e..958d071ad1f 100644 --- a/cpp/src/jit/cache.cpp +++ b/cpp/src/jit/cache.cpp @@ -112,7 +112,7 @@ std::size_t try_parse_numeric_env_var(char const* const env_name, std::size_t de } } // namespace -jitify2::ProgramCache<>& jit::program_cache::get(jitify2::PreprocessedProgramData preprog) +jitify2::ProgramCache<>& jit::program_cache::get(jitify2::PreprocessedProgramData const& preprog) { std::lock_guard const caches_lock(_caches_mutex); @@ -138,7 +138,7 @@ jitify2::ProgramCache<>& jit::program_cache::get(jitify2::PreprocessedProgramDat return *(existing_cache->second); } -jitify2::ProgramCache<>& jit::get_program_cache(jitify2::PreprocessedProgramData preprog) +jitify2::ProgramCache<>& jit::get_program_cache(jitify2::PreprocessedProgramData const& preprog) { return cudf::get_context().program_cache().get(preprog); } diff --git a/cpp/src/jit/cache.hpp b/cpp/src/jit/cache.hpp index da51e573679..1772134bb90 100644 --- a/cpp/src/jit/cache.hpp +++ b/cpp/src/jit/cache.hpp @@ -39,10 +39,10 @@ class program_cache { program_cache& operator=(program_cache&&) = delete; ~program_cache() = default; - jitify2::ProgramCache<>& get(jitify2::PreprocessedProgramData preprog); + jitify2::ProgramCache<>& get(jitify2::PreprocessedProgramData const& preprog); }; -jitify2::ProgramCache<>& get_program_cache(jitify2::PreprocessedProgramData preprog); +jitify2::ProgramCache<>& get_program_cache(jitify2::PreprocessedProgramData const& preprog); } 
// namespace jit } // namespace cudf From 23b59a9b3f828ffb04f87f074c1b057fca5dbd4d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 12 Aug 2025 17:22:38 -0400 Subject: [PATCH 113/366] Fix integer overflow in warp-per-row grid calculation (#19638) Fixed int overflow for `size * warp_size` calculation for the number of threads given to `grid1d` utility in several strings and nvtext functions. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19638 --- cpp/src/strings/attributes.cu | 5 +++-- cpp/src/strings/case.cu | 3 ++- cpp/src/strings/like.cu | 5 +++-- cpp/src/strings/search/find.cu | 12 +++++++----- cpp/src/text/vocabulary_tokenize.cu | 3 ++- cpp/src/text/wordpiece_tokenize.cu | 5 +++-- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index ac1f32160ed..46360ee8663 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -152,8 +152,9 @@ std::unique_ptr count_characters_parallel(strings_column_view const& inp auto const d_strings = cudf::column_device_view::create(input.parent(), stream); // fill in the lengths - constexpr int block_size = 256; - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + constexpr thread_index_type block_size = 256; + constexpr thread_index_type warp_size = cudf::detail::warp_size; + cudf::detail::grid_1d grid{input.size() * warp_size, block_size}; count_characters_parallel_fn<<>>( *d_strings, d_lengths); diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index 490d8ce8a5c..6b3fe0a6cd6 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -456,7 +456,8 @@ std::unique_ptr convert_case(strings_column_view const& input, // note: tried to use segmented-reduce approach instead here and it was consistently slower auto [offsets, bytes] = [&] { rmm::device_uvector sizes(input.size(), stream); - auto grid = cudf::detail::grid_1d(input.size() * cudf::detail::warp_size, block_size); + constexpr thread_index_type warp_size = cudf::detail::warp_size; + auto grid = cudf::detail::grid_1d(input.size() * warp_size, block_size); count_bytes_kernel <<>>( ccfn, *d_strings, sizes.data()); diff --git a/cpp/src/strings/like.cu b/cpp/src/strings/like.cu index 508f5055b86..6d0c963fc74 100644 --- a/cpp/src/strings/like.cu +++ b/cpp/src/strings/like.cu @@ -343,8 +343,9 @@ std::unique_ptr like(strings_column_view const& input, like_fn{*d_strings, patterns_itr, d_escape}); } else { // warp-parallel for longer strings - constexpr auto block_size = 512; - auto const grid = cudf::detail::grid_1d(input.size() * cudf::detail::warp_size, block_size); + constexpr thread_index_type block_size = 512; + constexpr thread_index_type warp_size = cudf::detail::warp_size; + auto const grid = cudf::detail::grid_1d(input.size() * warp_size, block_size); like_kernel<<>>( *d_strings, patterns_itr, d_escape, results->mutable_view().data()); } diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index cb862bcdeda..2991a12235a 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -181,8 +181,9 @@ void find_utility(strings_column_view const& input, auto d_results = output.mutable_view().data(); if ((input.chars_size(stream) / (input.size() - input.null_count())) > AVG_CHAR_BYTES_THRESHOLD) { // 
warp-per-string runs faster for longer strings (but not shorter ones) - constexpr int block_size = 256; - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + constexpr auto block_size = 256; + constexpr thread_index_type warp_size = cudf::detail::warp_size; + cudf::detail::grid_1d grid{input.size() * warp_size, block_size}; finder_warp_parallel_fn <<>>( *d_strings, target_itr, start, stop, d_results); @@ -398,9 +399,10 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, rmm::exec_policy_nosync(stream), results_view.begin(), results_view.end(), true); } else { // launch warp per string - auto const d_strings = column_device_view::create(input.parent(), stream); - constexpr int block_size = 256; - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + auto const d_strings = column_device_view::create(input.parent(), stream); + constexpr thread_index_type block_size = 256; + constexpr thread_index_type warp_size = cudf::detail::warp_size; + cudf::detail::grid_1d grid{input.size() * warp_size, block_size}; contains_warp_parallel_fn<<>>( *d_strings, d_target, results_view.data()); } diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index e8e65848913..15069a8f01b 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -420,7 +420,8 @@ std::unique_ptr tokenize_with_vocabulary(cudf::strings_column_view stream.value()>>>(d_input_chars, chars_size, d_delimiter, d_marks.data()); // launch warp per string to compute token counts - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + constexpr cudf::thread_index_type warp_size = cudf::detail::warp_size; + cudf::detail::grid_1d grid{input.size() * warp_size, block_size}; token_counts_fn<<>>( *d_strings, d_delimiter, d_token_counts.data(), d_marks.data()); auto [token_offsets, total_count] = cudf::detail::make_offsets_child_column( diff --git a/cpp/src/text/wordpiece_tokenize.cu b/cpp/src/text/wordpiece_tokenize.cu index 02f9d38940c..b27bd379356 100644 --- a/cpp/src/text/wordpiece_tokenize.cu +++ b/cpp/src/text/wordpiece_tokenize.cu @@ -791,8 +791,9 @@ rmm::device_uvector compute_some_tokens( // find start/end for each row up to max_words_per_row words; // store word positions in start_words and sizes in word_sizes - cudf::detail::grid_1d grid_find{input.size() * cudf::detail::warp_size, block_size}; - find_words_kernel + constexpr cudf::thread_index_type warp_size = cudf::detail::warp_size; + cudf::detail::grid_1d grid_find{input.size() * warp_size, block_size}; + find_words_kernel <<>>( *d_strings, d_input_chars, max_word_offsets.data(), start_words.data(), word_sizes.data()); From 6a7134c9a26168140eff7c2fdef9a701ae756d40 Mon Sep 17 00:00:00 2001 From: Jigao Luo Date: Wed, 13 Aug 2025 00:43:11 +0200 Subject: [PATCH 114/366] Replace `rmm::device_scalar` with `cudf::detail::device_scalar` due to unnecessary synchronization (Part 3 of miss-sync) (#19119) For issue #18967, this PR is one part of merging the PR Draft #18968. In this PR, almost all `rmm::device_scalar` calls in libcudf are replaced with `cudf::detail::device_scalar` due to its internal host-pinned bounce buffer. This is also a call to action to use host-pinned memory globally in libcudf, with arguments stated in #18967 and #18968. 
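
To make the substitution concrete, here is a minimal sketch of the pattern this change applies throughout libcudf. The kernel `write_answer` and function `read_answer` are hypothetical, for illustration only; they are not code from this PR:

```c++
#include <cudf/detail/device_scalar.hpp>

#include <rmm/cuda_stream_view.hpp>

// Hypothetical kernel that writes a single result value.
__global__ void write_answer(int* out) { *out = 42; }

int read_answer(rmm::cuda_stream_view stream)
{
  // Drop-in replacement for rmm::device_scalar<int>: transfers are staged
  // through a host-pinned bounce buffer instead of pageable host memory, so
  // reading the value back synchronizes only this stream rather than forcing
  // a device-wide synchronization.
  cudf::detail::device_scalar<int> result{0, stream};
  write_answer<<<1, 1, 0, stream.value()>>>(result.data());
  return result.value(stream);  // stream-ordered device-to-host copy
}
```

Call sites keep the same constructor, `data()`, and `value(stream)` shape as `rmm::device_scalar`; only the staging memory differs.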
Authors: - Jigao Luo (https://github.com/JigaoLuo) - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/19119 --- cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 15 +++++++++++++++ .../cudf/detail/sizes_to_offsets_iterator.cuh | 6 +++--- cpp/include/cudf/reduction/detail/reduction.cuh | 7 +++---- cpp/include/cudf_test/nanoarrow_utils.hpp | 2 +- cpp/src/join/sort_merge_join.cu | 4 ++-- .../iterator/sizes_to_offsets_iterator_test.cu | 8 ++++---- cpp/tests/scalar/scalar_device_view_test.cu | 6 +++--- 7 files changed, 31 insertions(+), 17 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 9c319b9048e..52e3a47cb9a 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -655,6 +655,21 @@ kernel<<<...>>>(int_scalar.data(),...); int host_value = int_scalar.value(); ``` +##### cudf::detail::device_scalar +Acts as a drop-in replacement for `rmm::device_scalar`, with the key difference +being the use of pinned host memory as a bounce buffer for data transfers. +It is recommended for internal use to avoid the implicit synchronization overhead caused by +memcpy operations on pageable host memory. + +```c++ +// Same as the case with rmm::device_scalar above +cudf::detail::device_scalar int_scalar{42, stream, mr}; +kernel<<<...>>>(int_scalar.data(),...); + +// Note: This device-to-host transfer uses host-pinned bounce buffer for efficient memcpy +int host_value = int_scalar.value(); +``` + #### rmm::device_vector Allocates a specified number of elements of the specified type. 
If no initialization value is diff --git a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh index 67a2da31d82..8dc7213edbf 100644 --- a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh +++ b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh @@ -17,12 +17,12 @@ #pragma once #include +#include #include #include #include #include -#include #include #include @@ -203,7 +203,7 @@ struct sizes_to_offsets_iterator { * auto begin = // begin input iterator * auto end = // end input iterator * auto result = rmm::device_uvector(std::distance(begin,end), stream); - * auto last = rmm::device_scalar(0, stream); + * auto last = cudf::detail::device_scalar(0, stream); * auto itr = make_sizes_to_offsets_iterator(result.begin(), * result.end(), * last.data()); @@ -270,7 +270,7 @@ auto sizes_to_offsets(SizesIterator begin, "Only numeric types are supported by sizes_to_offsets"); using LastType = std::conditional_t, int64_t, uint64_t>; - auto last_element = rmm::device_scalar(0, stream); + auto last_element = cudf::detail::device_scalar(0, stream); auto output_itr = make_sizes_to_offsets_iterator(result, result + std::distance(begin, end), last_element.data()); // This function uses the type of the initialization parameter as the accumulator type diff --git a/cpp/include/cudf/reduction/detail/reduction.cuh b/cpp/include/cudf/reduction/detail/reduction.cuh index 6b15c8fb4c0..89eb49e8e8a 100644 --- a/cpp/include/cudf/reduction/detail/reduction.cuh +++ b/cpp/include/cudf/reduction/detail/reduction.cuh @@ -19,13 +19,13 @@ #include "reduction_operators.cuh" #include +#include #include #include #include #include #include -#include #include #include @@ -123,7 +123,7 @@ std::unique_ptr reduce(InputIterator d_in, { auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = init.value_or(op.template get_identity()); - auto dev_result = rmm::device_scalar{initial_value, stream}; + auto dev_result = cudf::detail::device_scalar{initial_value, stream}; // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -167,7 +167,6 @@ std::unique_ptr reduce(InputIterator d_in, * @param op the reduction operator * @param valid_count Number of valid items * @param ddof Delta degrees of freedom used for standard deviation and variance - * @param init Optional initial value of the reduction * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output scalar in device memory @@ -187,7 +186,7 @@ std::unique_ptr reduce(InputIterator d_in, auto const binary_op = cudf::detail::cast_functor(op.get_binary_op()); auto const initial_value = op.template get_identity(); - rmm::device_scalar intermediate_result{initial_value, stream}; + cudf::detail::device_scalar intermediate_result{initial_value, stream}; // Allocate temporary storage rmm::device_buffer d_temp_storage; diff --git a/cpp/include/cudf_test/nanoarrow_utils.hpp b/cpp/include/cudf_test/nanoarrow_utils.hpp index faeaea9e1d9..4a014450576 100644 --- a/cpp/include/cudf_test/nanoarrow_utils.hpp +++ b/cpp/include/cudf_test/nanoarrow_utils.hpp @@ -160,7 +160,7 @@ std::enable_if_t, void> populate_from_col( ArrowArrayBuffer(arr, 2)->size_bytes = sview.chars_size(cudf::get_default_stream()); ArrowArrayBuffer(arr, 2)->data = const_cast(view.data()); } else { - auto zero = rmm::device_scalar(0, cudf::get_default_stream()); + auto zero = 
cudf::detail::device_scalar(0, cudf::get_default_stream()); uint8_t const* ptr = reinterpret_cast(zero.data()); nanoarrow::BufferInitWrapped(ArrowArrayBuffer(arr, 1), std::move(zero), ptr, 4); } diff --git a/cpp/src/join/sort_merge_join.cu b/cpp/src/join/sort_merge_join.cu index bc81d2577f1..ec015c704b2 100644 --- a/cpp/src/join/sort_merge_join.cu +++ b/cpp/src/join/sort_merge_join.cu @@ -174,8 +174,8 @@ merge::matches_per_row(rmm::cuda_stream_view st // naive: iterate through larger table and binary search on smaller table auto const larger_numrows = larger.num_rows(); - rmm::device_scalar d_lb_type(bound_type::LOWER, stream, temp_mr); - rmm::device_scalar d_ub_type(bound_type::UPPER, stream, temp_mr); + cudf::detail::device_scalar d_lb_type(bound_type::LOWER, stream, temp_mr); + cudf::detail::device_scalar d_ub_type(bound_type::UPPER, stream, temp_mr); auto match_counts = cudf::detail::make_zeroed_device_uvector_async(larger_numrows + 1, stream, temp_mr); diff --git a/cpp/tests/iterator/sizes_to_offsets_iterator_test.cu b/cpp/tests/iterator/sizes_to_offsets_iterator_test.cu index 3b412b76dde..3baf0187b56 100644 --- a/cpp/tests/iterator/sizes_to_offsets_iterator_test.cu +++ b/cpp/tests/iterator/sizes_to_offsets_iterator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,10 +17,10 @@ #include #include +#include #include #include -#include #include #include @@ -48,7 +48,7 @@ TYPED_TEST(SizesToOffsetsIteratorTestTyped, ExclusiveScan) auto d_col = cudf::test::fixed_width_column_wrapper(sizes.begin(), sizes.end()); auto d_view = cudf::column_view(d_col); - auto last = rmm::device_scalar(0, stream); + auto last = cudf::detail::device_scalar(0, stream); auto result = rmm::device_uvector(d_view.size(), stream); auto output_itr = cudf::detail::make_sizes_to_offsets_iterator(result.begin(), result.end(), last.data()); @@ -80,7 +80,7 @@ TEST_F(SizesToOffsetsIteratorTest, ScanWithOverflow) auto d_col = cudf::test::fixed_width_column_wrapper(values.begin(), values.end()); auto d_view = cudf::column_view(d_col); - auto last = rmm::device_scalar(0, stream); + auto last = cudf::detail::device_scalar(0, stream); auto result = rmm::device_uvector(d_view.size(), stream); auto output_itr = cudf::detail::make_sizes_to_offsets_iterator(result.begin(), result.end(), last.data()); diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index d64a8f4418c..8f55cd6274f 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -59,7 +59,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value) auto scalar_device_view = cudf::get_scalar_device_view(s); auto scalar_device_view1 = cudf::get_scalar_device_view(s1); - rmm::device_scalar result{cudf::get_default_stream()}; + cudf::detail::device_scalar result{cudf::get_default_stream()}; test_set_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view, scalar_device_view1); @@ -86,7 +86,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull) TypeParam value = cudf::test::make_type_param_scalar(5); cudf::scalar_type_t s(value, false); auto scalar_device_view = cudf::get_scalar_device_view(s); - rmm::device_scalar result{cudf::get_default_stream()}; + cudf::detail::device_scalar result{cudf::get_default_stream()}; test_null<<<1, 1, 0, 
cudf::get_default_stream().value()>>>(scalar_device_view,
                                                             result.data());
   CUDF_CHECK_CUDA(0);
@@ -130,7 +130,7 @@ TEST_F(StringScalarDeviceViewTest, Value)
   cudf::string_scalar s(value);
   auto scalar_device_view = cudf::get_scalar_device_view(s);
-  rmm::device_scalar result{cudf::get_default_stream()};
+  cudf::detail::device_scalar result{cudf::get_default_stream()};

   auto value_v = cudf::detail::make_device_uvector(
     value, cudf::get_default_stream(), cudf::get_current_device_resource_ref());

From fa49ec5fd1c17520734952e91c0b5306b285c9d6 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Tue, 12 Aug 2025 21:24:46 -0400
Subject: [PATCH 115/366] [BUG] Set `query_set` arg when validating/running
 cudf-polars PDS-DS benchmarks (#19674)

Follows up https://github.com/rapidsai/cudf/pull/19631. I didn't set the
`query_set` arg (i.e. the benchmark name, `pdsh` or `pdsds`) when running the
PDS-DS benchmarks in validation mode (i.e. `--engine validate`) or the DuckDB
benchmarks (i.e. `--engine duckdb`). Therefore we'd get this error:

```
$ python python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py --engine duckdb 12 --root tpcds_parquet --scale 1.0
Traceback (most recent call last):
  File "/home/coder/cudf/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py", line 218, in <module>
    run_duckdb(PDSDSDuckDBQueries, extra_args)
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/coder/cudf/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py", line 109, in run_duckdb
    run_config = RunConfig.from_args(args)
  File "/home/coder/cudf/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py", line 216, in from_args
    name = args.query_set
           ^^^^^^^^^^^^^^
AttributeError: 'Namespace' object has no attribute 'query_set'
```

This PR fixes this error.
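
The fix works because an `argparse.Namespace` stores its attributes in its `__dict__`, so a missing attribute can be injected after parsing via `vars()`. A minimal standalone illustration of the pattern (simplified; not the benchmark code itself):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--engine", default="polars")
args = parser.parse_args(["--engine", "duckdb"])

# The parser never defined `query_set`, so inject it after parsing,
# the same way the fix does before handing `args` to RunConfig.from_args.
vars(args).update({"query_set": "pdsds"})
assert args.query_set == "pdsds"
```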
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19674 --- .../cudf_polars/experimental/benchmarks/pdsds.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py index b9daef51ec4..75b56991a55 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds.py @@ -35,6 +35,7 @@ if TYPE_CHECKING: from collections.abc import Sequence from types import ModuleType + from typing import Any # Without this setting, the first IO task to run # on each worker takes ~15 sec extra @@ -101,9 +102,10 @@ def execute_duckdb_query(query: str, dataset_path: Path) -> pl.DataFrame: return conn.execute("\n".join(statements)).pl() -def run_duckdb(options: Sequence[str] | None = None) -> None: +def run_duckdb(benchmark: Any, options: Sequence[str] | None = None) -> None: """Run the benchmark with DuckDB.""" args = parse_args(options, num_queries=99) + vars(args).update({"query_set": benchmark.name}) run_config = RunConfig.from_args(args) records: defaultdict[int, list[Record]] = defaultdict(list) @@ -130,11 +132,12 @@ def run_duckdb(options: Sequence[str] | None = None) -> None: records[q_id].append(record) -def run_validate(options: Sequence[str] | None = None) -> None: +def run_validate(benchmark: Any, options: Sequence[str] | None = None) -> None: """Validate Polars CPU vs DuckDB or Polars GPU.""" from polars.testing import assert_frame_equal args = parse_args(options, num_queries=99) + vars(args).update({"query_set": benchmark.name}) run_config = RunConfig.from_args(args) baseline = args.baseline @@ -212,6 +215,6 @@ def run_validate(options: Sequence[str] | None = None) -> None: if args.engine == "polars": run_polars(PDSDSPolarsQueries, extra_args, num_queries=99) elif args.engine == "duckdb": - run_duckdb(extra_args) + run_duckdb(PDSDSDuckDBQueries, extra_args) elif args.engine == "validate": - run_validate(extra_args) + run_validate(PDSDSQueries, extra_args) From b76565b7d01d07961f27d6408493c2cdcd1ade5e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 12 Aug 2025 20:34:17 -0500 Subject: [PATCH 116/366] Update to numba-cuda>=0.18.0,<0.19.0 (#19604) Updates to `numba-cuda >=0.18.0,<0.19.0`. Drops dependency on `pynvjitlink`. Updates cuda-python pinning to `>=12.9.1,<13.0.0a0` to get cuda-bindings support and fix for a segfault with Python 3.13. 
Authors: - Bradley Dice (https://github.com/bdice) - https://github.com/brandon-b-miller Approvers: - https://github.com/brandon-b-miller - Graham Markall (https://github.com/gmarkall) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19604 --- .../all_cuda-129_arch-aarch64.yaml | 7 ++-- .../all_cuda-129_arch-x86_64.yaml | 7 ++-- conda/recipes/cudf/recipe.yaml | 14 ++----- conda/recipes/pylibcudf/recipe.yaml | 2 +- dependencies.yaml | 39 +++++++++---------- python/cudf/cudf/core/udf/strings_lowering.py | 4 +- python/cudf/pyproject.toml | 9 ++--- python/cudf/udf_cpp/shim.cu | 8 ++++ python/pylibcudf/pyproject.toml | 2 +- 9 files changed, 46 insertions(+), 46 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 0e95832dddd..b53ce1f224e 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -20,7 +20,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.6.2,<13.0a0 +- cuda-python>=12.9.1,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 - cupy>=12.0.0 @@ -54,8 +54,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.14.0,<0.15.0a0 -- numba>=0.59.1,<0.62.0a0 +- numba-cuda>=0.18.0,<0.19.0a0 +- numba>=0.60.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py @@ -69,7 +69,6 @@ dependencies: - pre-commit - pyarrow>=14.0.0,<20.0.0a0 - pydata-sphinx-theme>=0.15.4 -- pynvjitlink>=0.0.0a0 - pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index e96b8d81953..4d1af2746ac 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.6.2,<13.0a0 +- cuda-python>=12.9.1,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 - cupy>=12.0.0 @@ -55,8 +55,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.14.0,<0.15.0a0 -- numba>=0.59.1,<0.62.0a0 +- numba-cuda>=0.18.0,<0.19.0a0 +- numba>=0.60.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py @@ -70,7 +70,6 @@ dependencies: - pre-commit - pyarrow>=14.0.0,<20.0.0a0 - pydata-sphinx-theme>=0.15.4 -- pynvjitlink>=0.0.0a0 - pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 38e32b5c1f2..3f69e8dcd2e 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -55,7 +55,7 @@ requirements: - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - - numba-cuda >=0.14.0,<0.15.0a0 + - numba-cuda >=0.18.0,<0.19.0a0 - libcudf =${{ version }} - pylibcudf =${{ version }} - rmm =${{ minor_version }} @@ -70,8 +70,8 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.4.0dev0 - cupy >=12.0.0 - - numba-cuda >=0.14.0,<0.15.0a0 - - numba >=0.59.1,<0.62.0a0 + - numba-cuda >=0.18.0,<0.19.0a0 + - numba >=0.60.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<20.0.0a0 - libcudf =${{ version }} @@ -79,13 +79,7 @@ requirements: - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - cuda-cudart - # Needed by Numba for CUDA support - - cuda-nvcc-impl - # TODO: Add nvjitlink here - # xref: https://github.com/rapidsai/cudf/issues/12822 - - cuda-nvrtc - - cuda-python >=12.6.2,<13.0a0 - - pynvjitlink + - 
cuda-python >=12.9.1,<13.0a0 - if: linux and x86_64 then: - libcufile diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml index 2d2cf0a630f..6ba6e189d0f 100644 --- a/conda/recipes/pylibcudf/recipe.yaml +++ b/conda/recipes/pylibcudf/recipe.yaml @@ -72,7 +72,7 @@ requirements: - libcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - - cuda-python >=12.6.2,<13.0a0 + - cuda-python >=12.9.1,<13.0a0 - nvtx >=0.2.1 - packaging ignore_run_exports: diff --git a/dependencies.yaml b/dependencies.yaml index 7b240f5bc84..504a2b81f96 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -486,9 +486,19 @@ dependencies: - cython>=3.0.3 build_python_cudf: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda] packages: - - &numba_cuda numba-cuda>=0.14.0,<0.15.0a0 + - &numba_cuda numba-cuda>=0.18.0,<0.19.0a0 + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - &numba_cuda_cu12 numba-cuda[cu12]>=0.18.0,<0.19.0a0 + - matrix: # Fallback for no matrix + packages: + - *numba_cuda_cu12 pyarrow_run: common: - output_types: [conda] @@ -646,14 +656,14 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: &run_pylibcudf_packages_all_cu12 - - cuda-python>=12.6.2,<13.0a0 + - cuda-python>=12.9.1,<13.0a0 - {matrix: null, packages: *run_pylibcudf_packages_all_cu12} run_cudf: common: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba numba>=0.59.1,<0.62.0a0 + - &numba numba>=0.60.0,<0.62.0a0 - nvtx>=0.2.1 - packaging - rich @@ -671,18 +681,13 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: &run_cudf_packages_all_cu12 - - cuda-python>=12.6.2,<13.0a0 + - cuda-python>=12.9.1,<13.0a0 - {matrix: null, packages: *run_cudf_packages_all_cu12} - - output_types: conda - matrices: - - matrix: {cuda: "12.*"} - packages: - - &pynvjitlink_unsuffixed pynvjitlink>=0.0.0a0 - output_types: [requirements, pyproject] matrices: - matrix: {cuda: "12.*"} packages: - - &numba_cuda_cu12 numba-cuda[cu12]>=0.14.0,<0.15.0a0 + - *numba_cuda_cu12 - matrix: # Fallback for no matrix packages: - *numba_cuda_cu12 @@ -692,15 +697,9 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pynvjitlink-cu12>=0.0.0a0 - nvidia-cuda-nvcc-cu12 - nvidia-cuda-nvrtc-cu12 - - matrix: - cuda: "12.*" - cuda_suffixed: "false" - packages: &run_cudf_cu12_unsuffixed - - *pynvjitlink_unsuffixed - - {matrix: null, packages: *run_cudf_cu12_unsuffixed} + - {matrix: null, packages: []} run_cudf_polars: common: - output_types: [conda, requirements, pyproject] @@ -782,9 +781,9 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - - numba==0.59.1 - - numba-cuda==0.14.0 + - numba==0.60.0 - pandas==2.0.* + - numba-cuda==0.18.0 - matrix: {dependencies: "latest"} packages: - pandas==2.3.1 diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py index 4cb755785e8..61f69cb8c71 100644 --- a/python/cudf/cudf/core/udf/strings_lowering.py +++ b/python/cudf/cudf/core/udf/strings_lowering.py @@ -267,7 +267,9 @@ def decref_managed_udf_string(context, builder, sig, args): context, builder, value=managed_ptr ) fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.IntType(8))]) - fn = cgutils.get_or_insert_function(builder.module, fnty, "NRT_decref") + fn = cgutils.get_or_insert_function( + builder.module, fnty, "NRT_decref_managed_string" + ) builder.call(fn, (managed.meminfo,)) return diff --git 
a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 8e770112a67..6cb02397aed 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -19,12 +19,12 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cachetools", - "cuda-python>=12.6.2,<13.0a0", + "cuda-python>=12.9.1,<13.0a0", "cupy-cuda12x>=12.0.0", "fsspec>=0.6.0", "libcudf==25.10.*,>=0.0.0a0", - "numba-cuda[cu12]>=0.14.0,<0.15.0a0", - "numba>=0.59.1,<0.62.0a0", + "numba-cuda[cu12]>=0.18.0,<0.19.0a0", + "numba>=0.60.0,<0.62.0a0", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", "packaging", @@ -32,7 +32,6 @@ dependencies = [ "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", "pylibcudf==25.10.*,>=0.0.0a0", - "pynvjitlink>=0.0.0a0", "rich", "rmm==25.10.*,>=0.0.0a0", "typing_extensions>=4.0.0", @@ -126,7 +125,7 @@ requires = [ "libcudf==25.10.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "ninja", - "numba-cuda>=0.14.0,<0.15.0a0", + "numba-cuda[cu12]>=0.18.0,<0.19.0a0", "pylibcudf==25.10.*,>=0.0.0a0", "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf/udf_cpp/shim.cu b/python/cudf/udf_cpp/shim.cu index 535358bfc9a..d6f84a23299 100644 --- a/python/cudf/udf_cpp/shim.cu +++ b/python/cudf/udf_cpp/shim.cu @@ -75,6 +75,14 @@ __device__ NRT_MemInfo* make_meminfo_for_new_udf_string(udf_string* udf_str) } } +// Special decref called only by python after transferring ownership of output strings +// Must reset dtor with one that is part of the current module +extern "C" __device__ void NRT_decref_managed_string(NRT_MemInfo* mi) +{ + mi->dtor = udf_str_dtor; + NRT_decref(mi); +} + extern "C" __device__ int len(int* nb_retval, void const* str) { auto sv = reinterpret_cast(str); diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 85d8693b1c3..0c7f89111e3 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=12.6.2,<13.0a0", + "cuda-python>=12.9.1,<13.0a0", "libcudf==25.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", From 05c2bca3f82527e2145a95366523310613c1b876 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 12 Aug 2025 18:40:20 -0700 Subject: [PATCH 117/366] Add streams to sorting APIs (#19671) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19671 --- .../pylibcudf/pylibcudf/libcudf/sorting.pxd | 34 ++++-- python/pylibcudf/pylibcudf/sorting.pxd | 27 +++-- python/pylibcudf/pylibcudf/sorting.pyi | 22 +++- python/pylibcudf/pylibcudf/sorting.pyx | 102 +++++++++++++++--- 4 files changed, 150 insertions(+), 35 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd index a5fc13dc90e..0fcda9320dc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -16,19 +16,22 @@ from pylibcudf.libcudf.types cimport ( null_order, size_type ) +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: cdef unique_ptr[column] sorted_order( table_view source_table, vector[order] column_order, - 
vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] stable_sorted_order( table_view source_table, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] rank( @@ -37,12 +40,15 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: order column_order, null_policy null_handling, null_order null_precedence, - bool percentage) except +libcudf_exception_handler + bool percentage, + cuda_stream_view stream + ) except +libcudf_exception_handler cdef bool is_sorted( const table_view& table, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] segmented_sort_by_key( @@ -50,7 +56,8 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, const column_view& segment_offsets, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] stable_segmented_sort_by_key( @@ -58,43 +65,50 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, const column_view& segment_offsets, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] sort_by_key( const table_view& values, const table_view& keys, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort_by_key( const table_view& values, const table_view& keys, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] sort( table_view source_table, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] stable_sort( table_view source_table, vector[order] column_order, - vector[null_order] null_precedence + vector[null_order] null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] top_k( const column_view& col, size_type k, order sort_order, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] top_k_order( const column_view& col, size_type k, order sort_order, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd index 91f8354f965..2262238c966 100644 --- a/python/pylibcudf/pylibcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/sorting.pxd @@ -3,17 +3,21 @@ from libcpp cimport bool from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table -cpdef Column sorted_order(Table source_table, list column_order, list null_precedence) +cpdef Column sorted_order( + Table source_table, list column_order, list null_precedence, Stream stream=* 
+) cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, + Stream stream=* ) cpdef Column rank( @@ -23,9 +27,12 @@ cpdef Column rank( null_policy null_handling, null_order null_precedence, bool percentage, + Stream stream=* ) -cpdef bool is_sorted(Table table, list column_order, list null_precedence) +cpdef bool is_sorted( + Table table, list column_order, list null_precedence, Stream stream=* +) cpdef Table segmented_sort_by_key( Table values, @@ -33,6 +40,7 @@ cpdef Table segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, + Stream stream=* ) cpdef Table stable_segmented_sort_by_key( @@ -41,6 +49,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, + Stream stream=* ) cpdef Table sort_by_key( @@ -48,6 +57,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, + Stream stream=* ) cpdef Table stable_sort_by_key( @@ -55,12 +65,17 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, + Stream stream=* ) -cpdef Table sort(Table source_table, list column_order, list null_precedence) +cpdef Table sort( + Table source_table, list column_order, list null_precedence, Stream stream=* +) -cpdef Table stable_sort(Table source_table, list column_order, list null_precedence) +cpdef Table stable_sort( + Table source_table, list column_order, list null_precedence, Stream stream=* +) -cpdef Column top_k(Column col, size_type k, order sort_order = *) +cpdef Column top_k(Column col, size_type k, order sort_order=*, Stream stream=*) -cpdef Column top_k_order(Column col, size_type k, order sort_order = *) +cpdef Column top_k_order(Column col, size_type k, order sort_order=*, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 07ad962d0ce..4ff529631e3 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -1,5 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm.stream import Stream + from pylibcudf.aggregation import RankMethod from pylibcudf.column import Column from pylibcudf.table import Table @@ -9,11 +11,13 @@ def sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Column: ... def stable_sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Column: ... def rank( input_view: Column, @@ -22,9 +26,13 @@ def rank( null_handling: NullPolicy, null_precedence: NullOrder, percentage: bool, + stream: Stream | None = None, ) -> Column: ... def is_sorted( - tbl: Table, column_order: list[Order], null_precedence: list[NullOrder] + tbl: Table, + column_order: list[Order], + null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> bool: ... def segmented_sort_by_key( values: Table, @@ -32,6 +40,7 @@ def segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... def stable_segmented_sort_by_key( values: Table, @@ -39,34 +48,43 @@ def stable_segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... 
def sort_by_key( values: Table, keys: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... def stable_sort_by_key( values: Table, keys: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... def sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... def stable_sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], + stream: Stream | None = None, ) -> Table: ... def top_k( col: Column, k: int, sort_order: Order = Order.DESCENDING, + stream: Stream | None = None, ) -> Column: ... def top_k_order( - col: Column, k: int, sort_order: Order = Order.DESCENDING + col: Column, + k: int, + sort_order: Order = Order.DESCENDING, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index 31efc018d6d..a1f28b7762b 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -8,9 +8,11 @@ from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = [ "is_sorted", @@ -25,7 +27,9 @@ __all__ = [ "stable_sorted_order", ] -cpdef Column sorted_order(Table source_table, list column_order, list null_precedence): +cpdef Column sorted_order( + Table source_table, list column_order, list null_precedence, Stream stream=None +): """Computes the row indices required to sort the table. For details, see :cpp:func:`sorted_order`. @@ -47,19 +51,24 @@ cpdef Column sorted_order(Table source_table, list column_order, list null_prece cdef unique_ptr[column] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.sorted_order( source_table.view(), c_orders, c_null_precedence, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, + Stream stream=None ): """Computes the row indices required to sort the table, preserving order of equal elements. @@ -83,13 +92,17 @@ cpdef Column stable_sorted_order( cdef unique_ptr[column] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.stable_sorted_order( source_table.view(), c_orders, c_null_precedence, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column rank( @@ -99,6 +112,7 @@ cpdef Column rank( null_policy null_handling, null_order null_precedence, bool percentage, + Stream stream=None ): """Computes the rank of each element in the column. @@ -125,6 +139,9 @@ cpdef Column rank( The rank of each element in the column. 
""" cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.rank( input_view.view(), @@ -133,11 +150,14 @@ cpdef Column rank( null_handling, null_precedence, percentage, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef bool is_sorted(Table tbl, list column_order, list null_precedence): +cpdef bool is_sorted( + Table tbl, list column_order, list null_precedence, Stream stream=None +): """Checks if the table is sorted. For details, see :cpp:func:`is_sorted`. @@ -159,11 +179,15 @@ cpdef bool is_sorted(Table tbl, list column_order, list null_precedence): cdef bool c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.is_sorted( tbl.view(), c_orders, c_null_precedence, + stream.view() ) return c_result @@ -174,6 +198,7 @@ cpdef Table segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, + Stream stream=None ): """Sorts the table by key, within segments. @@ -200,6 +225,9 @@ cpdef Table segmented_sort_by_key( cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.segmented_sort_by_key( values.view(), @@ -207,8 +235,9 @@ cpdef Table segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table stable_segmented_sort_by_key( @@ -217,6 +246,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, + Stream stream=None ): """Sorts the table by key preserving order of equal elements, within segments. @@ -244,6 +274,9 @@ cpdef Table stable_segmented_sort_by_key( cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.stable_segmented_sort_by_key( values.view(), @@ -251,8 +284,9 @@ cpdef Table stable_segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table sort_by_key( @@ -260,6 +294,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, + Stream stream=None ): """Sorts the table by key. @@ -284,14 +319,18 @@ cpdef Table sort_by_key( cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.sort_by_key( values.view(), keys.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef Table stable_sort_by_key( @@ -299,6 +338,7 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, + Stream stream=None ): """Sorts the table by key preserving order of equal elements. 
@@ -323,17 +363,23 @@ cpdef Table stable_sort_by_key( cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.stable_sort_by_key( values.view(), keys.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Table sort(Table source_table, list column_order, list null_precedence): +cpdef Table sort( + Table source_table, list column_order, list null_precedence, Stream stream=None +): """Sorts the table. For details, see :cpp:func:`sort`. @@ -355,16 +401,22 @@ cpdef Table sort(Table source_table, list column_order, list null_precedence): cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.sort( source_table.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Table stable_sort(Table source_table, list column_order, list null_precedence): +cpdef Table stable_sort( + Table source_table, list column_order, list null_precedence, Stream stream=None +): """Sorts the table preserving order of equal elements. For details, see :cpp:func:`stable_sort`. @@ -386,16 +438,22 @@ cpdef Table stable_sort(Table source_table, list column_order, list null_precede cdef unique_ptr[table] c_result cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.stable_sort( source_table.view(), c_orders, c_null_precedence, + stream.view() ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Column top_k(Column col, size_type k, order sort_order = order.DESCENDING): +cpdef Column top_k( + Column col, size_type k, order sort_order = order.DESCENDING, Stream stream=None +): """ Computes the top-k values of a column. @@ -417,16 +475,22 @@ cpdef Column top_k(Column col, size_type k, order sort_order = order.DESCENDING) A column of the top ``k`` elements from the input. """ cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.top_k( col.view(), k, sort_order, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column top_k_order(Column col, size_type k, order sort_order = order.DESCENDING): +cpdef Column top_k_order( + Column col, size_type k, order sort_order = order.DESCENDING, Stream stream=None +): """ Computes the indices of the top-k values of a column. @@ -451,10 +515,14 @@ cpdef Column top_k_order(Column col, size_type k, order sort_order = order.DESCE A column of the indices of the top ``k`` elements. 
""" cdef unique_ptr[column] c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_sorting.top_k_order( col.view(), k, sort_order, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) From 97a3014bf4f7569fd8df373040a7cb2e09d2bd65 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 12 Aug 2025 18:43:56 -0700 Subject: [PATCH 118/366] Add streams to pylibcudf join APIs (#19672) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19672 --- python/pylibcudf/pylibcudf/join.pxd | 40 ++-- python/pylibcudf/pylibcudf/join.pyi | 61 +++++- python/pylibcudf/pylibcudf/join.pyx | 210 +++++++++++++++----- python/pylibcudf/pylibcudf/libcudf/join.pxd | 56 ++++-- 4 files changed, 278 insertions(+), 89 deletions(-) diff --git a/python/pylibcudf/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd index bb9162b466a..e5aa8be2261 100644 --- a/python/pylibcudf/pylibcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/join.pxd @@ -1,6 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.libcudf.types cimport null_equality +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .expressions cimport Expression @@ -10,63 +11,73 @@ from .table cimport Table cpdef tuple inner_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef tuple left_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef tuple full_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef Column left_semi_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef Column left_anti_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) -cpdef Table cross_join(Table left, Table right) +cpdef Table cross_join(Table left, Table right, Stream stream=*) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, + Stream stream=* ) cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, + Stream stream=* ) cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, + Stream stream=* ) cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, + Stream stream=* ) cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, + Stream stream=* ) cpdef tuple mixed_inner_join( @@ -75,7 +86,8 @@ cpdef tuple mixed_inner_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef tuple mixed_left_join( @@ -84,7 +96,8 @@ cpdef tuple mixed_left_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef tuple mixed_full_join( @@ -93,7 +106,8 @@ cpdef tuple mixed_full_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef Column mixed_left_semi_join( @@ -102,7 +116,8 @@ cpdef 
Column mixed_left_semi_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) cpdef Column mixed_left_anti_join( @@ -111,5 +126,6 @@ cpdef Column mixed_left_anti_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi index f34357baa67..5008bbd2a94 100644 --- a/python/pylibcudf/pylibcudf/join.pyi +++ b/python/pylibcudf/pylibcudf/join.pyi @@ -1,40 +1,74 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from rmm.pylibrmm import Stream + from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.table import Table from pylibcudf.types import NullEquality def inner_join( - left_keys: Table, right_keys: Table, nulls_equal: NullEquality + left_keys: Table, + right_keys: Table, + nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def left_join( - left_keys: Table, right_keys: Table, nulls_equal: NullEquality + left_keys: Table, + right_keys: Table, + nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def full_join( - left_keys: Table, right_keys: Table, nulls_equal: NullEquality + left_keys: Table, + right_keys: Table, + nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def left_semi_join( - left_keys: Table, right_keys: Table, nulls_equal: NullEquality + left_keys: Table, + right_keys: Table, + nulls_equal: NullEquality, + stream: Stream | None = None, ) -> Column: ... def left_anti_join( - left_keys: Table, right_keys: Table, nulls_equal: NullEquality + left_keys: Table, + right_keys: Table, + nulls_equal: NullEquality, + stream: Stream | None = None, ) -> Column: ... -def cross_join(left: Table, right: Table) -> Table: ... +def cross_join( + left: Table, right: Table, stream: Stream | None = None +) -> Table: ... def conditional_inner_join( - left: Table, right: Table, binary_predicate: Expression + left: Table, + right: Table, + binary_predicate: Expression, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def conditional_left_join( - left: Table, right: Table, binary_predicate: Expression + left: Table, + right: Table, + binary_predicate: Expression, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def conditional_full_join( - left: Table, right: Table, binary_predicate: Expression + left: Table, + right: Table, + binary_predicate: Expression, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def conditional_left_semi_join( - left: Table, right: Table, binary_predicate: Expression + left: Table, + right: Table, + binary_predicate: Expression, + stream: Stream | None = None, ) -> Column: ... def conditional_left_anti_join( - left: Table, right: Table, binary_predicate: Expression + left: Table, + right: Table, + binary_predicate: Expression, + stream: Stream | None = None, ) -> Column: ... def mixed_inner_join( left_keys: Table, @@ -43,6 +77,7 @@ def mixed_inner_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... 
def mixed_left_join( left_keys: Table, @@ -51,6 +86,7 @@ def mixed_left_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def mixed_full_join( left_keys: Table, @@ -59,6 +95,7 @@ def mixed_full_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> tuple[Column, Column]: ... def mixed_left_semi_join( left_keys: Table, @@ -67,6 +104,7 @@ def mixed_left_semi_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> Column: ... def mixed_left_anti_join( left_keys: Table, @@ -75,4 +113,5 @@ def mixed_left_anti_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index c2efe05ffc4..a9261345db5 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -1,8 +1,10 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator import dereference +from libc.stddef cimport size_t from libcpp.memory cimport make_unique, unique_ptr +from libcpp.optional cimport optional from libcpp.utility cimport move from pylibcudf.libcudf cimport join as cpp_join from pylibcudf.libcudf.column.column cimport column @@ -10,10 +12,12 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_equality from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .expressions cimport Expression from .table cimport Table +from .utils cimport _get_stream __all__ = [ "conditional_full_join", @@ -34,7 +38,7 @@ __all__ = [ "mixed_left_semi_join", ] -cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map): +cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map, Stream stream): # helper to convert a gather map to a Column return Column.from_libcudf( move( @@ -43,14 +47,16 @@ cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map): device_buffer(), 0 ) - ) + ), + stream ) cpdef tuple inner_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform an inner join between two tables. @@ -72,18 +78,24 @@ cpdef tuple inner_join( join. """ cdef cpp_join.gather_map_pair_type c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_join.inner_join(left_keys.view(), right_keys.view(), nulls_equal) + c_result = cpp_join.inner_join( + left_keys.view(), right_keys.view(), nulls_equal, stream.view() + ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) cpdef tuple left_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a left join between two tables. @@ -105,18 +117,24 @@ cpdef tuple left_join( join. 
""" cdef cpp_join.gather_map_pair_type c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_join.left_join(left_keys.view(), right_keys.view(), nulls_equal) + c_result = cpp_join.left_join( + left_keys.view(), right_keys.view(), nulls_equal, stream.view() + ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) cpdef tuple full_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a full join between two tables. @@ -138,18 +156,24 @@ cpdef tuple full_join( join. """ cdef cpp_join.gather_map_pair_type c_result + + stream = _get_stream(stream) + with nogil: - c_result = cpp_join.full_join(left_keys.view(), right_keys.view(), nulls_equal) + c_result = cpp_join.full_join( + left_keys.view(), right_keys.view(), nulls_equal, stream.view() + ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) cpdef Column left_semi_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a left semi join between two tables. @@ -170,19 +194,24 @@ cpdef Column left_semi_join( A column containing the row indices from the left table after the join. """ cdef cpp_join.gather_map_type c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.left_semi_join( left_keys.view(), right_keys.view(), - nulls_equal + nulls_equal, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) cpdef Column left_anti_join( Table left_keys, Table right_keys, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a left anti join between two tables. @@ -203,16 +232,20 @@ cpdef Column left_anti_join( A column containing the row indices from the left table after the join. """ cdef cpp_join.gather_map_type c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.left_anti_join( left_keys.view(), right_keys.view(), - nulls_equal + nulls_equal, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) -cpdef Table cross_join(Table left, Table right): +cpdef Table cross_join(Table left, Table right, Stream stream=None): """Perform a cross join on two tables. For details see :cpp:func:`cross_join`. @@ -230,15 +263,19 @@ cpdef Table cross_join(Table left, Table right): The result of cross joining the two inputs. """ cdef unique_ptr[table] result + + stream = _get_stream(stream) + with nogil: - result = cpp_join.cross_join(left.view(), right.view()) - return Table.from_libcudf(move(result)) + result = cpp_join.cross_join(left.view(), right.view(), stream.view()) + return Table.from_libcudf(move(result), stream) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, + Stream stream=None ): """Perform a conditional inner join between two tables. @@ -260,13 +297,21 @@ cpdef tuple conditional_inner_join( join. 
""" cdef cpp_join.gather_map_pair_type c_result + cdef optional[size_t] output_size + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.conditional_inner_join( - left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + left.view(), + right.view(), + dereference(binary_predicate.c_obj.get()), + output_size, + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -274,6 +319,7 @@ cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, + Stream stream=None ): """Perform a conditional left join between two tables. @@ -295,13 +341,21 @@ cpdef tuple conditional_left_join( join. """ cdef cpp_join.gather_map_pair_type c_result + cdef optional[size_t] output_size + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.conditional_left_join( - left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + left.view(), + right.view(), + dereference(binary_predicate.c_obj.get()), + output_size, + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -309,6 +363,7 @@ cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, + Stream stream=None ): """Perform a conditional full join between two tables. @@ -330,13 +385,19 @@ cpdef tuple conditional_full_join( join. """ cdef cpp_join.gather_map_pair_type c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.conditional_full_join( - left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + left.view(), + right.view(), + dereference(binary_predicate.c_obj.get()), + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -344,6 +405,7 @@ cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, + Stream stream=None ): """Perform a conditional left semi join between two tables. @@ -364,17 +426,26 @@ cpdef Column conditional_left_semi_join( A column containing the row indices from the left table after the join. """ cdef cpp_join.gather_map_type c_result + cdef optional[size_t] output_size + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.conditional_left_semi_join( - left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + left.view(), + right.view(), + dereference(binary_predicate.c_obj.get()), + output_size, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, + Stream stream=None ): """Perform a conditional left anti join between two tables. @@ -395,11 +466,19 @@ cpdef Column conditional_left_anti_join( A column containing the row indices from the left table after the join. 
""" cdef cpp_join.gather_map_type c_result + cdef optional[size_t] output_size + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.conditional_left_anti_join( - left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + left.view(), + right.view(), + dereference(binary_predicate.c_obj.get()), + output_size, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) cpdef tuple mixed_inner_join( @@ -408,7 +487,8 @@ cpdef tuple mixed_inner_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a mixed inner join between two tables. @@ -436,6 +516,10 @@ cpdef tuple mixed_inner_join( join. """ cdef cpp_join.gather_map_pair_type c_result + cdef cpp_join.output_size_data_type empty_optional + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.mixed_inner_join( left_keys.view(), @@ -444,10 +528,12 @@ cpdef tuple mixed_inner_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, + empty_optional, + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -457,7 +543,8 @@ cpdef tuple mixed_left_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a mixed left join between two tables. @@ -485,6 +572,10 @@ cpdef tuple mixed_left_join( join. """ cdef cpp_join.gather_map_pair_type c_result + cdef cpp_join.output_size_data_type empty_optional + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.mixed_left_join( left_keys.view(), @@ -493,10 +584,12 @@ cpdef tuple mixed_left_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, + empty_optional, + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -506,7 +599,8 @@ cpdef tuple mixed_full_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a mixed full join between two tables. @@ -534,6 +628,10 @@ cpdef tuple mixed_full_join( join. """ cdef cpp_join.gather_map_pair_type c_result + cdef cpp_join.output_size_data_type empty_optional + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.mixed_full_join( left_keys.view(), @@ -542,10 +640,12 @@ cpdef tuple mixed_full_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, + empty_optional, + stream.view() ) return ( - _column_from_gather_map(move(c_result.first)), - _column_from_gather_map(move(c_result.second)), + _column_from_gather_map(move(c_result.first), stream), + _column_from_gather_map(move(c_result.second), stream), ) @@ -555,7 +655,8 @@ cpdef Column mixed_left_semi_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a mixed left semi join between two tables. 
@@ -582,6 +683,9 @@ cpdef Column mixed_left_semi_join( A column containing the row indices from the left table after the join. """ cdef cpp_join.gather_map_type c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.mixed_left_semi_join( left_keys.view(), @@ -590,8 +694,9 @@ cpdef Column mixed_left_semi_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) cpdef Column mixed_left_anti_join( @@ -600,7 +705,8 @@ cpdef Column mixed_left_anti_join( Table left_conditional, Table right_conditional, Expression binary_predicate, - null_equality nulls_equal + null_equality nulls_equal, + Stream stream=None ): """Perform a mixed left anti join between two tables. @@ -627,6 +733,9 @@ cpdef Column mixed_left_anti_join( A column containing the row indices from the left table after the join. """ cdef cpp_join.gather_map_type c_result + + stream = _get_stream(stream) + with nogil: c_result = cpp_join.mixed_left_anti_join( left_keys.view(), @@ -635,5 +744,6 @@ cpdef Column mixed_left_anti_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, + stream.view() ) - return _column_from_gather_map(move(c_result)) + return _column_from_gather_map(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 3810bdb6798..111576ea1d9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -12,71 +12,85 @@ from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_equality, size_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view from rmm.librmm.device_uvector cimport device_uvector +from pylibcudf.libcudf.utilities.span cimport device_span ctypedef unique_ptr[device_uvector[size_type]] gather_map_type ctypedef pair[gather_map_type, gather_map_type] gather_map_pair_type +ctypedef optional[pair[size_t, device_span[const size_type]]] output_size_data_type cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: cdef gather_map_pair_type inner_join( const table_view left_keys, const table_view right_keys, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type left_join( const table_view left_keys, const table_view right_keys, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type full_join( const table_view left_keys, const table_view right_keys, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type left_semi_join( const table_view left_keys, const table_view right_keys, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type left_anti_join( const table_view left_keys, const table_view right_keys, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type inner_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type left_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type full_join( const table_view left_keys, const table_view 
right_keys, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type left_semi_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type left_anti_join( const table_view left_keys, const table_view right_keys, null_equality nulls_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] cross_join( const table_view left, const table_view right, + cuda_stream_view stream ) except + cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: @@ -84,65 +98,67 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type conditional_inner_join( const table_view left, const table_view right, const expression binary_predicate, - optional[size_t] output_size + optional[size_t] output_size, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type conditional_left_join( const table_view left, const table_view right, const expression binary_predicate, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type conditional_left_join( const table_view left, const table_view right, const expression binary_predicate, - optional[size_t] output_size + optional[size_t] output_size, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type conditional_full_join( const table_view left, const table_view right, const expression binary_predicate, - ) except +libcudf_exception_handler - - cdef gather_map_pair_type conditional_full_join( - const table_view left, - const table_view right, - const expression binary_predicate, - optional[size_t] output_size + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type conditional_left_semi_join( const table_view left, const table_view right, const expression binary_predicate, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type conditional_left_semi_join( const table_view left, const table_view right, const expression binary_predicate, - optional[size_t] output_size + optional[size_t] output_size, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type conditional_left_anti_join( const table_view left, const table_view right, const expression binary_predicate, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type conditional_left_anti_join( const table_view left, const table_view right, const expression binary_predicate, - optional[size_t] output_size + optional[size_t] output_size, + cuda_stream_view stream ) except +libcudf_exception_handler cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: @@ -152,7 +168,9 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view left_conditional, const table_view right_conditional, const expression binary_predicate, - null_equality compare_nulls + null_equality compare_nulls, + output_size_data_type output_size_data, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type mixed_left_join( @@ -161,7 +179,9 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view left_conditional, const table_view right_conditional, const expression 
binary_predicate, - null_equality compare_nulls + null_equality compare_nulls, + output_size_data_type output_size_data, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_pair_type mixed_full_join( @@ -170,7 +190,9 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view left_conditional, const table_view right_conditional, const expression binary_predicate, - null_equality compare_nulls + null_equality compare_nulls, + output_size_data_type output_size_data, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type mixed_left_semi_join( @@ -179,7 +201,8 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view left_conditional, const table_view right_conditional, const expression binary_predicate, - null_equality compare_nulls + null_equality compare_nulls, + cuda_stream_view stream ) except +libcudf_exception_handler cdef gather_map_type mixed_left_anti_join( @@ -188,5 +211,6 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view left_conditional, const table_view right_conditional, const expression binary_predicate, - null_equality compare_nulls + null_equality compare_nulls, + cuda_stream_view stream ) except +libcudf_exception_handler From 5bc06674e2455bdc745a8322608417da0d870327 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 13 Aug 2025 13:57:10 +0100 Subject: [PATCH 119/366] [FEA] Refactor AST `operator_functor`s for use in JIT-compiled CUDA (#19541) This pull request restructures the AST headers so that they can be used in online-compiled (JIT-compiled) CUDA, and adds an operator-to-string utility needed for CUDA code generation. It also fixes the `std::` namespace references in the floating_conversion header to use `cuda::std::`. Precedes https://github.com/rapidsai/cudf/pull/19467 Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19541 --- cpp/include/cudf/ast/ast_operator.hpp | 101 +++ .../cudf/ast/detail/operator_functor.cuh | 760 ++++++++++++++++++ cpp/include/cudf/ast/detail/operators.cuh | 752 +---------------- cpp/include/cudf/ast/detail/operators.hpp | 2 + cpp/include/cudf/ast/detail/possibly_null.cuh | 45 ++ cpp/include/cudf/ast/expressions.hpp | 75 +- cpp/include/cudf/fixed_point/conv.hpp | 113 +++ .../detail/floating_conversion.hpp | 22 +- cpp/include/cudf/unary.hpp | 79 -- cpp/src/ast/operators.cpp | 57 ++ cpp/src/binaryop/compiled/binary_ops.cuh | 1 + cpp/src/quantiles/quantiles_util.hpp | 1 + .../quantiles/tdigest/tdigest_aggregation.cu | 1 + cpp/src/unary/cast_ops.cu | 1 + cpp/tests/fixed_point/fixed_point_tests.cpp | 3 +- 15 files changed, 1106 insertions(+), 907 deletions(-) create mode 100644 cpp/include/cudf/ast/ast_operator.hpp create mode 100644 cpp/include/cudf/ast/detail/operator_functor.cuh create mode 100644 cpp/include/cudf/ast/detail/possibly_null.cuh create mode 100644 cpp/include/cudf/fixed_point/conv.hpp diff --git a/cpp/include/cudf/ast/ast_operator.hpp b/cpp/include/cudf/ast/ast_operator.hpp new file mode 100644 index 00000000000..397a3550143 --- /dev/null +++ b/cpp/include/cudf/ast/ast_operator.hpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace CUDF_EXPORT cudf { + +namespace ast { +/** + * @addtogroup expressions + * @{ + * @file + */ + +/** + * @brief Enum of supported operators. + */ +enum class ast_operator : int32_t { + // Binary operators + ADD, ///< operator + + SUB, ///< operator - + MUL, ///< operator * + DIV, ///< operator / using common type of lhs and rhs + TRUE_DIV, ///< operator / after promoting type to floating point + FLOOR_DIV, ///< operator / after promoting to 64 bit floating point and then + ///< flooring the result + MOD, ///< operator % + PYMOD, ///< operator % using Python's sign rules for negatives + POW, ///< lhs ^ rhs + EQUAL, ///< operator == + NULL_EQUAL, ///< operator == with Spark rules: NULL_EQUAL(null, null) is true, NULL_EQUAL(null, + ///< valid) is false, and + ///< NULL_EQUAL(valid, valid) == EQUAL(valid, valid) + NOT_EQUAL, ///< operator != + LESS, ///< operator < + GREATER, ///< operator > + LESS_EQUAL, ///< operator <= + GREATER_EQUAL, ///< operator >= + BITWISE_AND, ///< operator & + BITWISE_OR, ///< operator | + BITWISE_XOR, ///< operator ^ + LOGICAL_AND, ///< operator && + NULL_LOGICAL_AND, ///< operator && with Spark rules: NULL_LOGICAL_AND(null, null) is null, + ///< NULL_LOGICAL_AND(null, true) is + ///< null, NULL_LOGICAL_AND(null, false) is false, and NULL_LOGICAL_AND(valid, + ///< valid) == LOGICAL_AND(valid, valid) + LOGICAL_OR, ///< operator || + NULL_LOGICAL_OR, ///< operator || with Spark rules: NULL_LOGICAL_OR(null, null) is null, + ///< NULL_LOGICAL_OR(null, true) is true, + ///< NULL_LOGICAL_OR(null, false) is null, and NULL_LOGICAL_OR(valid, valid) == + ///< LOGICAL_OR(valid, valid) + // Unary operators + IDENTITY, ///< Identity function + IS_NULL, ///< Check if operand is null + SIN, ///< Trigonometric sine + COS, ///< Trigonometric cosine + TAN, ///< Trigonometric tangent + ARCSIN, ///< Trigonometric sine inverse + ARCCOS, ///< Trigonometric cosine inverse + ARCTAN, ///< Trigonometric tangent inverse + SINH, ///< Hyperbolic sine + COSH, ///< Hyperbolic cosine + TANH, ///< Hyperbolic tangent + ARCSINH, ///< Hyperbolic sine inverse + ARCCOSH, ///< Hyperbolic cosine inverse + ARCTANH, ///< Hyperbolic tangent inverse + EXP, ///< Exponential (base e, Euler number) + LOG, ///< Natural Logarithm (base e) + SQRT, ///< Square-root (x^0.5) + CBRT, ///< Cube-root (x^(1.0/3)) + CEIL, ///< Smallest integer value not less than arg + FLOOR, ///< largest integer value not greater than arg + ABS, ///< Absolute value + RINT, ///< Rounds the floating-point argument arg to an integer value + BIT_INVERT, ///< Bitwise Not (~) + NOT, ///< Logical Not (!) 
+ CAST_TO_INT64, ///< Cast value to int64_t + CAST_TO_UINT64, ///< Cast value to uint64_t + CAST_TO_FLOAT64 ///< Cast value to double +}; + +/** @} */ // end of group +} // namespace ast + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/operator_functor.cuh b/cpp/include/cudf/ast/detail/operator_functor.cuh new file mode 100644 index 00000000000..640c7548c80 --- /dev/null +++ b/cpp/include/cudf/ast/detail/operator_functor.cuh @@ -0,0 +1,760 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace CUDF_EXPORT cudf { +namespace ast::detail { + +/** + * @brief Operator functor. + * + * This functor is templated on an `ast_operator`, with each template specialization defining a + * callable `operator()` that executes the operation. The functor specialization also has a member + * `arity` defining the number of operands that are accepted by the call to `operator()`. The + * `operator()` is templated on the types of its inputs (e.g. `typename LHS` and `typename RHS` for + * a binary operator). Trailing return types are defined as `decltype(result)` where `result` is + * the returned value. The trailing return types allow SFINAE to only consider template + * instantiations for valid combinations of types. This, in turn, allows the operator functors to be + * used with traits like `is_valid_binary_op` that rely on `std::is_invocable` and related features. + * + * @tparam op AST operator. 
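+ *
+ * For example (an illustrative sketch, not a definition from this header):
+ *
+ * @code
+ * using add_op = operator_functor<ast_operator::ADD, false>;
+ * static_assert(add_op::arity == 2);
+ * // The trailing return type decltype(lhs + rhs) lets invalid type mixes
+ * // fail SFINAE instead of producing a hard error:
+ * static_assert(cuda::std::is_invocable_v<add_op, int, int>);
+ * static_assert(!cuda::std::is_invocable_v<add_op, int, void*>);
+ * @endcode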
+ */ +template +struct operator_functor {}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs + rhs) + { + return lhs + rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs - rhs) + { + return lhs - rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs * rhs) + { + return lhs * rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs / rhs) + { + return lhs / rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(static_cast(lhs) / static_cast(rhs)) + { + return static_cast(lhs) / static_cast(rhs); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(floor(static_cast(lhs) / static_cast(rhs))) + { + return floor(static_cast(lhs) / static_cast(rhs)); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(static_cast(lhs) % static_cast(rhs)) + requires(cuda::std::is_integral_v) + { + return static_cast(lhs) % static_cast(rhs); + } + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(fmodf(static_cast(lhs), static_cast(rhs))) + requires(cuda::std::is_same_v) + { + return fmodf(static_cast(lhs), static_cast(rhs)); + } + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(fmod(static_cast(lhs), static_cast(rhs))) + requires(cuda::std::is_same_v) + { + return fmod(static_cast(lhs), static_cast(rhs)); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(((static_cast(lhs) % static_cast(rhs)) + + static_cast(rhs)) % + static_cast(rhs)) + requires(cuda::std::is_integral_v) + { + return ((static_cast(lhs) % static_cast(rhs)) + + static_cast(rhs)) % + static_cast(rhs); + } + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(fmodf(fmodf(static_cast(lhs), static_cast(rhs)) + + static_cast(rhs), + static_cast(rhs))) + requires(cuda::std::is_same_v) + { + return fmodf(fmodf(static_cast(lhs), static_cast(rhs)) + + static_cast(rhs), + static_cast(rhs)); + } + + template > + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(fmod(fmod(static_cast(lhs), static_cast(rhs)) + + static_cast(rhs), + static_cast(rhs))) + requires(cuda::std::is_same_v) + { + return fmod(fmod(static_cast(lhs), static_cast(rhs)) + + static_cast(rhs), + static_cast(rhs)); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept + -> decltype(cuda::std::pow(lhs, rhs)) + { + return cuda::std::pow(lhs, rhs); + } +}; + +template <> +struct operator_functor 
{ + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs == rhs) + { + return lhs == rhs; + } +}; + +// Alias NULL_EQUAL = EQUAL in the non-nullable case. +template <> +struct operator_functor + : public operator_functor {}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs != rhs) + { + return lhs != rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs < rhs) + { + return lhs < rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs > rhs) + { + return lhs > rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs <= rhs) + { + return lhs <= rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs >= rhs) + { + return lhs >= rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs & rhs) + { + return lhs & rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs | rhs) + { + return lhs | rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs ^ rhs) + { + return lhs ^ rhs; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs && rhs) + { + return lhs && rhs; + } +}; + +// Alias NULL_LOGICAL_AND = LOGICAL_AND in the non-nullable case. +template <> +struct operator_functor + : public operator_functor {}; + +template <> +struct operator_functor { + static constexpr auto arity{2}; + + template + __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs || rhs) + { + return lhs || rhs; + } +}; + +// Alias NULL_LOGICAL_OR = LOGICAL_OR in the non-nullable case. 
+template <> +struct operator_functor + : public operator_functor {}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(input) + { + return input; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> bool + { + return false; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sin(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::sin(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cos(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::cos(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::tan(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::tan(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::asin(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::asin(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::acos(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::acos(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::atan(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::atan(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sinh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::sinh(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cosh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::cosh(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::tanh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::tanh(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept + -> decltype(cuda::std::asinh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::asinh(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept + -> decltype(cuda::std::acosh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::acosh(input); + } +}; + +template <> +struct 
operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept + -> decltype(cuda::std::atanh(input)) + requires(cuda::std::is_floating_point_v) + { + return cuda::std::atanh(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::exp(input)) + { + return cuda::std::exp(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::log(input)) + { + return cuda::std::log(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sqrt(input)) + { + return cuda::std::sqrt(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cbrt(input)) + { + return cuda::std::cbrt(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::ceil(input)) + { + return cuda::std::ceil(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept + -> decltype(cuda::std::floor(input)) + { + return cuda::std::floor(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + // Only accept signed or unsigned types (both require is_arithmetic to be true) + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::abs(input)) + requires(cuda::std::is_signed_v) + { + return cuda::std::abs(input); + } + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(input) + requires(cuda::std::is_unsigned_v) + { + return input; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::rint(input)) + { + return cuda::std::rint(input); + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(~input) + { + return ~input; + } +}; + +template <> +struct operator_functor { + static constexpr auto arity{1}; + + template + __device__ inline auto operator()(InputT input) const noexcept -> decltype(!input) + { + return !input; + } +}; + +template +struct cast { + static constexpr auto arity{1}; + template + __device__ inline auto operator()(From f) const noexcept -> To + requires(is_fixed_point()) + { + if constexpr (cuda::std::is_floating_point_v) { + return convert_fixed_to_floating(f); + } else { + return static_cast(f); + } + } + + template + __device__ inline auto operator()(From f) const noexcept -> decltype(static_cast(f)) + requires(!is_fixed_point()) + { + return static_cast(f); + } +}; + +template <> +struct operator_functor : cast {}; +template <> +struct operator_functor : cast {}; +template <> +struct operator_functor : cast {}; + +/* + * The default specialization of nullable operators is to fall back to the non-nullable + * implementation + 
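 *
 * (Illustrative example: operator_functor<ast_operator::ADD, true> applied to
 * two cuda::std::optional<int> inputs returns an engaged optional holding the
 * sum when both inputs are engaged, and an empty optional otherwise.)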
*/ +template +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept + -> possibly_null_value_t + requires(arity_placeholder == 2) + { + using Out = possibly_null_value_t; + return (lhs.has_value() && rhs.has_value()) ? Out{NonNullOperator{}(*lhs, *rhs)} : Out{}; + } + + template + __device__ inline auto operator()(Input const input) const noexcept + -> possibly_null_value_t + requires(arity_placeholder == 1) + { + using Out = possibly_null_value_t; + return input.has_value() ? Out{NonNullOperator{}(*input)} : Out{}; + } +}; + +// IS_NULL(null) is true, IS_NULL(valid) is false +template <> +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs) const noexcept -> bool + { + return !lhs.has_value(); + } +}; + +// NULL_EQUAL(null, null) is true, NULL_EQUAL(null, valid) is false, and NULL_EQUAL(valid, valid) == +// EQUAL(valid, valid) +template <> +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept + -> possibly_null_value_t + { + // Case 1: Neither is null, so the output is given by the operation. + if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } + // Case 2: Two nulls compare equal. + if (!lhs.has_value() && !rhs.has_value()) { return {true}; } + // Case 3: One value is null, while the other is not, so we return false. + return {false}; + } +}; + +///< NULL_LOGICAL_AND(null, null) is null, NULL_LOGICAL_AND(null, true) is null, +///< NULL_LOGICAL_AND(null, false) is false, and NULL_LOGICAL_AND(valid, valid) == +///< LOGICAL_AND(valid, valid) +template <> +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept + -> possibly_null_value_t + { + // Case 1: Neither is null, so the output is given by the operation. + if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } + // Case 2: Two nulls return null. + if (!lhs.has_value() && !rhs.has_value()) { return {}; } + // Case 3: One value is null, while the other is not. If it's true we return null, otherwise we + // return false. + auto const& valid_element = lhs.has_value() ? lhs : rhs; + if (*valid_element) { return {}; } + return {false}; + } +}; + +///< NULL_LOGICAL_OR(null, null) is null, NULL_LOGICAL_OR(null, true) is true, NULL_LOGICAL_OR(null, +///< false) is null, and NULL_LOGICAL_OR(valid, valid) == LOGICAL_OR(valid, valid) +template <> +struct operator_functor { + using NonNullOperator = operator_functor; + static constexpr auto arity = NonNullOperator::arity; + + template + __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept + -> possibly_null_value_t + { + // Case 1: Neither is null, so the output is given by the operation. + if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } + // Case 2: Two nulls return null. + if (!lhs.has_value() && !rhs.has_value()) { return {}; } + // Case 3: One value is null, while the other is not. If it's true we return true, otherwise we + // return null. 
+ auto const& valid_element = lhs.has_value() ? lhs : rhs; + if (*valid_element) { return {true}; } + return {}; + } +}; + +} // namespace ast::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/operators.cuh b/cpp/include/cudf/ast/detail/operators.cuh index 1a7f7357f7e..333b8be2de3 100644 --- a/cpp/include/cudf/ast/detail/operators.cuh +++ b/cpp/include/cudf/ast/detail/operators.cuh @@ -15,37 +15,20 @@ */ #pragma once +#include +#include +#include #include #include #include #include #include -#include -#include #include namespace CUDF_EXPORT cudf { namespace ast::detail { -// Type trait for wrapping nullable types in a cuda::std::optional. Non-nullable -// types are returned as is. -template -struct possibly_null_value; - -template -struct possibly_null_value { - using type = cuda::std::optional; -}; - -template -struct possibly_null_value { - using type = T; -}; - -template -using possibly_null_value_t = typename possibly_null_value::type; - // Traits for valid operator / type combinations template constexpr bool is_valid_binary_op = cuda::std::is_invocable_v; @@ -177,734 +160,5 @@ CUDF_HOST_DEVICE inline constexpr decltype(auto) ast_operator_dispatcher(ast_ope } } -/** - * @brief Operator functor. - * - * This functor is templated on an `ast_operator`, with each template specialization defining a - * callable `operator()` that executes the operation. The functor specialization also has a member - * `arity` defining the number of operands that are accepted by the call to `operator()`. The - * `operator()` is templated on the types of its inputs (e.g. `typename LHS` and `typename RHS` for - * a binary operator). Trailing return types are defined as `decltype(result)` where `result` is - * the returned value. The trailing return types allow SFINAE to only consider template - * instantiations for valid combinations of types. This, in turn, allows the operator functors to be - * used with traits like `is_valid_binary_op` that rely on `std::is_invocable` and related features. - * - * @tparam op AST operator. 
- */ -template -struct operator_functor {}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs + rhs) - { - return lhs + rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs - rhs) - { - return lhs - rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs * rhs) - { - return lhs * rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs / rhs) - { - return lhs / rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(static_cast(lhs) / static_cast(rhs)) - { - return static_cast(lhs) / static_cast(rhs); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(floor(static_cast(lhs) / static_cast(rhs))) - { - return floor(static_cast(lhs) / static_cast(rhs)); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(static_cast(lhs) % static_cast(rhs)) - requires(cuda::std::is_integral_v) - { - return static_cast(lhs) % static_cast(rhs); - } - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(fmodf(static_cast(lhs), static_cast(rhs))) - requires(cuda::std::is_same_v) - { - return fmodf(static_cast(lhs), static_cast(rhs)); - } - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(fmod(static_cast(lhs), static_cast(rhs))) - requires(cuda::std::is_same_v) - { - return fmod(static_cast(lhs), static_cast(rhs)); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(((static_cast(lhs) % static_cast(rhs)) + - static_cast(rhs)) % - static_cast(rhs)) - requires(cuda::std::is_integral_v) - { - return ((static_cast(lhs) % static_cast(rhs)) + - static_cast(rhs)) % - static_cast(rhs); - } - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(fmodf(fmodf(static_cast(lhs), static_cast(rhs)) + - static_cast(rhs), - static_cast(rhs))) - requires(cuda::std::is_same_v) - { - return fmodf(fmodf(static_cast(lhs), static_cast(rhs)) + - static_cast(rhs), - static_cast(rhs)); - } - - template > - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(fmod(fmod(static_cast(lhs), static_cast(rhs)) + - static_cast(rhs), - static_cast(rhs))) - requires(cuda::std::is_same_v) - { - return fmod(fmod(static_cast(lhs), static_cast(rhs)) + - static_cast(rhs), - static_cast(rhs)); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept - -> decltype(cuda::std::pow(lhs, rhs)) - { - return cuda::std::pow(lhs, rhs); - } -}; - -template <> -struct operator_functor 
{ - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs == rhs) - { - return lhs == rhs; - } -}; - -// Alias NULL_EQUAL = EQUAL in the non-nullable case. -template <> -struct operator_functor - : public operator_functor {}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs != rhs) - { - return lhs != rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs < rhs) - { - return lhs < rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs > rhs) - { - return lhs > rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs <= rhs) - { - return lhs <= rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs >= rhs) - { - return lhs >= rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs & rhs) - { - return lhs & rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs | rhs) - { - return lhs | rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs ^ rhs) - { - return lhs ^ rhs; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs && rhs) - { - return lhs && rhs; - } -}; - -// Alias NULL_LOGICAL_AND = LOGICAL_AND in the non-nullable case. -template <> -struct operator_functor - : public operator_functor {}; - -template <> -struct operator_functor { - static constexpr auto arity{2}; - - template - __device__ inline auto operator()(LHS lhs, RHS rhs) const noexcept -> decltype(lhs || rhs) - { - return lhs || rhs; - } -}; - -// Alias NULL_LOGICAL_OR = LOGICAL_OR in the non-nullable case. 
-template <> -struct operator_functor - : public operator_functor {}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(input) - { - return input; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> bool - { - return false; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sin(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::sin(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cos(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::cos(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::tan(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::tan(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::asin(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::asin(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::acos(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::acos(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::atan(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::atan(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sinh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::sinh(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cosh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::cosh(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::tanh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::tanh(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept - -> decltype(cuda::std::asinh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::asinh(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept - -> decltype(cuda::std::acosh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::acosh(input); - } -}; - -template <> -struct 
operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept - -> decltype(cuda::std::atanh(input)) - requires(cuda::std::is_floating_point_v) - { - return cuda::std::atanh(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::exp(input)) - { - return cuda::std::exp(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::log(input)) - { - return cuda::std::log(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::sqrt(input)) - { - return cuda::std::sqrt(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::cbrt(input)) - { - return cuda::std::cbrt(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::ceil(input)) - { - return cuda::std::ceil(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept - -> decltype(cuda::std::floor(input)) - { - return cuda::std::floor(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - // Only accept signed or unsigned types (both require is_arithmetic to be true) - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::abs(input)) - requires(cuda::std::is_signed_v) - { - return cuda::std::abs(input); - } - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(input) - requires(cuda::std::is_unsigned_v) - { - return input; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(cuda::std::rint(input)) - { - return cuda::std::rint(input); - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(~input) - { - return ~input; - } -}; - -template <> -struct operator_functor { - static constexpr auto arity{1}; - - template - __device__ inline auto operator()(InputT input) const noexcept -> decltype(!input) - { - return !input; - } -}; - -template -struct cast { - static constexpr auto arity{1}; - template - __device__ inline auto operator()(From f) const noexcept -> To - requires(is_fixed_point()) - { - if constexpr (cuda::std::is_floating_point_v) { - return convert_fixed_to_floating(f); - } else { - return static_cast(f); - } - } - - template - __device__ inline auto operator()(From f) const noexcept -> decltype(static_cast(f)) - requires(!is_fixed_point()) - { - return static_cast(f); - } -}; - -template <> -struct operator_functor : cast {}; -template <> -struct operator_functor : cast {}; -template <> -struct operator_functor : cast {}; - -/* - * The default specialization of nullable operators is to fall back to the non-nullable - * implementation - 
*/ -template -struct operator_functor { - using NonNullOperator = operator_functor; - static constexpr auto arity = NonNullOperator::arity; - - template - __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept - -> possibly_null_value_t - requires(arity_placeholder == 2) - { - using Out = possibly_null_value_t; - return (lhs.has_value() && rhs.has_value()) ? Out{NonNullOperator{}(*lhs, *rhs)} : Out{}; - } - - template - __device__ inline auto operator()(Input const input) const noexcept - -> possibly_null_value_t - requires(arity_placeholder == 1) - { - using Out = possibly_null_value_t; - return input.has_value() ? Out{NonNullOperator{}(*input)} : Out{}; - } -}; - -// IS_NULL(null) is true, IS_NULL(valid) is false -template <> -struct operator_functor { - using NonNullOperator = operator_functor; - static constexpr auto arity = NonNullOperator::arity; - - template - __device__ inline auto operator()(LHS const lhs) const noexcept -> bool - { - return !lhs.has_value(); - } -}; - -// NULL_EQUAL(null, null) is true, NULL_EQUAL(null, valid) is false, and NULL_EQUAL(valid, valid) == -// EQUAL(valid, valid) -template <> -struct operator_functor { - using NonNullOperator = operator_functor; - static constexpr auto arity = NonNullOperator::arity; - - template - __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept - -> possibly_null_value_t - { - // Case 1: Neither is null, so the output is given by the operation. - if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } - // Case 2: Two nulls compare equal. - if (!lhs.has_value() && !rhs.has_value()) { return {true}; } - // Case 3: One value is null, while the other is not, so we return false. - return {false}; - } -}; - -///< NULL_LOGICAL_AND(null, null) is null, NULL_LOGICAL_AND(null, true) is null, -///< NULL_LOGICAL_AND(null, false) is false, and NULL_LOGICAL_AND(valid, valid) == -///< LOGICAL_AND(valid, valid) -template <> -struct operator_functor { - using NonNullOperator = operator_functor; - static constexpr auto arity = NonNullOperator::arity; - - template - __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept - -> possibly_null_value_t - { - // Case 1: Neither is null, so the output is given by the operation. - if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } - // Case 2: Two nulls return null. - if (!lhs.has_value() && !rhs.has_value()) { return {}; } - // Case 3: One value is null, while the other is not. If it's true we return null, otherwise we - // return false. - auto const& valid_element = lhs.has_value() ? lhs : rhs; - if (*valid_element) { return {}; } - return {false}; - } -}; - -///< NULL_LOGICAL_OR(null, null) is null, NULL_LOGICAL_OR(null, true) is true, NULL_LOGICAL_OR(null, -///< false) is null, and NULL_LOGICAL_OR(valid, valid) == LOGICAL_OR(valid, valid) -template <> -struct operator_functor { - using NonNullOperator = operator_functor; - static constexpr auto arity = NonNullOperator::arity; - - template - __device__ inline auto operator()(LHS const lhs, RHS const rhs) const noexcept - -> possibly_null_value_t - { - // Case 1: Neither is null, so the output is given by the operation. - if (lhs.has_value() && rhs.has_value()) { return {NonNullOperator{}(*lhs, *rhs)}; } - // Case 2: Two nulls return null. - if (!lhs.has_value() && !rhs.has_value()) { return {}; } - // Case 3: One value is null, while the other is not. If it's true we return true, otherwise we - // return null. 
- auto const& valid_element = lhs.has_value() ? lhs : rhs; - if (*valid_element) { return {true}; } - return {}; - } -}; - } // namespace ast::detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index 61bee5b479b..f12148c9ef8 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -47,6 +47,8 @@ cudf::data_type ast_operator_return_type(ast_operator op, */ cudf::size_type ast_operator_arity(ast_operator op); +std::string_view ast_operator_string(ast_operator op); + } // namespace ast::detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/detail/possibly_null.cuh b/cpp/include/cudf/ast/detail/possibly_null.cuh new file mode 100644 index 00000000000..d30b914666f --- /dev/null +++ b/cpp/include/cudf/ast/detail/possibly_null.cuh @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +namespace CUDF_EXPORT cudf { + +namespace ast::detail { + +// Type trait for wrapping nullable types in a cuda::std::optional. Non-nullable +// types are returned as is. +template +struct possibly_null_value; + +template +struct possibly_null_value { + using type = cuda::std::optional; +}; + +template +struct possibly_null_value { + using type = T; +}; + +template +using possibly_null_value_t = typename possibly_null_value::type; + +} // namespace ast::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 4c5601be856..6e084f9fb2d 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -90,73 +91,6 @@ struct expression { virtual ~expression() {} }; -/** - * @brief Enum of supported operators. 
- */ -enum class ast_operator : int32_t { - // Binary operators - ADD, ///< operator + - SUB, ///< operator - - MUL, ///< operator * - DIV, ///< operator / using common type of lhs and rhs - TRUE_DIV, ///< operator / after promoting type to floating point - FLOOR_DIV, ///< operator / after promoting to 64 bit floating point and then - ///< flooring the result - MOD, ///< operator % - PYMOD, ///< operator % using Python's sign rules for negatives - POW, ///< lhs ^ rhs - EQUAL, ///< operator == - NULL_EQUAL, ///< operator == with Spark rules: NULL_EQUAL(null, null) is true, NULL_EQUAL(null, - ///< valid) is false, and - ///< NULL_EQUAL(valid, valid) == EQUAL(valid, valid) - NOT_EQUAL, ///< operator != - LESS, ///< operator < - GREATER, ///< operator > - LESS_EQUAL, ///< operator <= - GREATER_EQUAL, ///< operator >= - BITWISE_AND, ///< operator & - BITWISE_OR, ///< operator | - BITWISE_XOR, ///< operator ^ - LOGICAL_AND, ///< operator && - NULL_LOGICAL_AND, ///< operator && with Spark rules: NULL_LOGICAL_AND(null, null) is null, - ///< NULL_LOGICAL_AND(null, true) is - ///< null, NULL_LOGICAL_AND(null, false) is false, and NULL_LOGICAL_AND(valid, - ///< valid) == LOGICAL_AND(valid, valid) - LOGICAL_OR, ///< operator || - NULL_LOGICAL_OR, ///< operator || with Spark rules: NULL_LOGICAL_OR(null, null) is null, - ///< NULL_LOGICAL_OR(null, true) is true, - ///< NULL_LOGICAL_OR(null, false) is null, and NULL_LOGICAL_OR(valid, valid) == - ///< LOGICAL_OR(valid, valid) - // Unary operators - IDENTITY, ///< Identity function - IS_NULL, ///< Check if operand is null - SIN, ///< Trigonometric sine - COS, ///< Trigonometric cosine - TAN, ///< Trigonometric tangent - ARCSIN, ///< Trigonometric sine inverse - ARCCOS, ///< Trigonometric cosine inverse - ARCTAN, ///< Trigonometric tangent inverse - SINH, ///< Hyperbolic sine - COSH, ///< Hyperbolic cosine - TANH, ///< Hyperbolic tangent - ARCSINH, ///< Hyperbolic sine inverse - ARCCOSH, ///< Hyperbolic cosine inverse - ARCTANH, ///< Hyperbolic tangent inverse - EXP, ///< Exponential (base e, Euler number) - LOG, ///< Natural Logarithm (base e) - SQRT, ///< Square-root (x^0.5) - CBRT, ///< Cube-root (x^(1.0/3)) - CEIL, ///< Smallest integer value not less than arg - FLOOR, ///< largest integer value not greater than arg - ABS, ///< Absolute value - RINT, ///< Rounds the floating-point argument arg to an integer value - BIT_INVERT, ///< Bitwise Not (~) - NOT, ///< Logical Not (!) - CAST_TO_INT64, ///< Cast value to int64_t - CAST_TO_UINT64, ///< Cast value to uint64_t - CAST_TO_FLOAT64 ///< Cast value to double -}; - /** * @brief Enum of table references. * @@ -317,6 +251,13 @@ class literal : public expression { */ [[nodiscard]] generic_scalar_device_view get_value() const { return value; } + /** + * @brief Get the scalar. + * + * @return The scalar object + */ + [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; } + /** * @copydoc expression::accept */ diff --git a/cpp/include/cudf/fixed_point/conv.hpp b/cpp/include/cudf/fixed_point/conv.hpp new file mode 100644 index 00000000000..3ccb876d009 --- /dev/null +++ b/cpp/include/cudf/fixed_point/conv.hpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace CUDF_EXPORT cudf { +/** + * @addtogroup fixed_point_classes + * @{ + * @file + * @brief Conversion functions for fixed-point numbers + */ + +/** + * @brief Convert a floating-point value to fixed point + * + * @note This conversion was moved from fixed-point member functions to free functions. + * This is so that the complex conversion code is not included into many parts of the + * code base that don't need it, and so that it's more obvious to pinpoint where these + * conversions are occurring. + * + * @tparam Fixed The fixed-point type to convert to + * @tparam Floating The floating-point type to convert from + * @param floating The floating-point value to convert + * @param scale The desired scale of the fixed-point value + * @return The converted fixed-point value + */ +template && is_fixed_point())> +CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::scale_type scale) +{ + using Rep = typename Fixed::rep; + auto const value = [&]() { + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_floating_to_integral(floating, scale); + } else { + return static_cast(numeric::detail::shift(floating, scale)); + } + }(); + + return Fixed(numeric::scaled_integer{value, scale}); +} + +/** + * @brief Convert a fixed-point value to floating point + * + * @note This conversion was moved from fixed-point member functions to free functions. + * This is so that the complex conversion code is not included into many parts of the + * code base that don't need it, and so that it's more obvious to pinpoint where these + * conversions are occurring. 
+ * + * @tparam Floating The floating-point type to convert to + * @tparam Fixed The fixed-point type to convert from + * @param fixed The fixed-point value to convert + * @return The converted floating-point value + */ +template && is_fixed_point())> +CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) +{ + using Rep = typename Fixed::rep; + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_integral_to_floating(fixed.value(), fixed.scale()); + } else { + auto const casted = static_cast(fixed.value()); + auto const scale = numeric::scale_type{-fixed.scale()}; + return numeric::detail::shift(casted, scale); + } +} + +/** + * @brief Convert a value to floating point + * + * @tparam Floating The floating-point type to convert to + * @tparam Input The input type to convert from + * @param input The input value to convert + * @return The converted floating-point value + */ +template )> +CUDF_HOST_DEVICE Floating convert_to_floating(Input input) +{ + if constexpr (is_fixed_point()) { + return convert_fixed_to_floating(input); + } else { + return static_cast(input); + } +} + +/** @} */ // end of group +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp index 21d2a9a7d3a..282880e4023 100644 --- a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp @@ -38,27 +38,27 @@ namespace detail { * @return The number of significant bits: the # of bits - # of leading zeroes */ template || std::is_same_v || - std::is_same_v)> + CUDF_ENABLE_IF(cuda::std::is_same_v || cuda::std::is_same_v || + cuda::std::is_same_v)> CUDF_HOST_DEVICE inline constexpr int count_significant_bits(T value) { #ifdef __CUDA_ARCH__ - if constexpr (std::is_same_v) { + if constexpr (cuda::std::is_same_v) { return 64 - __clzll(static_cast(value)); - } else if constexpr (std::is_same_v) { + } else if constexpr (cuda::std::is_same_v) { return 32 - __clz(static_cast(value)); - } else if constexpr (std::is_same_v) { + } else if constexpr (cuda::std::is_same_v) { // 128 bit type, must break up into high and low components auto const high_bits = static_cast(value >> 64); auto const low_bits = static_cast(value); return 128 - (__clzll(high_bits) + static_cast(high_bits == 0) * __clzll(low_bits)); } #else - if constexpr (std::is_same_v) { + if constexpr (cuda::std::is_same_v) { return 64 - cuda::std::countl_zero(value); - } else if constexpr (std::is_same_v) { + } else if constexpr (cuda::std::is_same_v) { return 32 - cuda::std::countl_zero(value); - } else if constexpr (std::is_same_v) { + } else if constexpr (cuda::std::is_same_v) { // 128 bit type, must break up into high and low components auto const high_bits = static_cast(value >> 64); if (high_bits == 0) { @@ -263,7 +263,7 @@ struct floating_converter { auto integer_rep = bit_cast_to_integer(floating); // Extract the currently stored (biased) exponent - using SignedType = std::make_signed_t; + using SignedType = cuda::std::make_signed_t; auto exponent_bits = integer_rep & exponent_mask; auto stored_pow2 = static_cast(exponent_bits >> num_stored_mantissa_bits); @@ -550,13 +550,13 @@ struct shifting_constants { static constexpr bool is_double = cuda::std::is_same_v; /// Integer type that can hold the value of the significand - using IntegerRep = std::conditional_t; + using IntegerRep = cuda::std::conditional_t; /// Num bits needed to hold the significand 
static constexpr auto num_significand_bits = cuda::std::numeric_limits::digits; /// Shift data back and forth in space of a type with 2x the starting bits, to give us enough room - using ShiftingRep = std::conditional_t; + using ShiftingRep = cuda::std::conditional_t; // The significand of a float / double is 24 / 53 bits // However, to uniquely represent each double / float as different #'s in decimal diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index ea9457296f6..77cc808a8ed 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -34,85 +34,6 @@ namespace CUDF_EXPORT cudf { * @brief Column APIs for unary ops */ -/** - * @brief Convert a floating-point value to fixed point - * - * @note This conversion was moved from fixed-point member functions to free functions. - * This is so that the complex conversion code is not included into many parts of the - * code base that don't need it, and so that it's more obvious to pinpoint where these - * conversions are occurring. - * - * @tparam Fixed The fixed-point type to convert to - * @tparam Floating The floating-point type to convert from - * @param floating The floating-point value to convert - * @param scale The desired scale of the fixed-point value - * @return The converted fixed-point value - */ -template && is_fixed_point())> -CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::scale_type scale) -{ - using Rep = typename Fixed::rep; - auto const value = [&]() { - if constexpr (Fixed::rad == numeric::Radix::BASE_10) { - return numeric::detail::convert_floating_to_integral(floating, scale); - } else { - return static_cast(numeric::detail::shift(floating, scale)); - } - }(); - - return Fixed(numeric::scaled_integer{value, scale}); -} - -/** - * @brief Convert a fixed-point value to floating point - * - * @note This conversion was moved from fixed-point member functions to free functions. - * This is so that the complex conversion code is not included into many parts of the - * code base that don't need it, and so that it's more obvious to pinpoint where these - * conversions are occurring. - * - * @tparam Floating The floating-point type to convert to - * @tparam Fixed The fixed-point type to convert from - * @param fixed The fixed-point value to convert - * @return The converted floating-point value - */ -template && is_fixed_point())> -CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) -{ - using Rep = typename Fixed::rep; - if constexpr (Fixed::rad == numeric::Radix::BASE_10) { - return numeric::detail::convert_integral_to_floating(fixed.value(), fixed.scale()); - } else { - auto const casted = static_cast(fixed.value()); - auto const scale = numeric::scale_type{-fixed.scale()}; - return numeric::detail::shift(casted, scale); - } -} - -/** - * @brief Convert a value to floating point - * - * @tparam Floating The floating-point type to convert to - * @tparam Input The input type to convert from - * @param input The input value to convert - * @return The converted floating-point value - */ -template )> -CUDF_HOST_DEVICE Floating convert_to_floating(Input input) -{ - if constexpr (is_fixed_point()) { - return convert_fixed_to_floating(input); - } else { - return static_cast(input); - } -} - /** * @brief Types of unary operations that can be performed on data. 
*/ diff --git a/cpp/src/ast/operators.cpp b/cpp/src/ast/operators.cpp index 0fa548f4d90..655d011b81c 100644 --- a/cpp/src/ast/operators.cpp +++ b/cpp/src/ast/operators.cpp @@ -272,6 +272,63 @@ cudf::size_type ast_operator_arity(ast_operator op) return result; } +std::string_view ast_operator_string(ast_operator op) +{ + switch (op) { + case ast_operator::ADD: return "ADD"; + case ast_operator::SUB: return "SUB"; + case ast_operator::MUL: return "MUL"; + case ast_operator::DIV: return "DIV"; + case ast_operator::TRUE_DIV: return "TRUE_DIV"; + case ast_operator::FLOOR_DIV: return "FLOOR_DIV"; + case ast_operator::MOD: return "MOD"; + case ast_operator::PYMOD: return "PYMOD"; + case ast_operator::POW: return "POW"; + case ast_operator::EQUAL: return "EQUAL"; + case ast_operator::NULL_EQUAL: return "NULL_EQUAL"; + case ast_operator::NOT_EQUAL: return "NOT_EQUAL"; + case ast_operator::LESS: return "LESS"; + case ast_operator::GREATER: return "GREATER"; + case ast_operator::LESS_EQUAL: return "LESS_EQUAL"; + case ast_operator::GREATER_EQUAL: return "GREATER_EQUAL"; + case ast_operator::BITWISE_AND: return "BITWISE_AND"; + case ast_operator::BITWISE_OR: return "BITWISE_OR"; + case ast_operator::BITWISE_XOR: return "BITWISE_XOR"; + case ast_operator::LOGICAL_AND: return "LOGICAL_AND"; + case ast_operator::NULL_LOGICAL_AND: return "NULL_LOGICAL_AND"; + case ast_operator::LOGICAL_OR: return "LOGICAL_OR"; + case ast_operator::NULL_LOGICAL_OR: return "NULL_LOGICAL_OR"; + case ast_operator::IDENTITY: return "IDENTITY"; + case ast_operator::IS_NULL: return "IS_NULL"; + case ast_operator::SIN: return "SIN"; + case ast_operator::COS: return "COS"; + case ast_operator::TAN: return "TAN"; + case ast_operator::ARCSIN: return "ARCSIN"; + case ast_operator::ARCCOS: return "ARCCOS"; + case ast_operator::ARCTAN: return "ARCTAN"; + case ast_operator::SINH: return "SINH"; + case ast_operator::COSH: return "COSH"; + case ast_operator::TANH: return "TANH"; + case ast_operator::ARCSINH: return "ARCSINH"; + case ast_operator::ARCCOSH: return "ARCCOSH"; + case ast_operator::ARCTANH: return "ARCTANH"; + case ast_operator::EXP: return "EXP"; + case ast_operator::LOG: return "LOG"; + case ast_operator::SQRT: return "SQRT"; + case ast_operator::CBRT: return "CBRT"; + case ast_operator::CEIL: return "CEIL"; + case ast_operator::FLOOR: return "FLOOR"; + case ast_operator::ABS: return "ABS"; + case ast_operator::RINT: return "RINT"; + case ast_operator::BIT_INVERT: return "BIT_INVERT"; + case ast_operator::NOT: return "NOT"; + case ast_operator::CAST_TO_INT64: return "CAST_TO_INT64"; + case ast_operator::CAST_TO_UINT64: return "CAST_TO_UINT64"; + case ast_operator::CAST_TO_FLOAT64: return "CAST_TO_FLOAT64"; + default: CUDF_FAIL("Unrecognized operator type."); + } +} + } // namespace detail } // namespace ast diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 2f255e7a07c..1b1ecf9a19c 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/quantiles/quantiles_util.hpp b/cpp/src/quantiles/quantiles_util.hpp index 52fe5356361..18ff0774600 100644 --- a/cpp/src/quantiles/quantiles_util.hpp +++ b/cpp/src/quantiles/quantiles_util.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index 
d28c04f423a..617c33c7502 100644
--- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
+++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <cudf/fixed_point/conv.hpp>
 #include
 #include
 #include
diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu
index d8ea857e4af..7327f0d8614 100644
--- a/cpp/src/unary/cast_ops.cu
+++ b/cpp/src/unary/cast_ops.cu
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <cudf/fixed_point/conv.hpp>
 #include
 #include
 #include
diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp
index f8f8d525043..f1724eb9195 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cpp
+++ b/cpp/tests/fixed_point/fixed_point_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include
 #include
+#include <cudf/fixed_point/conv.hpp>
 #include
 #include

From 954599faf1acdde45e85500fe35fcb7354df5ea1 Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Wed, 13 Aug 2025 10:20:25 -0700
Subject: [PATCH 120/366] Support hash-based workflow for `M2` groupby aggregation (#19569)

This implements the hash-based workflow for `M2` groupby aggregation. A
benchmark is also added, showing that the hash-based approach can improve
performance over the sort-based approach by up to 40 percent.

Since the output is generated using a hash table, the order of keys/values
is undefined and thus may not be the same as in the input.

---
Benchmark:
```
## [0] Quadro RTX 6000

| T | value_key_ratio | num_rows | null_probability | Ref Time | Ref Noise | Cmp Time | Cmp Noise | Diff | %Diff | Status |
|-----|-------------------|------------|--------------------|------------|-------------|------------|-------------|--------------|---------|----------|
| I32 | 10 | 10000 | 0 | 215.940 us | 9.98% | 156.946 us | 8.62% | -58.993 us | -27.32% | FAST |
| I32 | 30 | 10000 | 0 | 198.239 us | 1.32% | 149.199 us | 5.59% | -49.039 us | -24.74% | FAST |
| I32 | 100 | 10000 | 0 | 196.804 us | 3.81% | 145.584 us | 4.77% | -51.220 us | -26.03% | FAST |
| I32 | 10 | 1000000 | 0 | 626.309 us | 1.10% | 635.264 us | 1.85% | 8.955 us | 1.43% | SLOW |
| I32 | 30 | 1000000 | 0 | 617.688 us | 1.15% | 477.760 us | 2.14% | -139.929 us | -22.65% | FAST |
| I32 | 100 | 1000000 | 0 | 612.476 us | 1.23% | 381.148 us | 1.72% | -231.328 us | -37.77% | FAST |
| I32 | 10 | 10000000 | 0 | 7.335 ms | 0.78% | 8.007 ms | 0.64% | 671.021 us | 9.15% | SLOW |
| I32 | 30 | 10000000 | 0 | 7.140 ms | 0.78% | 7.845 ms | 0.59% | 704.838 us | 9.87% | SLOW |
| I32 | 100 | 10000000 | 0 | 6.975 ms | 0.78% | 6.285 ms | 0.77% | -690.236 us | -9.90% | FAST |
| I32 | 10 | 10000 | 0.1 | 288.683 us | 1.84% | 179.761 us | 4.03% | -108.923 us | -37.73% | FAST |
| I32 | 30 | 10000 | 0.1 | 287.724 us | 2.13% | 177.806 us | 2.02% | -109.918 us | -38.20% | FAST |
| I32 | 100 | 10000 | 0.1 | 285.990 us | 2.53% | 174.958 us | 3.17% | -111.033 us | -38.82% | FAST |
| I32 | 10 | 1000000 | 0.1 | 768.166 us | 1.03% | 666.330 us | 1.69% | -101.836 us | -13.26% | FAST |
| I32 | 30 | 1000000 | 0.1 | 753.631 us | 1.69% | 514.416 us | 2.36% | -239.214 us | -31.74% | FAST |
| I32 | 100 | 1000000 | 0.1 | 747.192 us | 1.01% | 417.477 us | 1.86% | -329.715 us | -44.13% | FAST |
| I32 | 10 | 10000000 | 0.1 | 7.948 ms | 0.80% | 7.958 ms | 0.55% | 9.830 us | 0.12% | SAME |
| I32 | 30 | 10000000 | 0.1 | 7.704 ms | 0.74% | 7.616 ms | 0.51% | -87.225 us | -1.13% | FAST |
| I32 | 100 | 10000000 | 0.1 | 7.538 ms | 0.80% | 6.068 ms | 0.78% | -1469.687 us | -19.50% | FAST |
| I32 | 10 | 10000 | 0.9 | 289.933 us | 3.40% | 172.943 us | 2.03% | -116.989 us | -40.35% | FAST |
| I32 | 30 | 10000 | 0.9 | 289.697 us | 3.85% | 171.887 us | 2.65% | -117.811 us | -40.67% | FAST |
| I32 | 100 | 10000 | 0.9 | 286.871 us | 2.91% | 168.887 us | 1.81% | -117.984 us | -41.13% | FAST |
| I32 | 10 | 1000000 | 0.9 | 759.703 us | 1.84% | 469.550 us | 2.02% | -290.153 us | -38.19% | FAST |
| I32 | 30 | 1000000 | 0.9 | 749.905 us | 4.00% | 405.214 us | 1.81% | -344.691 us | -45.96% | FAST |
| I32 | 100 | 1000000 | 0.9 | 743.373 us | 2.09% | 370.548 us | 1.76% | -372.825 us | -50.15% | FAST |
| I32 | 10 | 10000000 | 0.9 | 7.901 ms | 0.69% | 4.686 ms | 1.03% | -3214.663 us | -40.69% | FAST |
| I32 | 30 | 10000000 | 0.9 | 7.663 ms | 0.79% | 4.038 ms | 1.30% | -3624.460 us | -47.30% | FAST |
| I32 | 100 | 10000000 | 0.9 | 7.483 ms | 0.82% | 3.183 ms | 1.46% | -4299.710 us | -57.46% | FAST |
| F64 | 10 | 10000 | 0 | 206.186 us | 1.98% | 152.118 us | 2.07% | -54.068 us | -26.22% | FAST |
| F64 | 30 | 10000 | 0 | 206.396 us | 3.77% | 151.780 us | 3.92% | -54.616 us | -26.46% | FAST |
| F64 | 100 | 10000 | 0 | 202.879 us | 1.54% | 148.957 us | 3.89% | -53.922 us | -26.58% | FAST |
| F64 | 10 | 1000000 | 0 | 678.915 us | 1.96% | 662.225 us | 1.68% | -16.690 us | -2.46% | FAST |
| F64 | 30 | 1000000 | 0 | 665.775 us | 1.27% | 515.827 us | 1.72% | -149.948 us | -22.52% | FAST |
| F64 | 100 | 1000000 | 0 | 660.599 us | 1.73% | 424.674 us | 1.79% | -235.925 us | -35.71% | FAST |
| F64 | 10 | 10000000 | 0 | 7.677 ms | 0.76% | 8.056 ms | 0.59% | 378.792 us | 4.93% | SLOW |
| F64 | 30 | 10000000 | 0 | 7.466 ms | 1.05% | 7.851 ms | 0.50% | 385.055 us | 5.16% | SLOW |
| F64 | 100 | 10000000 | 0 | 7.285 ms | 0.92% | 6.317 ms | 0.71% | -968.127 us | -13.29% | FAST |
| F64 | 10 | 10000 | 0.1 | 299.562 us | 3.55% | 183.848 us | 5.22% | -115.714 us | -38.63% | FAST |
| F64 | 30 | 10000 | 0.1 | 299.092 us | 3.72% | 182.238 us | 2.39% | -116.854 us | -39.07% | FAST |
| F64 | 100 | 10000 | 0.1 | 296.364 us | 3.12% | 179.645 us | 2.52% | -116.719 us | -39.38% | FAST |
| F64 | 10 | 1000000 | 0.1 | 811.874 us | 1.19% | 698.105 us | 1.64% | -113.769 us | -14.01% | FAST |
| F64 | 30 | 1000000 | 0.1 | 795.446 us | 1.93% | 553.580 us | 2.28% | -241.867 us | -30.41% | FAST |
| F64 | 100 | 1000000 | 0.1 | 801.054 us | 4.61% | 456.706 us | 1.58% | -344.348 us | -42.99% | FAST |
| F64 | 10 | 10000000 | 0.1 | 8.174 ms | 0.88% | 8.025 ms | 0.50% | -149.418 us | -1.83% | FAST |
| F64 | 30 | 10000000 | 0.1 | 7.929 ms | 0.80% | 7.693 ms | 0.58% | -235.359 us | -2.97% | FAST |
| F64 | 100 | 10000000 | 0.1 | 7.736 ms | 0.80% | 6.178 ms | 0.73% | -1557.934 us | -20.14% | FAST |
| F64 | 10 | 10000 | 0.9 | 294.503 us | 1.49% | 176.414 us | 9.54% | -118.089 us | -40.10% | FAST |
| F64 | 30 | 10000 | 0.9 | 298.363 us | 6.18% | 175.130 us | 2.80% | -123.233 us | -41.30% | FAST |
| F64 | 100 | 10000 | 0.9 | 292.613 us | 1.48% | 171.707 us | 2.15% | -120.906 us | -41.32% | FAST |
| F64 | 10 | 1000000 | 0.9 | 792.239 us | 1.35% | 482.531 us | 1.84% | -309.708 us | -39.09% | FAST |
| F64 | 30 | 1000000 | 0.9 | 776.355 us | 1.14% | 422.448 us | 1.78% | -353.907 us | -45.59% | FAST |
| F64 | 100 | 1000000 | 0.9 | 771.947 us | 1.58% | 391.754 us | 2.64% | -380.193 us | -49.25% | FAST |
| F64 | 10 | 10000000 | 0.9 | 8.051 ms | 0.76% | 4.735 ms | 1.10% | -3315.632 us | -41.18% | FAST |
| F64 | 30 | 10000000 | 0.9 | 7.822 ms | 0.75% | 4.110 ms | 1.41% | -3711.202 us | -47.45% | FAST |
| F64 | 100 | 10000000 | 0.9 | 7.645 ms | 0.80% | 3.254 ms | 1.47% | -4390.196 us | -57.43% | FAST |
```

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Yunsong Wang (https://github.com/PointKernel)
  - Shruti Shivakumar (https://github.com/shrshi)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: https://github.com/rapidsai/cudf/pull/19569
---
 cpp/benchmarks/CMakeLists.txt                 |  1 +
 cpp/benchmarks/groupby/group_m2.cpp           | 83 +++++++++++++++++
 .../cudf/detail/aggregation/aggregation.cuh   |  4 +
 cpp/src/aggregation/aggregation.cu            |  2 +-
 .../groupby/hash/flatten_single_pass_aggs.cpp | 13 ++-
 cpp/src/groupby/hash/groupby.cu               | 60 +++++------
 .../hash/hash_compound_agg_finalizer.cu       | 35 ++++++-
 .../hash/hash_compound_agg_finalizer.hpp      |  4 +-
 ...ar_hash_functor.cuh => m2_var_functor.cuh} | 91 ++++++++++++++---
 cpp/tests/groupby/m2_tests.cpp                | 42 +++++++--
 .../test/java/ai/rapids/cudf/TableTest.java   | 12 ++-
 11 files changed, 282 insertions(+), 65 deletions(-)
 create mode 100644 cpp/benchmarks/groupby/group_m2.cpp
 rename cpp/src/groupby/hash/{var_hash_functor.cuh => m2_var_functor.cuh} (58%)
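For context, `M2` is the sum of squared deviations from the group mean, M2 = Σ(x − mean)², the running quantity maintained by Welford-style variance updates. The hash-based workflow decomposes it into single-pass partials (SUM and COUNT_VALID per group) plus a second pass that accumulates squared differences, which is what the new `m2_hash_functor` does on the device. The sketch below is a plain host-side C++ illustration of that two-pass decomposition, not the CUDA kernel; the `partials` struct and `group_m2` helper are hypothetical names used only for this example.

```cpp
// Illustrative host-side sketch of the two-pass M2 computation used by the
// hash-based groupby path. The real implementation updates device columns
// atomically; this version uses an ordinary hash map.
#include <cstddef>
#include <unordered_map>
#include <vector>

struct partials {
  double sum{0.0};       // SUM partial
  std::size_t count{0};  // COUNT_VALID partial
  double m2{0.0};        // M2 result
};

// keys/values are hypothetical flat inputs standing in for grouped columns.
std::unordered_map<int, partials> group_m2(std::vector<int> const& keys,
                                           std::vector<double> const& values)
{
  std::unordered_map<int, partials> groups;
  // Pass 1: single-pass aggregations (SUM and COUNT_VALID per group).
  for (std::size_t i = 0; i < keys.size(); ++i) {
    auto& g = groups[keys[i]];
    g.sum += values[i];
    ++g.count;
  }
  // Pass 2: accumulate squared deviations from each group's mean, mirroring
  // m2_hash_functor's per-row update M2 += (x - mean)^2.
  for (std::size_t i = 0; i < keys.size(); ++i) {
    auto& g = groups[keys[i]];
    auto const mean = g.sum / static_cast<double>(g.count);
    auto const diff = values[i] - mean;
    g.m2 += diff * diff;
  }
  return groups;
}
```

This decomposition is also why the finalizer below visits SUM and COUNT_VALID first: once those partials exist in the hash table, M2 needs only one more pass over the input rows rather than a sort.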
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 21f56e1331c..e111b6395e9 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -262,6 +262,7 @@ ConfigureBench(
 ConfigureNVBench(
   GROUPBY_NVBENCH
   groupby/group_histogram.cpp
+  groupby/group_m2.cpp
   groupby/group_max.cpp
   groupby/group_max_multithreaded.cpp
   groupby/group_nunique.cpp
diff --git a/cpp/benchmarks/groupby/group_m2.cpp b/cpp/benchmarks/groupby/group_m2.cpp
new file mode 100644
index 00000000000..be907e9e343
--- /dev/null
+++ b/cpp/benchmarks/groupby/group_m2.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+
+#include <cudf/groupby.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+template <typename Type>
+void groupby_m2_helper(nvbench::state& state,
+                       cudf::size_type num_rows,
+                       cudf::size_type value_key_ratio,
+                       double null_probability)
+{
+  auto const keys = [&] {
+    data_profile const profile =
+      data_profile_builder()
+        .cardinality(num_rows / value_key_ratio)
+        .no_validity()
+        .distribution(cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, num_rows);
+    return create_random_column(cudf::type_to_id<int32_t>(), row_count{num_rows}, profile);
+  }();
+
+  auto const values = [&] {
+    auto builder = data_profile_builder().cardinality(0).distribution(
+      cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, num_rows);
+    if (null_probability > 0) {
+      builder.null_probability(null_probability);
+    } else {
+      builder.no_validity();
+    }
+    return create_random_column(
+      cudf::type_to_id<Type>(), row_count{num_rows}, data_profile{builder});
+  }();
+
+  // Vector of 1 request
+  std::vector<cudf::groupby::aggregation_request> requests(1);
+  requests.back().values = values->view();
+  requests.back().aggregations.push_back(
+    cudf::make_m2_aggregation<cudf::groupby_aggregation>());
+
+  auto const mem_stats_logger = cudf::memory_stats_logger();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    auto gb_obj       = cudf::groupby::groupby(cudf::table_view({keys->view()}));
+    auto const result = gb_obj.aggregate(requests);
+  });
+
+  auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(num_rows) / elapsed_time, "rows/s");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+}
+
+template <typename Type>
+void bench_groupby_m2(nvbench::state& state, nvbench::type_list<Type>)
+{
+  auto const value_key_ratio = static_cast<cudf::size_type>(state.get_int64("value_key_ratio"));
+  auto const num_rows        = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const null_probability = state.get_float64("null_probability");
+
+  groupby_m2_helper<Type>(state, num_rows, value_key_ratio, null_probability);
+}
+
+NVBENCH_BENCH_TYPES(bench_groupby_m2, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, double>))
+  .set_name("groupby_m2")
+  .add_int64_axis("value_key_ratio", {10, 30, 100})
+  .add_int64_axis("num_rows", {10'000, 1'000'000, 10'000'000})
+  .add_float64_axis("null_probability", {0, 0.1, 0.9});
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh
index 2124a131c19..92f1aeea572 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.cuh
+++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh
@@ -90,6 +90,10 @@ struct corresponding_operator {
   using type = DeviceSum;
 };
 template <>
+struct corresponding_operator<aggregation::M2> {
+  using type = DeviceSum;
+};
+template <>
 struct corresponding_operator {
   using type = DeviceSum;
 };
diff --git a/cpp/src/aggregation/aggregation.cu b/cpp/src/aggregation/aggregation.cu
index c58d1f7af7c..89c96c877e4 100644
--- a/cpp/src/aggregation/aggregation.cu
+++ b/cpp/src/aggregation/aggregation.cu
@@ -53,7 +53,7 @@ struct identity_initializer {
       (k == aggregation::SUM or k == aggregation::MIN or k == aggregation::MAX or
        k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or
        k == aggregation::ARGMAX or k == aggregation::ARGMIN or
-       k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or
+       k == aggregation::SUM_OF_SQUARES or k == aggregation::M2 or k == aggregation::STD or
        k == aggregation::VARIANCE or
        (k == aggregation::PRODUCT and is_product_supported()))) or
      (k ==
aggregation::SUM_WITH_OVERFLOW and std::is_same_v); diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp index a533f7a6448..8e64560d246 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,6 +68,17 @@ class groupby_simple_aggregations_collector final return aggs; } + std::vector> visit(data_type, + cudf::detail::m2_aggregation const&) override + { + std::vector> aggs; + aggs.push_back(make_sum_aggregation()); + // COUNT_VALID + aggs.push_back(make_count_aggregation()); + + return aggs; + } + std::vector> visit(data_type, cudf::detail::var_aggregation const&) override { diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index b9f08c2c505..155c2fee9ce 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -33,44 +32,38 @@ #include -#include #include +#include #include #include namespace cudf::groupby::detail::hash { namespace { /** - * @brief List of aggregation operations that can be computed with a hash-based - * implementation. + * @brief List of aggregation operations that can be computed with a hash-based implementation. + * + * For single pass aggregations, the supported operations are the ones that can be atomically + * updated: SUM, SUM_WITH_OVERFLOW, SUM_OF_SQUARES, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL. + * For compound aggregations, the supported operations are the ones that depends on the single pass + * aggregations above: ARGMIN(MIN), ARGMAX(MAX), MEAN(SUM, COUNT_VALID), M2/STD/VARIANCE(M2, + * COUNT_VALID). */ -constexpr std::array hash_aggregations{aggregation::SUM, - aggregation::SUM_WITH_OVERFLOW, - aggregation::PRODUCT, - aggregation::MIN, - aggregation::MAX, - aggregation::COUNT_VALID, - aggregation::COUNT_ALL, - aggregation::ARGMIN, - aggregation::ARGMAX, - aggregation::SUM_OF_SQUARES, - aggregation::MEAN, - aggregation::STD, - aggregation::VARIANCE}; - -// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, -// Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID), -// ARGMAX, ARGMIN - -// TODO replace with std::find in C++20 onwards. 
-template -constexpr bool array_contains(std::array const& haystack, T needle) -{ - for (auto const& val : haystack) { - if (val == needle) return true; - } - return false; -} +const auto hash_aggregations = std::unordered_set{// Single pass aggregations: + aggregation::SUM, + aggregation::SUM_WITH_OVERFLOW, + aggregation::SUM_OF_SQUARES, + aggregation::PRODUCT, + aggregation::MIN, + aggregation::MAX, + aggregation::COUNT_VALID, + aggregation::COUNT_ALL, + // Compound aggregations: + aggregation::ARGMIN, + aggregation::ARGMAX, + aggregation::MEAN, + aggregation::M2, + aggregation::STD, + aggregation::VARIANCE}; /** * @brief Indicates whether the specified aggregation operation can be computed @@ -80,10 +73,7 @@ constexpr bool array_contains(std::array const& haystack, T needle) * @return true `t` is valid for a hash based groupby * @return false `t` is invalid for a hash based groupby */ -bool constexpr is_hash_aggregation(aggregation::Kind t) -{ - return array_contains(hash_aggregations, t); -} +bool is_hash_aggregation(aggregation::Kind t) { return hash_aggregations.contains(t); } std::unique_ptr
dispatch_groupby(table_view const& keys, host_span requests, diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu index bdd05c2e01b..8b61254ce38 100644 --- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu @@ -16,7 +16,7 @@ #include "hash_compound_agg_finalizer.hpp" #include "helpers.cuh" -#include "var_hash_functor.cuh" +#include "m2_var_functor.cuh" #include #include @@ -29,7 +29,6 @@ #include #include -#include #include @@ -150,6 +149,38 @@ void hash_compound_agg_finalizer::visit(cudf::detail::mean_aggregation dense_results->add_result(col, agg, std::move(result)); } +template +void hash_compound_agg_finalizer::visit(cudf::detail::m2_aggregation const& agg) +{ + if (dense_results->has_result(col, agg)) { return; } + + auto sum_agg = make_sum_aggregation(); + auto count_agg = make_count_aggregation(); + this->visit(*sum_agg); + this->visit(*count_agg); + auto const sum_result = sparse_results->get_result(col, *sum_agg); + auto const count_result = sparse_results->get_result(col, *count_agg); + + auto const d_values_ptr = column_device_view::create(col, stream); + auto const d_sum_ptr = column_device_view::create(sum_result, stream).release(); + auto const d_count_ptr = column_device_view::create(count_result, stream).release(); + + auto output = make_fixed_width_column( + cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); + auto output_view = mutable_column_device_view::create(output->mutable_view(), stream); + auto output_tview = mutable_table_view{{output->mutable_view()}}; + cudf::detail::initialize_with_identity( + output_tview, host_span(&agg.kind, 1), stream); + + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + col.size(), + m2_hash_functor{set, row_bitmask, *output_view, *d_values_ptr, *d_sum_ptr, *d_count_ptr}); + sparse_results->add_result(col, agg, std::move(output)); + dense_results->add_result(col, agg, to_dense_agg_result(agg)); +} + template void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation const& agg) { diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp index 8bee1a92c40..63e08a19177 100644 --- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,6 +62,8 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final void visit(cudf::detail::mean_aggregation const& agg) override; + void visit(cudf::detail::m2_aggregation const& agg) override; + void visit(cudf::detail::var_aggregation const& agg) override; void visit(cudf::detail::std_aggregation const& agg) override; diff --git a/cpp/src/groupby/hash/var_hash_functor.cuh b/cpp/src/groupby/hash/m2_var_functor.cuh similarity index 58% rename from cpp/src/groupby/hash/var_hash_functor.cuh rename to cpp/src/groupby/hash/m2_var_functor.cuh index 51a4ce22dbe..c8f72fc4fbc 100644 --- a/cpp/src/groupby/hash/var_hash_functor.cuh +++ b/cpp/src/groupby/hash/m2_var_functor.cuh @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #pragma once #include @@ -22,11 +23,82 @@ #include #include -#include #include -#include namespace cudf::groupby::detail::hash { + +template +__device__ constexpr static bool is_m2_var_supported() +{ + return is_numeric() && !is_fixed_point(); +} + +template +struct m2_hash_functor { + SetType set; + bitmask_type const* __restrict__ row_bitmask; + mutable_column_device_view target; + column_device_view source; + column_device_view sum; + column_device_view count; + m2_hash_functor(SetType set, + bitmask_type const* row_bitmask, + mutable_column_device_view target, + column_device_view source, + column_device_view sum, + column_device_view count) + : set{set}, row_bitmask{row_bitmask}, target{target}, source{source}, sum{sum}, count{count} + { + } + + template + __device__ void operator()(column_device_view const&, size_type, size_type) noexcept + requires(!is_m2_var_supported()) + { + CUDF_UNREACHABLE("Invalid source type for M2 aggregation."); + } + + template + __device__ void operator()(column_device_view const& source, + size_type source_index, + size_type target_index) noexcept + requires(is_m2_var_supported()) + { + using Target = cudf::detail::target_type_t; + using SumType = cudf::detail::target_type_t; + using CountType = cudf::detail::target_type_t; + + if (source.is_null(source_index)) { return; } + auto const group_size = count.element(target_index); + if (group_size == 0) { return; } + + auto const x = static_cast(source.element(source_index)); + auto const mean = static_cast(sum.element(target_index)) / group_size; + auto const diff = x - mean; + auto const result = diff * diff; + cuda::atomic_ref ref{target.element(target_index)}; + ref.fetch_add(result, cuda::std::memory_order_relaxed); + if (target.is_null(target_index)) { target.set_valid(target_index); } + } + + __device__ inline void operator()(size_type source_index) + { + if (row_bitmask == nullptr or bit_is_set(row_bitmask, source_index)) { + auto const target_index = *set.find(source_index); + + auto col = source; + auto source_type = source.type(); + if (source_type.id() == type_id::DICTIONARY32) { + col = source.child(cudf::dictionary_column_view::keys_column_index); + source_type = col.type(); + source_index = static_cast(source.element(source_index)); + } + + type_dispatcher(source_type, *this, col, source_index, target_index); + } + } +}; + template struct var_hash_functor { SetType set; @@ -54,16 +126,8 @@ struct var_hash_functor { } template - constexpr static bool is_supported() - { - return is_numeric() && !is_fixed_point(); - } - - template - __device__ void operator()(column_device_view const& source, - size_type source_index, - size_type target_index) noexcept - requires(!is_supported()) + __device__ void operator()(column_device_view const&, size_type, size_type) noexcept + requires(!is_m2_var_supported()) { CUDF_UNREACHABLE("Invalid source type for std, var aggregation combination."); } @@ -72,7 +136,7 @@ struct var_hash_functor { __device__ void operator()(column_device_view const& source, size_type source_index, size_type target_index) noexcept - requires(is_supported()) + requires(is_m2_var_supported()) { using Target = cudf::detail::target_type_t; using SumType = cudf::detail::target_type_t; @@ -109,4 +173,5 @@ struct var_hash_functor { } } }; + } // namespace cudf::groupby::detail::hash diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index 4359c154cf6..085d06fcdd7 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include using namespace cudf::test::iterators; @@ -41,14 +42,39 @@ using M2s_col = cudf::test::fixed_width_column_wrapper; auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values) { - std::vector requests; - requests.emplace_back(); - requests[0].values = values; - requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); - auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); - auto result = gb_obj.aggregate(requests); - return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); + + auto [hash_gb_keys, hash_gb_vals] = [&] { + std::vector requests; + requests.emplace_back(); + requests[0].values = values; + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + auto const result = gb_obj.aggregate(requests); + auto const sort_order = cudf::sorted_order(result.first->view(), {}, {}); + auto const sorted_keys = cudf::gather(result.first->view(), *sort_order); + auto const sorted_vals = + cudf::gather(cudf::table_view({result.second[0].results[0]->view()}), *sort_order); + return std::pair(std::move(sorted_keys->release()[0]), std::move(sorted_vals->release()[0])); + }(); + + auto const [sort_gb_keys, sort_gb_vals] = [&] { + // Create a fresh aggregation request for sort-based aggregation instead of reusing. + // This is to avoid wrong output when the previous groupby aggregation has not been executed + // while the requests vector is modified. + std::vector requests; + requests.emplace_back(); + requests[0].values = values; + requests[0].aggregations.emplace_back(cudf::make_m2_aggregation()); + requests[0].aggregations.emplace_back( + cudf::make_nth_element_aggregation(0)); + auto result = gb_obj.aggregate(requests); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); + }(); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*hash_gb_keys, *sort_gb_keys, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*hash_gb_vals, *sort_gb_vals, verbosity); + + return std::pair(std::move(hash_gb_keys), std::move(hash_gb_vals)); } } // namespace diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 12d7c15791b..1289f468002 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -7799,10 +7799,11 @@ void testGroupByM2() { .build(); Table results = input.groupBy(0).aggregate(GroupByAggregation.M2() .onColumn(1)); + Table resultsSorted = results.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder().column(1, 2, 3) .column(42.0, 122.75, 114.0) .build()) { - assertTablesAreEqual(expected, results); + assertTablesAreEqual(expected, resultsSorted); } // Test with values have nulls (the values associated with key=2 has both nulls and non-nulls, @@ -7812,10 +7813,11 @@ void testGroupByM2() { .build(); Table results = input.groupBy(0).aggregate(GroupByAggregation.M2() .onColumn(1)); + Table resultsSorted = results.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder().column(1, 2, 3, 4, 5) .column(0.0, 2.0, 8.0, 0.0, null) .build()) { - assertTablesAreEqual(expected, results); + assertTablesAreEqual(expected, resultsSorted); } // Test with floating-point 
values having NaN:
@@ -7824,10 +7826,11 @@ void testGroupByM2() {
         .build();
          Table results = input.groupBy(0).aggregate(GroupByAggregation.M2()
              .onColumn(1));
+         Table resultsSorted = results.orderBy(OrderByArg.asc(0));
          Table expected = new Table.TestBuilder().column(1, 2, 3, 4, null)
              .column(18.0, Double.NaN, 18.0, Double.NaN, 0.0)
              .build()) {
-      assertTablesAreEqual(expected, results);
+      assertTablesAreEqual(expected, resultsSorted);
     }

     // Test with floating-point values having NaN and +/- Inf
@@ -7857,10 +7860,11 @@ void testGroupByM2() {
         .build();
          Table results = input.groupBy(0).aggregate(GroupByAggregation.M2()
              .onColumn(1));
+         Table resultsSorted = results.orderBy(OrderByArg.asc(0));
          Table expected = new Table.TestBuilder().column(1, 2, 3, 4, 5)
              .column(Double.NaN, Double.NaN, Double.NaN, Double.NaN, 12.5)
              .build()) {
-      assertTablesAreEqual(expected, results);
+      assertTablesAreEqual(expected, resultsSorted);
     }
   }
 }

From 1cf49c9043c3c997a64d6b05dbfd773700ec2c44 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Wed, 13 Aug 2025 15:16:57 -0700
Subject: [PATCH 121/366] Add streams support to all list APIs (#19683)

Contributes to #15163

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/19683
---
 .../pylibcudf/libcudf/lists/combine.pxd       |  11 +-
 .../pylibcudf/libcudf/lists/contains.pxd      |   8 +-
 .../libcudf/lists/count_elements.pxd          |   6 +-
 .../pylibcudf/libcudf/lists/explode.pxd       |   4 +-
 .../pylibcudf/libcudf/lists/extract.pxd       |   9 +-
 .../pylibcudf/libcudf/lists/filling.pxd       |   5 +-
 .../pylibcudf/libcudf/lists/gather.pxd        |   4 +-
 .../pylibcudf/libcudf/lists/reverse.pxd       |   4 +-
 .../libcudf/lists/set_operations.pxd          |  15 +-
 .../pylibcudf/libcudf/lists/sorting.pxd       |   9 +-
 .../libcudf/lists/stream_compaction.pxd       |   9 +-
 python/pylibcudf/pylibcudf/lists.pxd          |  63 +++++--
 python/pylibcudf/pylibcudf/lists.pyi          |  47 ++++--
 python/pylibcudf/pylibcudf/lists.pyx          | 159 +++++++++++++-----
 14 files changed, 263 insertions(+), 90 deletions(-)
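Each of the declarations touched below gains an explicit `cuda_stream_view` parameter, mirroring the stream-accepting overloads in the libcudf `cudf/lists/*.hpp` headers. As a minimal C++ illustration of what this exposes, the sketch below runs `cudf::lists::count_elements` on a caller-owned stream; the stream-taking signature is the one declared in the `count_elements.pxd` hunk further down, while the `count_on_stream` wrapper name is hypothetical.

```cpp
// Minimal sketch: run a list reduction on a caller-provided CUDA stream
// instead of the library default. Error handling omitted for brevity.
#include <cudf/column/column.hpp>
#include <cudf/lists/count_elements.hpp>
#include <cudf/lists/lists_column_view.hpp>

#include <rmm/cuda_stream.hpp>

#include <memory>

std::unique_ptr<cudf::column> count_on_stream(cudf::lists_column_view const& lists)
{
  rmm::cuda_stream stream;  // owns a freshly created CUDA stream
  // count_elements launches its kernels on the given stream.
  auto counts = cudf::lists::count_elements(lists, stream.view());
  stream.synchronize();  // make the result safe to consume on other streams
  return counts;
}
```

The modified `lists.pxd`, `lists.pyi`, and `lists.pyx` in the diffstat plumb the same parameter through the pylibcudf wrappers.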
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
index 3e4c88d62b0..f01318e0e4e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.

 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
@@ -6,6 +6,7 @@
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.table.table_view cimport table_view
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view


 cdef extern from "cudf/lists/combine.hpp" namespace \
@@ -16,14 +17,18 @@ cdef extern from "cudf/lists/combine.hpp" namespace \
         NULLIFY_OUTPUT_ROW

     cdef unique_ptr[column] concatenate_rows(
-        const table_view input_table
+        const table_view input_table,
+        concatenate_null_policy null_policy,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] concatenate_list_elements(
         const table_view input_table,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] concatenate_list_elements(
         const column_view input_table,
-        concatenate_null_policy null_policy
+        concatenate_null_policy null_policy,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
index 13a32d46c7a..23b39f78e1f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
@@ -6,6 +6,7 @@
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.scalar.scalar cimport scalar
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view


 cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
@@ -17,25 +18,30 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const scalar& search_key,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] contains(
         const lists_column_view& lists,
         const column_view& search_keys,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] contains_nulls(
         const lists_column_view& lists,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] index_of(
         const lists_column_view& lists,
         const scalar& search_key,
         duplicate_find_option find_option,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] index_of(
         const lists_column_view& lists,
         const column_view& search_keys,
         duplicate_find_option find_option,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
index 64c75ccabd3..64fbb4abbbc 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd
@@ -1,11 +1,13 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view


 cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] count_elements(
-        const lists_column_view&
+        const lists_column_view&,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
index adec02caad1..bf57dbc353a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd
@@ -1,13 +1,15 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 from pylibcudf.libcudf.types cimport size_type
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view


 cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil:
     cdef unique_ptr[table] explode_outer(
         const table_view,
         size_type explode_column_idx,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
index 046bb51c68e..b837f6ca409 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd
@@ -1,17 +1,20 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column, column_view
 from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.types cimport size_type
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view


 cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil:
     cdef unique_ptr[column] extract_list_element(
         const lists_column_view&,
-        size_type
+        size_type,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler

     cdef unique_ptr[column] extract_list_element(
         const lists_column_view&,
-        const column_view&
+        const column_view&,
+        cuda_stream_view stream
     ) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
index 35e2559d902..5c6ce69f648 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd
@@ -1,18 +1,21 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] sequences( const column_view& starts, const column_view& sizes, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] sequences( const column_view& starts, const column_view& steps, const column_view& sizes, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd index 4fde535b306..c4c189b0148 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd @@ -4,10 +4,12 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] segmented_gather( const lists_column_view& source_column, const lists_column_view& gather_map_list, - out_of_bounds_policy bounds_policy + out_of_bounds_policy bounds_policy, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd index 1ae3b4409ef..68d4d91aea9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd @@ -1,11 +1,13 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] reverse( const lists_column_view& lists_column, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd index 1f4855bdbf3..076c765b3c3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd @@ -1,9 +1,10 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. 
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport nan_equality, null_equality +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: @@ -11,26 +12,30 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, - nan_equality nans_equal + nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] have_overlap( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, - nan_equality nans_equal + nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] intersect_distinct( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, - nan_equality nans_equal + nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] union_distinct( const lists_column_view& lhs, const lists_column_view& rhs, null_equality nulls_equal, - nan_equality nans_equal + nan_equality nans_equal, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd index 344b55b402f..80b46727650 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd @@ -1,20 +1,23 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport null_order, order +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] sort_lists( const lists_column_view source_column, order column_order, - null_order null_precedence + null_order null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] stable_sort_lists( const lists_column_view source_column, order column_order, - null_order null_precedence + null_order null_precedence, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index 8341ac69bf5..0766d36e724 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -1,9 +1,11 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. 
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view +from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option from pylibcudf.libcudf.types cimport nan_equality, null_equality +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/lists/stream_compaction.hpp" \ @@ -11,10 +13,13 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ cdef unique_ptr[column] apply_boolean_mask( const lists_column_view& lists_column, const lists_column_view& boolean_mask, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] distinct( const lists_column_view& lists_column, null_equality nulls_equal, - nan_equality nans_equal + nan_equality nans_equal, + duplicate_keep_option keep_option, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index ee0c1390791..42484910d02 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -7,6 +7,7 @@ from pylibcudf.libcudf.types cimport ( from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy from pylibcudf.libcudf.lists.contains cimport duplicate_find_option +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -20,46 +21,72 @@ ctypedef fused ColumnOrSizeType: Column size_type -cpdef Table explode_outer(Table, size_type explode_column_idx) +cpdef Table explode_outer(Table, size_type explode_column_idx, Stream stream=*) -cpdef Column concatenate_rows(Table) +cpdef Column concatenate_rows(Table, Stream stream=*) -cpdef Column concatenate_list_elements(Column, concatenate_null_policy null_policy) +cpdef Column concatenate_list_elements( + Column, concatenate_null_policy null_policy, Stream stream=* +) -cpdef Column contains(Column, ColumnOrScalar) +cpdef Column contains(Column, ColumnOrScalar, Stream stream=*) -cpdef Column contains_nulls(Column) +cpdef Column contains_nulls(Column, Stream stream=*) -cpdef Column index_of(Column, ColumnOrScalar, duplicate_find_option) +cpdef Column index_of( + Column, ColumnOrScalar, duplicate_find_option, Stream stream=* +) -cpdef Column reverse(Column) +cpdef Column reverse(Column, Stream stream=*) -cpdef Column segmented_gather(Column, Column, out_of_bounds_policy bounds_policy=*) +cpdef Column segmented_gather( + Column, Column, out_of_bounds_policy bounds_policy=*, Stream stream=* +) -cpdef Column extract_list_element(Column, ColumnOrSizeType) +cpdef Column extract_list_element(Column, ColumnOrSizeType, Stream stream=*) -cpdef Column count_elements(Column) +cpdef Column count_elements(Column, Stream stream=*) -cpdef Column sequences(Column, Column, Column steps = *) +cpdef Column sequences( + Column, Column, Column steps = *, Stream stream=* +) -cpdef Column sort_lists(Column, order, null_order, bool stable = *) +cpdef Column sort_lists( + Column, order, null_order, bool stable = *, Stream stream=* +) cpdef Column difference_distinct( - Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* + Column, + Column, + null_equality nulls_equal=*, + nan_equality nans_equal=*, + Stream stream=* ) cpdef Column have_overlap( - Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* + Column, + Column, + 
null_equality nulls_equal=*, nan_equality nans_equal=* + Column, + Column, +
null_equality nulls_equal=*, + nan_equality nans_equal=*, + Stream stream=* ) cpdef Column intersect_distinct( - Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* + Column, + Column, + null_equality nulls_equal=*, + nan_equality nans_equal=*, + Stream stream=* ) cpdef Column union_distinct( - Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* + Column, + Column, + null_equality nulls_equal=*, + nan_equality nans_equal=*, + Stream stream=* ) -cpdef Column apply_boolean_mask(Column, Column) +cpdef Column apply_boolean_mask(Column, Column, Stream stream=*) -cpdef Column distinct(Column, null_equality, nan_equality) +cpdef Column distinct(Column, null_equality, nan_equality, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index e08d3df1036..0fe4afa08ac 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -2,6 +2,8 @@ from enum import IntEnum +from rmm import Stream + from pylibcudf.column import Column from pylibcudf.copying import OutOfBoundsPolicy from pylibcudf.scalar import Scalar @@ -16,60 +18,83 @@ class DuplicateFindOption(IntEnum): FIND_FIRST = ... FIND_LAST = ... -def explode_outer(input: Table, explode_column_idx: int) -> Table: ... -def concatenate_rows(input: Table) -> Column: ... +def explode_outer( + input: Table, explode_column_idx: int, stream: Stream | None = None +) -> Table: ... +def concatenate_rows(input: Table, stream: Stream | None = None) -> Column: ... def concatenate_list_elements( - input: Column, null_policy: ConcatenateNullPolicy + input: Column, + null_policy: ConcatenateNullPolicy, + stream: Stream | None = None, +) -> Column: ... +def contains( + input: Column, search_key: Column | Scalar, stream: Stream | None = None ) -> Column: ... -def contains(input: Column, search_key: Column | Scalar) -> Column: ... -def contains_nulls(input: Column) -> Column: ... +def contains_nulls(input: Column, stream: Stream | None = None) -> Column: ... def index_of( input: Column, search_key: Column | Scalar, find_option: DuplicateFindOption, + stream: Stream | None = None, ) -> Column: ... -def reverse(input: Column) -> Column: ... +def reverse(input: Column, stream: Stream | None = None) -> Column: ... def segmented_gather( input: Column, gather_map_list: Column, bounds_policy: OutOfBoundsPolicy = OutOfBoundsPolicy.DONT_CHECK, + stream: Stream | None = None, ) -> Column: ... -def extract_list_element(input: Column, index: Column | int) -> Column: ... -def count_elements(input: Column) -> Column: ... +def extract_list_element( + input: Column, index: Column | int, stream: Stream | None = None +) -> Column: ... +def count_elements(input: Column, stream: Stream | None = None) -> Column: ... def sequences( - starts: Column, sizes: Column, steps: Column | None = None + starts: Column, + sizes: Column, + steps: Column | None = None, + stream: Stream | None = None, ) -> Column: ... def sort_lists( input: Column, sort_order: Order, na_position: NullOrder, stable: bool = False, + stream: Stream | None = None, ) -> Column: ... def difference_distinct( lhs: Column, rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, + stream: Stream | None = None, ) -> Column: ... def have_overlap( lhs: Column, rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, + stream: Stream | None = None, ) -> Column: ... 
nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, + stream: Stream | None = None, ) -> Column: ...
def intersect_distinct( lhs: Column, rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, + stream: Stream | None = None, ) -> Column: ... def union_distinct( lhs: Column, rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, + stream: Stream | None = None, +) -> Column: ... +def apply_boolean_mask( + input: Column, boolean_mask: Column, stream: Stream | None = None ) -> Column: ... -def apply_boolean_mask(input: Column, boolean_mask: Column) -> Column: ... def distinct( - input: Column, nulls_equal: NullEquality, nans_equal: NanEquality + input: Column, + nulls_equal: NullEquality, + nans_equal: NanEquality, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index eee5a43f6a8..5d6744f41c6 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -33,6 +33,7 @@ from pylibcudf.libcudf.lists.stream_compaction cimport ( apply_boolean_mask as cpp_apply_boolean_mask, distinct as cpp_distinct, ) +from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport ( nan_equality, @@ -46,9 +47,12 @@ from pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType from pylibcudf.libcudf.lists.combine import concatenate_null_policy as ConcatenateNullPolicy # no-cython-lint from pylibcudf.libcudf.lists.contains import duplicate_find_option as DuplicateFindOption # no-cython-lint +from rmm.pylibrmm.stream cimport Stream + from .column cimport Column, ListColumnView from .scalar cimport Scalar from .table cimport Table +from .utils cimport _get_stream __all__ = [ "ConcatenateNullPolicy", @@ -73,7 +77,9 @@ __all__ = [ "union_distinct", ] -cpdef Table explode_outer(Table input, size_type explode_column_idx): +cpdef Table explode_outer( + Table input, size_type explode_column_idx, Stream stream=None +): """Explode a column of lists into rows. All other columns will be duplicated for each element in the list. @@ -94,13 +100,17 @@ cpdef Table explode_outer(Table input, size_type explode_column_idx): """ cdef unique_ptr[table] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_explode.explode_outer(input.view(), explode_column_idx) + c_result = cpp_explode.explode_outer( + input.view(), explode_column_idx, stream.view() + ) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) -cpdef Column concatenate_rows(Table input): +cpdef Column concatenate_rows(Table input, Stream stream=None): """Concatenate multiple lists columns into a single lists column row-wise. For details, see :cpp:func:`concatenate_list_elements`. @@ -117,14 +127,18 @@ cpdef Column concatenate_rows(Table input): """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_concatenate_rows(input.view()) + c_result = cpp_concatenate_rows( + input.view(), concatenate_null_policy.IGNORE, stream.view() + ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column concatenate_list_elements( - Column input, concatenate_null_policy null_policy + Column input, concatenate_null_policy null_policy, Stream stream=None ): """Concatenate multiple lists on the same row into a single list. 
@@ -144,13 +158,17 @@ cpdef Column concatenate_list_elements( """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_concatenate_list_elements(input.view(), null_policy) + c_result = cpp_concatenate_list_elements( + input.view(), null_policy, stream.view() + ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column contains(Column input, ColumnOrScalar search_key): +cpdef Column contains(Column input, ColumnOrScalar search_key, Stream stream=None): """Create a column of bool values indicating whether the search_key is contained in the input. @@ -176,6 +194,8 @@ cpdef Column contains(Column input, ColumnOrScalar search_key): cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + stream = _get_stream(stream) + if not isinstance(search_key, (Column, Scalar)): raise TypeError("Must pass a Column or Scalar") @@ -185,11 +205,12 @@ cpdef Column contains(Column input, ColumnOrScalar search_key): search_key.view() if ColumnOrScalar is Column else dereference( search_key.get() ), + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column contains_nulls(Column input): +cpdef Column contains_nulls(Column input, Stream stream=None): """Create a column of bool values indicating whether each row in the lists column contains a null value. @@ -208,13 +229,19 @@ cpdef Column contains_nulls(Column input): """ cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + + stream = _get_stream(stream) + with nogil: - c_result = cpp_contains.contains_nulls(list_view.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_contains.contains_nulls(list_view.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) cpdef Column index_of( - Column input, ColumnOrScalar search_key, duplicate_find_option find_option + Column input, + ColumnOrScalar search_key, + duplicate_find_option find_option, + Stream stream=None ): """Create a column of index values indicating the position of a search key row within the corresponding list row in the lists column. @@ -243,6 +270,9 @@ cpdef Column index_of( """ cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + + stream = _get_stream(stream) + with nogil: c_result = cpp_contains.index_of( list_view.view(), @@ -250,11 +280,12 @@ cpdef Column index_of( search_key.get() ), find_option, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column reverse(Column input): +cpdef Column reverse(Column input, Stream stream=None): """Reverse the element order within each list of the input column. For details, see :cpp:func:`reverse`. 
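A minimal caller-side sketch of the new stream keyword, not part of the patch itself. It assumes the usual pyarrow round-trip via pylibcudf.interop and that DEFAULT_STREAM is importable from rmm.pylibrmm.stream; exact import paths may differ between releases.

import pyarrow as pa
import pylibcudf as plc
from rmm.pylibrmm.stream import DEFAULT_STREAM

# Build a small lists column from Arrow (each row is itself a list).
lists_col = plc.interop.from_arrow(pa.array([[3, 1, 2], [9, 7]]))

# Pass a stream explicitly; stream=None (the default) resolves to the
# library default via _get_stream, exactly as in the wrappers above.
reversed_col = plc.lists.reverse(lists_col, stream=DEFAULT_STREAM)
print(plc.interop.to_arrow(reversed_col))  # [[2, 1, 3], [7, 9]]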
@@ -272,15 +303,18 @@ cpdef Column reverse(Column input): cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + stream = _get_stream(stream) + with nogil: - c_result = cpp_reverse.reverse(list_view.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_reverse.reverse(list_view.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) cpdef Column segmented_gather( Column input, Column gather_map_list, out_of_bounds_policy bounds_policy=out_of_bounds_policy.DONT_CHECK, + Stream stream=None, ): """Create a column with elements gathered based on the indices in gather_map_list @@ -314,16 +348,21 @@ cpdef Column segmented_gather( cdef ListColumnView list_view1 = input.list_view() cdef ListColumnView list_view2 = gather_map_list.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_gather.segmented_gather( list_view1.view(), list_view2.view(), bounds_policy, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column extract_list_element(Column input, ColumnOrSizeType index): +cpdef Column extract_list_element( + Column input, ColumnOrSizeType index, Stream stream=None +): """Create a column of extracted list elements. For details, see :cpp:func:`extract_list_element`. @@ -343,15 +382,18 @@ cpdef Column extract_list_element(Column input, ColumnOrSizeType index): cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_extract_list_element( list_view.view(), index.view() if ColumnOrSizeType is Column else index, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column count_elements(Column input): +cpdef Column count_elements(Column input, Stream stream=None): """Count the number of rows in each list element in the given lists column. For details, see :cpp:func:`count_elements`. @@ -371,13 +413,17 @@ cpdef Column count_elements(Column input): cdef ListColumnView list_view = input.list_view() cdef unique_ptr[column] c_result + stream = _get_stream(stream) + with nogil: - c_result = cpp_count_elements(list_view.view()) + c_result = cpp_count_elements(list_view.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column sequences(Column starts, Column sizes, Column steps = None): +cpdef Column sequences( + Column starts, Column sizes, Column steps = None, Stream stream=None +): """Create a lists column in which each row contains a sequence of values specified by a tuple of (start, step, size) parameters. @@ -399,23 +445,27 @@ cpdef Column sequences(Column starts, Column sizes, Column steps = None): """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) + if steps is not None: with nogil: c_result = cpp_filling.sequences( starts.view(), steps.view(), sizes.view(), + stream.view(), ) else: with nogil: - c_result = cpp_filling.sequences(starts.view(), sizes.view()) - return Column.from_libcudf(move(c_result)) + c_result = cpp_filling.sequences(starts.view(), sizes.view(), stream.view()) + return Column.from_libcudf(move(c_result), stream) cpdef Column sort_lists( Column input, order sort_order, null_order na_position, - bool stable = False + bool stable = False, + Stream stream=None ): """Sort the elements within a list in each row of a list column. 
@@ -442,20 +492,24 @@ cpdef Column sort_lists( cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + stream = _get_stream(stream) + with nogil: if stable: c_result = cpp_stable_sort_lists( list_view.view(), sort_order, na_position, + stream.view(), ) else: c_result = cpp_sort_lists( list_view.view(), sort_order, na_position, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column difference_distinct( @@ -463,6 +517,7 @@ cpdef Column difference_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, + Stream stream=None, ): """Create a column of index values indicating the position of a search key row within the corresponding list row in the lists column. @@ -489,14 +544,17 @@ cpdef Column difference_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_set_operations.difference_distinct( lhs_view.view(), rhs_view.view(), nulls_equal, nans_equal, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column have_overlap( @@ -504,6 +562,7 @@ cpdef Column have_overlap( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, + Stream stream=None, ): """Check if lists at each row of the given lists columns overlap. @@ -529,14 +588,17 @@ cpdef Column have_overlap( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_set_operations.have_overlap( lhs_view.view(), rhs_view.view(), nulls_equal, nans_equal, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column intersect_distinct( @@ -544,6 +606,7 @@ cpdef Column intersect_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, + Stream stream=None, ): """Create a lists column of distinct elements common to two input lists columns. @@ -569,14 +632,17 @@ cpdef Column intersect_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_set_operations.intersect_distinct( lhs_view.view(), rhs_view.view(), nulls_equal, nans_equal, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column union_distinct( @@ -584,6 +650,7 @@ cpdef Column union_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, + Stream stream=None, ): """Create a lists column of distinct elements found in either of two input lists columns. 
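The set operations accept the same optional stream alongside their null/NaN equality options. A hedged sketch under the same interop assumptions as the example above; element order within each result list is unspecified.

import pyarrow as pa
import pylibcudf as plc

lhs = plc.interop.from_arrow(pa.array([[1, 2, 2], [3]]))
rhs = plc.interop.from_arrow(pa.array([[2, 4], [5]]))

# With stream omitted, _get_stream(None) supplies the default stream.
out = plc.lists.union_distinct(lhs, rhs)
print(plc.interop.to_arrow(out))  # e.g. [[1, 2, 4], [3, 5]]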
@@ -610,17 +677,20 @@ cpdef Column union_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_set_operations.union_distinct( lhs_view.view(), rhs_view.view(), nulls_equal, nans_equal, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column apply_boolean_mask(Column input, Column boolean_mask): +cpdef Column apply_boolean_mask(Column input, Column boolean_mask, Stream stream=None): """Filters elements in each row of the input lists column using a boolean mask For details, see :cpp:func:`apply_boolean_mask`. @@ -640,15 +710,24 @@ cpdef Column apply_boolean_mask(Column input, Column boolean_mask): cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() cdef ListColumnView mask_view = boolean_mask.list_view() + + stream = _get_stream(stream) + with nogil: c_result = cpp_apply_boolean_mask( list_view.view(), mask_view.view(), + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column distinct(Column input, null_equality nulls_equal, nan_equality nans_equal): +cpdef Column distinct( + Column input, + null_equality nulls_equal, + nan_equality nans_equal, + Stream stream=None +): """Create a new list column without duplicate elements in each list. For details, see :cpp:func:`distinct`. @@ -670,13 +749,17 @@ cpdef Column distinct(Column input, null_equality nulls_equal, nan_equality nans cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() + stream = _get_stream(stream) + with nogil: c_result = cpp_distinct( list_view.view(), nulls_equal, nans_equal, + duplicate_keep_option.KEEP_ANY, + stream.view(), ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) ConcatenateNullPolicy.__str__ = ConcatenateNullPolicy.__repr__ DuplicateFindOption.__str__ = DuplicateFindOption.__repr__ From 86e5d5522aea9347d64c33af55e34f2d2902b30b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:40:03 -0700 Subject: [PATCH 122/366] Move test_array_function/ufunc to new cudf classic test directory structure (#19637) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19637 --- python/cudf/cudf/tests/conftest.py | 44 ++ .../tests/dataframe/test_array_function.py | 44 ++ .../cudf/tests/dataframe/test_np_ufuncs.py | 128 +++++ .../indexes/index/test_array_function.py | 29 ++ .../indexes/multiindex/test_array_function.py | 24 + .../cudf/cudf/tests/indexes/test_np_ufuncs.py | 85 +++ .../cudf/tests/series/test_array_function.py | 36 ++ .../cudf/cudf/tests/series/test_np_ufuncs.py | 210 ++++++++ python/cudf/cudf/tests/test_array_function.py | 133 ----- python/cudf/cudf/tests/test_array_ufunc.py | 486 ------------------ 10 files changed, 600 insertions(+), 619 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/test_array_function.py create mode 100644 python/cudf/cudf/tests/dataframe/test_np_ufuncs.py create mode 100644 python/cudf/cudf/tests/indexes/index/test_array_function.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_array_function.py create mode 100644 
python/cudf/cudf/tests/indexes/test_np_ufuncs.py create mode 100644 python/cudf/cudf/tests/series/test_array_function.py create mode 100644 python/cudf/cudf/tests/series/test_np_ufuncs.py delete mode 100644 python/cudf/cudf/tests/test_array_function.py delete mode 100644 python/cudf/cudf/tests/test_array_ufunc.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 7a8a6c3881e..4718ecdc711 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -431,6 +431,50 @@ def all_supported_types_as_str(request): return request.param +# pandas can raise warnings for some inputs to the following ufuncs: +numpy_ufuncs = [] +for name in dir(np): + func = getattr(np, name) + if isinstance(func, np.ufunc) and hasattr(cp, name): + if func in { + np.arccos, + np.arccosh, + np.arcsin, + np.arctanh, + np.fmod, + np.log, + np.log10, + np.log2, + np.reciprocal, + }: + marks = [ + pytest.mark.filterwarnings( + "ignore:invalid value encountered:RuntimeWarning" + ), + pytest.mark.filterwarnings( + "ignore:divide by zero:RuntimeWarning" + ), + ] + numpy_ufuncs.append(pytest.param(func, marks=marks)) + elif func in { + np.bitwise_and, + np.bitwise_or, + np.bitwise_xor, + }: + marks = pytest.mark.filterwarnings( + "ignore:Operation between non boolean Series:FutureWarning" + ) + numpy_ufuncs.append(pytest.param(func, marks=marks)) + else: + numpy_ufuncs.append(func) + + +@pytest.fixture(params=numpy_ufuncs) +def numpy_ufunc(request): + """Numpy ufuncs also supported by cupy.""" + return request.param + + @pytest.fixture(params=[True, False]) def dropna(request): """Param for `dropna` argument""" diff --git a/python/cudf/cudf/tests/dataframe/test_array_function.py b/python/cudf/cudf/tests/dataframe/test_array_function.py new file mode 100644 index 00000000000..49ea9d59918 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_array_function.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.mean(x, axis=0), + lambda x: np.sum(x, axis=0), + lambda x: np.var(x, ddof=1, axis=0), + lambda x: np.dot(x, x.transpose()), + np.all, + np.any, + lambda x: np.prod(x, axis=0), + lambda x: np.prod(x, axis=1), + ], +) +def test_array_func_cudf_dataframe(func): + pd_df = pd.DataFrame(np.ones((3, 3))) + cudf_df = cudf.from_pandas(pd_df) + expect = func(pd_df) + got = func(cudf_df) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.cov(x, x), + np.linalg.norm, + np.linalg.det, + ], +) +def test_array_func_missing_cudf_dataframe(func): + pd_df = pd.DataFrame(np.ones((3, 3))) + cudf_df = cudf.from_pandas(pd_df) + with pytest.raises(TypeError): + func(cudf_df) diff --git a/python/cudf/cudf/tests/dataframe/test_np_ufuncs.py b/python/cudf/cudf/tests/dataframe/test_np_ufuncs.py new file mode 100644 index 00000000000..315a1303806 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_np_ufuncs.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import operator +from functools import reduce + +import cupy as cp +import numpy as np +import pytest +from packaging.version import parse + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if, set_random_null_mask_inplace + + +# Skip matmul since it requires aligned shapes. +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize("has_nulls", [True, False]) +@pytest.mark.parametrize("indexed", [True, False]) +def test_ufunc_dataframe(request, numpy_ufunc, has_nulls, indexed): + # Note: This test assumes that all ufuncs are unary or binary. + request.applymarker( + pytest.mark.xfail( + condition=( + indexed + and numpy_ufunc + in { + np.greater, + np.greater_equal, + np.less, + np.less_equal, + np.not_equal, + np.equal, + } + ), + reason="Comparison operators do not support misaligned indexes.", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc in {np.ceil, np.floor, np.trunc} + and not has_nulls + and parse(np.__version__) >= parse("2.1") + and parse(cp.__version__) < parse("14"), + reason="https://github.com/cupy/cupy/issues/9018", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc == np.matmul, + reason=f"{numpy_ufunc} is not supported in cuDF", + ) + ) + + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + # TODO: Add tests of mismatched columns etc. + pandas_args = args = [ + cudf.DataFrame( + {"foo": cp.random.randint(low=1, high=10, size=N)}, + index=cp.random.choice(range(N), N, False) if indexed else None, + ) + for _ in range(numpy_ufunc.nin) + ] + + if has_nulls: + # Converting nullable integer cudf.Series to pandas will produce a + # float pd.Series, so instead we replace nulls with an arbitrary + # integer value, precompute the mask, and then reapply it afterwards. + for arg in args: + set_random_null_mask_inplace(arg["foo"]) + pandas_args = [arg.copy() for arg in args] + for arg in pandas_args: + arg["foo"] = arg["foo"].fillna(0) + + # Note: Different indexes must be aligned before the mask is computed. + # This requires using an internal function (_align_indices), and that + # is unlikely to change for the foreseeable future. 
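+ # The combined null mask is recomputed from the aligned inputs so that
+ # rows where any operand was null can be NaN-ed in the pandas expectation
+ # further below, matching cudf's null propagation.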
+ aligned = ( + cudf.core.dataframe._align_indices(*args) + if indexed and numpy_ufunc.nin == 2 + else args + ) + mask = reduce( + operator.or_, (a["foo"].isna() for a in aligned) + ).to_pandas() + + got = numpy_ufunc(*args) + + expect = numpy_ufunc(*(arg.to_pandas() for arg in pandas_args)) + + if numpy_ufunc.nout > 1: + for g, e in zip(got, expect, strict=True): + if has_nulls: + e[mask] = np.nan + assert_eq(g, e, check_exact=False) + else: + if has_nulls: + with expect_warning_if( + numpy_ufunc + in ( + np.isfinite, + np.isinf, + np.isnan, + np.logical_and, + np.logical_not, + np.logical_or, + np.logical_xor, + np.signbit, + np.equal, + np.greater, + np.greater_equal, + np.less, + np.less_equal, + np.not_equal, + ) + ): + expect[mask] = np.nan + assert_eq(got, expect, check_exact=False) diff --git a/python/cudf/cudf/tests/indexes/index/test_array_function.py b/python/cudf/cudf/tests/indexes/index/test_array_function.py new file mode 100644 index 00000000000..4d41d81dc35 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/test_array_function.py @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_np_unique_cudf_index(): + np_ar = np.array([1, 1, 3]) + cudf_index = cudf.Index(np_ar) + expect = cudf.Index(np.unique(np_ar)) + got = np.unique(cudf_index) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.cov(x, x), + np.linalg.norm, + np.linalg.det, + ], +) +def test_array_func_missing_cudf_index(func): + cudf_index = cudf.Index([1, 2, 3]) + with pytest.raises(TypeError): + func(cudf_index) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_array_function.py b/python/cudf/cudf/tests/indexes/multiindex/test_array_function.py new file mode 100644 index 00000000000..1bd2dd132e7 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/test_array_function.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf + + +@pytest.mark.parametrize( + "func", + [ + lambda x: np.cov(x, x), + lambda x: np.dot(x, x), + np.linalg.norm, + np.linalg.det, + ], +) +def test_array_func_missing_cudf_multi_index(func): + levels = [["a", "b"], ["c", "d"]] + codes = [[0, 1], [1, 0]] + + cudf_multi_index = cudf.MultiIndex(levels, codes) + with pytest.raises(TypeError): + func(cudf_multi_index) diff --git a/python/cudf/cudf/tests/indexes/test_np_ufuncs.py b/python/cudf/cudf/tests/indexes/test_np_ufuncs.py new file mode 100644 index 00000000000..e91b377abcb --- /dev/null +++ b/python/cudf/cudf/tests/indexes/test_np_ufuncs.py @@ -0,0 +1,85 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cupy as cp +import numpy as np +import pytest +from packaging.version import parse + +import cudf +from cudf.core._compat import ( + PANDAS_LT_300, +) +from cudf.testing import assert_eq + + +def test_ufunc_index(request, numpy_ufunc): + # Note: This test assumes that all ufuncs are unary or binary. + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc == np.matmul and PANDAS_LT_300, + reason="Fixed by https://github.com/pandas-dev/pandas/pull/57079", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc in {np.ceil, np.floor, np.trunc} + and parse(np.__version__) >= parse("2.1") + and parse(cp.__version__) < parse("14"), + reason="https://github.com/cupy/cupy/issues/9018", + ) + ) + + N = 100 + # Avoid zeros in either array to skip division by 0 errors. 
Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + pandas_args = args = [ + cudf.Index( + cp.random.randint(low=1, high=10, size=N), + ) + for _ in range(numpy_ufunc.nin) + ] + + got = numpy_ufunc(*args) + + expect = numpy_ufunc(*(arg.to_pandas() for arg in pandas_args)) + + if numpy_ufunc.nout > 1: + for g, e in zip(got, expect, strict=True): + assert_eq(g, e, check_exact=False) + else: + assert_eq(got, expect, check_exact=False) + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] +) +@pytest.mark.parametrize("reflect", [True, False]) +def test_binary_ufunc_index_array(ufunc, reflect): + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + args = [cudf.Index(cp.random.rand(N)) for _ in range(ufunc.nin)] + + arg1 = args[1].to_cupy() + + if reflect: + got = ufunc(arg1, args[0]) + expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) + else: + got = ufunc(args[0], arg1) + expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) + + if ufunc.nout > 1: + for g, e in zip(got, expect, strict=True): + if reflect: + assert (cp.asnumpy(g) == e).all() + else: + assert_eq(g, e, check_exact=False) + else: + if reflect: + assert (cp.asnumpy(got) == expect).all() + else: + assert_eq(got, expect, check_exact=False) diff --git a/python/cudf/cudf/tests/series/test_array_function.py b/python/cudf/cudf/tests/series/test_array_function.py new file mode 100644 index 00000000000..4715a0a7103 --- /dev/null +++ b/python/cudf/cudf/tests/series/test_array_function.py @@ -0,0 +1,36 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "func", + [ + np.mean, + np.sum, + lambda x: np.var(x, ddof=1), + np.unique, + lambda x: np.dot(x, x), + np.linalg.norm, + ], +) +def test_array_func_cudf_series(func): + np_ar = np.arange(10, dtype=np.float32) + cudf_ser = cudf.Series(np_ar) + expect = func(np_ar) + got = func(cudf_ser) + if np.isscalar(expect): + assert expect == got + else: + assert_eq(cudf.Series(expect), got) + + +@pytest.mark.parametrize("index", [None, [1, 2, 3]]) +def test_list_input_array_func(index): + s = cudf.Series(np.array([1, 2, 3]), index=index) + with pytest.raises(TypeError): + np.concatenate([s, s, s]) diff --git a/python/cudf/cudf/tests/series/test_np_ufuncs.py b/python/cudf/cudf/tests/series/test_np_ufuncs.py new file mode 100644 index 00000000000..c43f73b7e1f --- /dev/null +++ b/python/cudf/cudf/tests/series/test_np_ufuncs.py @@ -0,0 +1,210 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import operator +from functools import reduce + +import cupy as cp +import numpy as np +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if, set_random_null_mask_inplace + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize("has_nulls", [True, False]) +@pytest.mark.parametrize("indexed", [True, False]) +def test_ufunc_series(request, numpy_ufunc, has_nulls, indexed): + # Note: This test assumes that all ufuncs are unary or binary. 
+ request.applymarker( + pytest.mark.xfail( + condition=( + indexed + and numpy_ufunc + in { + np.greater, + np.greater_equal, + np.less, + np.less_equal, + np.not_equal, + np.equal, + } + ), + reason="Comparison operators do not support misaligned indexes.", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc == np.matmul and has_nulls, + reason="Can't call cupy on column with nulls", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=numpy_ufunc.__name__.startswith("bitwise") + and indexed + and has_nulls, + reason="https://github.com/pandas-dev/pandas/issues/52500", + ) + ) + + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + pandas_args = args = [ + cudf.Series( + cp.random.randint(low=1, high=10, size=N), + index=cp.random.choice(range(N), N, False) if indexed else None, + ) + for _ in range(numpy_ufunc.nin) + ] + + if has_nulls: + # Converting nullable integer cudf.Series to pandas will produce a + # float pd.Series, so instead we replace nulls with an arbitrary + # integer value, precompute the mask, and then reapply it afterwards. + for arg in args: + set_random_null_mask_inplace(arg) + pandas_args = [arg.fillna(0) for arg in args] + + # Note: Different indexes must be aligned before the mask is computed. + # This requires using an internal function (_align_indices), and that + # is unlikely to change for the foreseeable future. + aligned = ( + cudf.core.series._align_indices(args, allow_non_unique=True) + if indexed and numpy_ufunc.nin == 2 + else args + ) + mask = reduce(operator.or_, (a.isna() for a in aligned)).to_pandas() + + got = numpy_ufunc(*args) + + expect = numpy_ufunc(*(arg.to_pandas() for arg in pandas_args)) + + if numpy_ufunc.nout > 1: + for g, e in zip(got, expect, strict=True): + if has_nulls: + e[mask] = np.nan + assert_eq(g, e, check_exact=False) + else: + if has_nulls: + with expect_warning_if( + numpy_ufunc + in ( + np.isfinite, + np.isinf, + np.isnan, + np.logical_and, + np.logical_not, + np.logical_or, + np.logical_xor, + np.signbit, + np.equal, + np.greater, + np.greater_equal, + np.less, + np.less_equal, + np.not_equal, + ) + ): + expect[mask] = np.nan + assert_eq(got, expect, check_exact=False) + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] +) +@pytest.mark.parametrize("has_nulls", [True, False]) +@pytest.mark.parametrize("indexed", [True, False]) +@pytest.mark.parametrize("reflect", [True, False]) +def test_binary_ufunc_series_array( + request, ufunc, has_nulls, indexed, reflect +): + fname = ufunc.__name__ + request.applymarker( + pytest.mark.xfail( + condition=reflect and has_nulls, + reason=( + "When cupy is the left operand there is no way for us to " + "avoid calling its binary operators, which cannot handle " + "cudf objects that contain nulls." + ), + ) + ) + # The way cudf casts nans in arrays to nulls during binops with cudf + # objects is currently incompatible with pandas. + request.applymarker( + pytest.mark.xfail( + condition=( + fname in {"greater", "greater_equal", "logical_and"} + and has_nulls + ), + reason=( + "cudf and pandas incompatible casting nans to nulls in binops" + ), + ) + ) + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. 
We use ints because some + # operations (like bitwise ops) are not defined for floats. + args = [ + cudf.Series( + cp.random.rand(N), + index=cp.random.choice(range(N), N, False) if indexed else None, + ) + for _ in range(ufunc.nin) + ] + + if has_nulls: + # Converting nullable integer cudf.Series to pandas will produce a + # float pd.Series, so instead we replace nulls with an arbitrary + # integer value, precompute the mask, and then reapply it afterwards. + for arg in args: + set_random_null_mask_inplace(arg) + + # Cupy doesn't support nulls, so we fill with nans before converting. + args[1] = args[1].fillna(cp.nan) + mask = args[0].isna().to_pandas() + + arg1 = args[1].to_cupy() + + if reflect: + got = ufunc(arg1, args[0]) + expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) + else: + got = ufunc(args[0], arg1) + expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) + + if ufunc.nout > 1: + for g, e in zip(got, expect, strict=True): + if has_nulls: + e[mask] = np.nan + if reflect: + assert (cp.asnumpy(g) == e).all() + else: + assert_eq(g, e, check_exact=False) + else: + if has_nulls: + expect[mask] = np.nan + if reflect: + assert (cp.asnumpy(got) == expect).all() + else: + assert_eq(got, expect, check_exact=False) + + +def test_ufunc_cudf_series_error_with_out_kwarg(): + cudf_s1 = cudf.Series(data=[-1, 2, 3, 0]) + cudf_s2 = cudf.Series(data=[-1, 2, 3, 0]) + cudf_s3 = cudf.Series(data=[0, 0, 0, 0]) + with pytest.raises(TypeError): + np.add(x1=cudf_s1, x2=cudf_s2, out=cudf_s3) diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py deleted file mode 100644 index f627087d64c..00000000000 --- a/python/cudf/cudf/tests/test_array_function.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - - -@pytest.fixture -def rng(): - return np.random.default_rng(seed=0) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.mean(x), - lambda x: np.sum(x), - lambda x: np.var(x, ddof=1), - lambda x: np.unique(x), - lambda x: np.dot(x, x), - lambda x: np.linalg.norm(x), - ], -) -def test_array_func_cudf_series(func, rng): - np_ar = rng.random(100) - cudf_ser = cudf.Series(np_ar) - expect = func(np_ar) - got = func(cudf_ser) - if np.isscalar(expect): - assert_eq(expect, got) - else: - assert_eq(expect, got.to_numpy()) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.mean(x, axis=0), - lambda x: np.sum(x, axis=0), - lambda x: np.var(x, ddof=1, axis=0), - lambda x: np.dot(x, x.transpose()), - lambda x: np.all(x), - lambda x: np.any(x), - lambda x: np.prod(x, axis=0), - lambda x: np.prod(x, axis=1), - ], -) -def test_array_func_cudf_dataframe(func, rng): - pd_df = pd.DataFrame(rng.uniform(size=(100, 10))) - cudf_df = cudf.from_pandas(pd_df) - expect = func(pd_df) - got = func(cudf_df) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.cov(x, x), - lambda x: np.linalg.norm(x), - lambda x: np.linalg.det(x), - ], -) -def test_array_func_missing_cudf_dataframe(func, rng): - pd_df = pd.DataFrame(rng.uniform(size=(100, 10))) - cudf_df = cudf.from_pandas(pd_df) - with pytest.raises(TypeError): - func(cudf_df) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.unique(x), - ], -) -def test_array_func_cudf_index(func, rng): - np_ar = rng.random(100) - cudf_index = cudf.Index(cudf.Series(np_ar)) - expect = func(np_ar) - got = func(cudf_index) - if np.isscalar(expect): - assert_eq(expect, got) - else: - assert_eq(expect, got.to_numpy()) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.cov(x, x), - lambda x: np.linalg.norm(x), - lambda x: np.linalg.det(x), - ], -) -def test_array_func_missing_cudf_index(func, rng): - np_ar = rng.random(100) - cudf_index = cudf.Index(cudf.Series(np_ar)) - with pytest.raises(TypeError): - func(cudf_index) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: np.cov(x, x), - lambda x: np.dot(x, x), - lambda x: np.linalg.norm(x), - lambda x: np.linalg.det(x), - ], -) -def test_array_func_missing_cudf_multi_index(func): - levels = [["a", "b"], ["c", "d"]] - codes = [[0, 1], [1, 0]] - - cudf_multi_index = cudf.MultiIndex(levels, codes) - with pytest.raises(TypeError): - func(cudf_multi_index) - - -def test_list_input_array_func(): - ar = np.array([1, 2, 3]) - - s = cudf.Series(ar) - with pytest.raises(TypeError): - np.concatenate([s, s, s]) - - s = cudf.Series(ar, index=[1, 2, 3]) - with pytest.raises(TypeError): - np.concatenate([s, s, s]) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py deleted file mode 100644 index abc3c105320..00000000000 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ /dev/null @@ -1,486 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- -import operator -import warnings -from contextlib import contextmanager -from functools import reduce - -import cupy as cp -import numpy as np -import pytest -from packaging.version import parse - -import cudf -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_LT_300, - PANDAS_VERSION, -) -from cudf.testing import assert_eq -from cudf.testing._utils import expect_warning_if, set_random_null_mask_inplace - - -@pytest.fixture( - params=[ - obj - for obj in (getattr(np, name) for name in dir(np)) - if isinstance(obj, np.ufunc) - ] -) -def ufunc(request): - return request.param - - -@contextmanager -def _hide_ufunc_warnings(ufunc): - # pandas raises warnings for some inputs to the following ufuncs: - name = ufunc.__name__ - if name in { - "arccos", - "arccosh", - "arcsin", - "arctanh", - "fmod", - "log", - "log10", - "log2", - "reciprocal", - }: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - f"invalid value encountered in {name}", - category=RuntimeWarning, - ) - warnings.filterwarnings( - "ignore", - f"divide by zero encountered in {name}", - category=RuntimeWarning, - ) - yield - elif name in { - "bitwise_and", - "bitwise_or", - "bitwise_xor", - }: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "Operation between non boolean Series with different " - "indexes will no longer return a boolean result in " - "a future version. Cast both Series to object type " - "to maintain the prior behavior.", - category=FutureWarning, - ) - yield - else: - yield - - -def test_ufunc_index(request, ufunc): - # Note: This test assumes that all ufuncs are unary or binary. - fname = ufunc.__name__ - request.applymarker( - pytest.mark.xfail( - condition=not hasattr(cp, fname), - reason=f"cupy has no support for '{fname}'", - ) - ) - request.applymarker( - pytest.mark.xfail( - condition=fname == "matmul" and PANDAS_LT_300, - reason="Fixed by https://github.com/pandas-dev/pandas/pull/57079", - ) - ) - request.applymarker( - pytest.mark.xfail( - condition=fname in {"ceil", "floor", "trunc"} - and parse(np.__version__) >= parse("2.1") - and parse(cp.__version__) < parse("14"), - reason="https://github.com/cupy/cupy/issues/9018", - ) - ) - - N = 100 - # Avoid zeros in either array to skip division by 0 errors. Also limit the - # scale to avoid issues with overflow, etc. We use ints because some - # operations (like bitwise ops) are not defined for floats. - pandas_args = args = [ - cudf.Index( - cp.random.randint(low=1, high=10, size=N), - ) - for _ in range(ufunc.nin) - ] - - got = ufunc(*args) - - with _hide_ufunc_warnings(ufunc): - expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) - - if ufunc.nout > 1: - for g, e in zip(got, expect, strict=True): - assert_eq(g, e, check_exact=False) - else: - assert_eq(got, expect, check_exact=False) - - -@pytest.mark.parametrize( - "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] -) -@pytest.mark.parametrize("reflect", [True, False]) -def test_binary_ufunc_index_array(ufunc, reflect): - N = 100 - # Avoid zeros in either array to skip division by 0 errors. Also limit the - # scale to avoid issues with overflow, etc. We use ints because some - # operations (like bitwise ops) are not defined for floats. 
- args = [cudf.Index(cp.random.rand(N)) for _ in range(ufunc.nin)] - - arg1 = args[1].to_cupy() - - if reflect: - got = ufunc(arg1, args[0]) - expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) - else: - got = ufunc(args[0], arg1) - expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) - - if ufunc.nout > 1: - for g, e in zip(got, expect, strict=True): - if reflect: - assert (cp.asnumpy(g) == e).all() - else: - assert_eq(g, e, check_exact=False) - else: - if reflect: - assert (cp.asnumpy(got) == expect).all() - else: - assert_eq(got, expect, check_exact=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize("has_nulls", [True, False]) -@pytest.mark.parametrize("indexed", [True, False]) -def test_ufunc_series(request, ufunc, has_nulls, indexed): - # Note: This test assumes that all ufuncs are unary or binary. - fname = ufunc.__name__ - request.applymarker( - pytest.mark.xfail( - condition=( - indexed - and fname - in { - "greater", - "greater_equal", - "less", - "less_equal", - "not_equal", - "equal", - } - ), - reason="Comparison operators do not support misaligned indexes.", - ) - ) - request.applymarker( - pytest.mark.xfail( - condition=ufunc == np.matmul and has_nulls, - reason="Can't call cupy on column with nulls", - ) - ) - # If we don't have explicit dispatch and cupy doesn't support the operator, - # we expect a failure - request.applymarker( - pytest.mark.xfail( - condition=not hasattr(cp, fname), - reason=f"cupy has no support for '{fname}'", - ) - ) - - request.applymarker( - pytest.mark.xfail( - condition=fname.startswith("bitwise") and indexed and has_nulls, - reason="https://github.com/pandas-dev/pandas/issues/52500", - ) - ) - - N = 100 - # Avoid zeros in either array to skip division by 0 errors. Also limit the - # scale to avoid issues with overflow, etc. We use ints because some - # operations (like bitwise ops) are not defined for floats. - pandas_args = args = [ - cudf.Series( - cp.random.randint(low=1, high=10, size=N), - index=cp.random.choice(range(N), N, False) if indexed else None, - ) - for _ in range(ufunc.nin) - ] - - if has_nulls: - # Converting nullable integer cudf.Series to pandas will produce a - # float pd.Series, so instead we replace nulls with an arbitrary - # integer value, precompute the mask, and then reapply it afterwards. - for arg in args: - set_random_null_mask_inplace(arg) - pandas_args = [arg.fillna(0) for arg in args] - - # Note: Different indexes must be aligned before the mask is computed. - # This requires using an internal function (_align_indices), and that - # is unlikely to change for the foreseeable future. 
- aligned = ( - cudf.core.series._align_indices(args, allow_non_unique=True) - if indexed and ufunc.nin == 2 - else args - ) - mask = reduce(operator.or_, (a.isna() for a in aligned)).to_pandas() - - got = ufunc(*args) - - with _hide_ufunc_warnings(ufunc): - expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) - - if ufunc.nout > 1: - for g, e in zip(got, expect, strict=True): - if has_nulls: - e[mask] = np.nan - assert_eq(g, e, check_exact=False) - else: - if has_nulls: - with expect_warning_if( - fname - in ( - "isfinite", - "isinf", - "isnan", - "logical_and", - "logical_not", - "logical_or", - "logical_xor", - "signbit", - "equal", - "greater", - "greater_equal", - "less", - "less_equal", - "not_equal", - ) - ): - expect[mask] = np.nan - assert_eq(got, expect, check_exact=False) - - -@pytest.mark.parametrize( - "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] -) -@pytest.mark.parametrize("has_nulls", [True, False]) -@pytest.mark.parametrize("indexed", [True, False]) -@pytest.mark.parametrize("reflect", [True, False]) -def test_binary_ufunc_series_array( - request, ufunc, has_nulls, indexed, reflect -): - fname = ufunc.__name__ - request.applymarker( - pytest.mark.xfail( - condition=reflect and has_nulls, - reason=( - "When cupy is the left operand there is no way for us to " - "avoid calling its binary operators, which cannot handle " - "cudf objects that contain nulls." - ), - ) - ) - # The way cudf casts nans in arrays to nulls during binops with cudf - # objects is currently incompatible with pandas. - request.applymarker( - pytest.mark.xfail( - condition=( - fname in {"greater", "greater_equal", "logical_and"} - and has_nulls - ), - reason=( - "cudf and pandas incompatible casting nans to nulls in binops" - ), - ) - ) - N = 100 - # Avoid zeros in either array to skip division by 0 errors. Also limit the - # scale to avoid issues with overflow, etc. We use ints because some - # operations (like bitwise ops) are not defined for floats. - args = [ - cudf.Series( - cp.random.rand(N), - index=cp.random.choice(range(N), N, False) if indexed else None, - ) - for _ in range(ufunc.nin) - ] - - if has_nulls: - # Converting nullable integer cudf.Series to pandas will produce a - # float pd.Series, so instead we replace nulls with an arbitrary - # integer value, precompute the mask, and then reapply it afterwards. - for arg in args: - set_random_null_mask_inplace(arg) - - # Cupy doesn't support nulls, so we fill with nans before converting. 
- args[1] = args[1].fillna(cp.nan) - mask = args[0].isna().to_pandas() - - arg1 = args[1].to_cupy() - - if reflect: - got = ufunc(arg1, args[0]) - expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) - else: - got = ufunc(args[0], arg1) - expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) - - if ufunc.nout > 1: - for g, e in zip(got, expect, strict=True): - if has_nulls: - e[mask] = np.nan - if reflect: - assert (cp.asnumpy(g) == e).all() - else: - assert_eq(g, e, check_exact=False) - else: - if has_nulls: - expect[mask] = np.nan - if reflect: - assert (cp.asnumpy(got) == expect).all() - else: - assert_eq(got, expect, check_exact=False) - - -@pytest.mark.parametrize( - "func", - [np.add], -) -def test_ufunc_cudf_series_error_with_out_kwarg(func): - cudf_s1 = cudf.Series(data=[-1, 2, 3, 0]) - cudf_s2 = cudf.Series(data=[-1, 2, 3, 0]) - cudf_s3 = cudf.Series(data=[0, 0, 0, 0]) - # this throws a value-error because of presence of out kwarg - with pytest.raises(TypeError): - func(x1=cudf_s1, x2=cudf_s2, out=cudf_s3) - - -# Skip matmul since it requires aligned shapes. -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize("has_nulls", [True, False]) -@pytest.mark.parametrize("indexed", [True, False]) -def test_ufunc_dataframe(request, ufunc, has_nulls, indexed): - # Note: This test assumes that all ufuncs are unary or binary. - fname = ufunc.__name__ - request.applymarker( - pytest.mark.xfail( - condition=( - indexed - and fname - in { - "greater", - "greater_equal", - "less", - "less_equal", - "not_equal", - "equal", - } - ), - reason="Comparison operators do not support misaligned indexes.", - ) - ) - # If we don't have explicit dispatch and cupy doesn't support the operator, - # we expect a failure - request.applymarker( - pytest.mark.xfail( - condition=not hasattr(cp, fname), - reason=f"cupy has no support for '{fname}'", - ) - ) - request.applymarker( - pytest.mark.xfail( - condition=fname in {"ceil", "floor", "trunc"} - and not has_nulls - and parse(np.__version__) >= parse("2.1") - and parse(cp.__version__) < parse("14"), - reason="https://github.com/cupy/cupy/issues/9018", - ) - ) - request.applymarker( - pytest.mark.xfail( - condition=fname == "matmul", - reason=f"{fname} is not supported in cuDF", - ) - ) - - N = 100 - # Avoid zeros in either array to skip division by 0 errors. Also limit the - # scale to avoid issues with overflow, etc. We use ints because some - # operations (like bitwise ops) are not defined for floats. - # TODO: Add tests of mismatched columns etc. - pandas_args = args = [ - cudf.DataFrame( - {"foo": cp.random.randint(low=1, high=10, size=N)}, - index=cp.random.choice(range(N), N, False) if indexed else None, - ) - for _ in range(ufunc.nin) - ] - - if has_nulls: - # Converting nullable integer cudf.Series to pandas will produce a - # float pd.Series, so instead we replace nulls with an arbitrary - # integer value, precompute the mask, and then reapply it afterwards. - for arg in args: - set_random_null_mask_inplace(arg["foo"]) - pandas_args = [arg.copy() for arg in args] - for arg in pandas_args: - arg["foo"] = arg["foo"].fillna(0) - - # Note: Different indexes must be aligned before the mask is computed. - # This requires using an internal function (_align_indices), and that - # is unlikely to change for the foreseeable future. 
- aligned = ( - cudf.core.dataframe._align_indices(*args) - if indexed and ufunc.nin == 2 - else args - ) - mask = reduce( - operator.or_, (a["foo"].isna() for a in aligned) - ).to_pandas() - - got = ufunc(*args) - - with _hide_ufunc_warnings(ufunc): - expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) - - if ufunc.nout > 1: - for g, e in zip(got, expect, strict=True): - if has_nulls: - e[mask] = np.nan - assert_eq(g, e, check_exact=False) - else: - if has_nulls: - with expect_warning_if( - fname - in ( - "isfinite", - "isinf", - "isnan", - "logical_and", - "logical_not", - "logical_or", - "logical_xor", - "signbit", - "equal", - "greater", - "greater_equal", - "less", - "less_equal", - "not_equal", - ) - ): - expect[mask] = np.nan - assert_eq(got, expect, check_exact=False) From fa67af0f96e5fced188294b621087a354b90f6ac Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:42:38 -0700 Subject: [PATCH 123/366] Move ~half of test_groupby.py to new cudf classic test directory structure (#19640) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19640 --- python/cudf/cudf/testing/__init__.py | 1 + python/cudf/cudf/testing/testing.py | 24 + python/cudf/cudf/tests/groupby/conftest.py | 14 + python/cudf/cudf/tests/groupby/test_agg.py | 575 +++- python/cudf/cudf/tests/groupby/test_apply.py | 141 + .../cudf/tests/groupby/test_attributes.py | 126 + .../cudf/tests/groupby/test_computation.py | 18 - .../cudf/tests/groupby/test_constructors.py | 16 + .../cudf/tests/groupby/test_cummulative.py | 95 + python/cudf/cudf/tests/groupby/test_diff.py | 194 ++ python/cudf/cudf/tests/groupby/test_fillna.py | 191 ++ .../{test_indexing.py => test_get_group.py} | 0 .../cudf/tests/groupby/test_groupby_obj.py | 15 - python/cudf/cudf/tests/groupby/test_nth.py | 25 + .../cudf/cudf/tests/groupby/test_nunique.py | 47 + .../groupby/test_ordering_pandas_compat.py | 28 - python/cudf/cudf/tests/groupby/test_pipe.py | 15 + python/cudf/cudf/tests/groupby/test_rank.py | 59 + .../cudf/tests/groupby/test_reductions.py | 678 +++++ python/cudf/cudf/tests/groupby/test_shift.py | 204 ++ python/cudf/cudf/tests/groupby/test_unique.py | 31 + python/cudf/cudf/tests/test_groupby.py | 2422 +---------------- 22 files changed, 2530 insertions(+), 2389 deletions(-) create mode 100644 python/cudf/cudf/tests/groupby/conftest.py create mode 100644 python/cudf/cudf/tests/groupby/test_apply.py create mode 100644 python/cudf/cudf/tests/groupby/test_attributes.py delete mode 100644 python/cudf/cudf/tests/groupby/test_computation.py create mode 100644 python/cudf/cudf/tests/groupby/test_constructors.py create mode 100644 python/cudf/cudf/tests/groupby/test_cummulative.py create mode 100644 python/cudf/cudf/tests/groupby/test_diff.py create mode 100644 python/cudf/cudf/tests/groupby/test_fillna.py rename python/cudf/cudf/tests/groupby/{test_indexing.py => test_get_group.py} (100%) delete mode 100644 python/cudf/cudf/tests/groupby/test_groupby_obj.py create mode 100644 python/cudf/cudf/tests/groupby/test_nth.py create mode 100644 python/cudf/cudf/tests/groupby/test_nunique.py delete mode 100644 python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py create mode 100644 python/cudf/cudf/tests/groupby/test_pipe.py create mode 100644 
python/cudf/cudf/tests/groupby/test_rank.py
 create mode 100644 python/cudf/cudf/tests/groupby/test_reductions.py
 create mode 100644 python/cudf/cudf/tests/groupby/test_shift.py
 create mode 100644 python/cudf/cudf/tests/groupby/test_unique.py

diff --git a/python/cudf/cudf/testing/__init__.py b/python/cudf/cudf/testing/__init__.py
index a4afa54f754..b03e5bf4375 100644
--- a/python/cudf/cudf/testing/__init__.py
+++ b/python/cudf/cudf/testing/__init__.py
@@ -4,6 +4,7 @@ from cudf.testing.testing import (
     assert_eq,
     assert_frame_equal,
+    assert_groupby_results_equal,
     assert_index_equal,
     assert_neq,
     assert_series_equal,
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index 190011f614e..048a54a76f8 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -809,3 +809,27 @@ def assert_neq(left, right, **kwargs):
         pass
     else:
         raise AssertionError
+
+
+def assert_groupby_results_equal(
+    expect, got, sort=True, as_index=True, by=None, **kwargs
+):
+    # Because we don't sort by index by default in groupby,
+    # sort expect and got by index before comparing.
+    if sort:
+        if as_index:
+            expect = expect.sort_index()
+            got = got.sort_index()
+        else:
+            assert by is not None
+            if isinstance(expect, (pd.DataFrame, cudf.DataFrame)):
+                expect = expect.sort_values(by=by).reset_index(drop=True)
+            else:
+                expect = expect.sort_values().reset_index(drop=True)
+
+            if isinstance(got, cudf.DataFrame):
+                got = got.sort_values(by=by).reset_index(drop=True)
+            else:
+                got = got.sort_values().reset_index(drop=True)
+
+    assert_eq(expect, got, **kwargs)
diff --git a/python/cudf/cudf/tests/groupby/conftest.py b/python/cudf/cudf/tests/groupby/conftest.py
new file mode 100644
index 00000000000..db95a7b3b8e
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/conftest.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+import pytest
+
+
+@pytest.fixture(params=[True, False])
+def as_index(request):
+    return request.param
+
+
+@pytest.fixture(
+    params=["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"]
+)
+def groupby_reduction_methods(request):
+    return request.param
diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py
index dc20a27177a..42c74f967bb 100644
--- a/python/cudf/cudf/tests/groupby/test_agg.py
+++ b/python/cudf/cudf/tests/groupby/test_agg.py
@@ -1,16 +1,20 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+import decimal +import itertools + import numpy as np +import pandas as pd import pytest import cudf -from cudf.testing import assert_eq +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq, assert_groupby_results_equal -@pytest.mark.parametrize( - "empty", - [True, False], - ids=["empty", "nonempty"], -) +@pytest.mark.parametrize("empty", [True, False]) def test_agg_count_dtype(empty): df = cudf.DataFrame({"a": [1, 2, 1], "c": ["a", "b", "c"]}) if empty: @@ -72,3 +76,560 @@ def test_dataframe_agg_with_invalid_kwarg(): with pytest.raises(TypeError, match="Invalid keyword argument"): df = cudf.DataFrame({"a": [1, 2, 1, 2], "b": [0, 0, 0, 0]}) df.groupby("a").agg(foo=set()) + + +@pytest.mark.parametrize("with_nulls", [False, True]) +def test_groupby_agg_maintain_order_random(with_nulls): + nrows = 20 + nkeys = 3 + rng = np.random.default_rng(seed=0) + key_names = [f"key{key}" for key in range(nkeys)] + key_values = [rng.integers(100, size=nrows) for _ in key_names] + value = rng.integers(-100, 100, size=nrows) + df = cudf.DataFrame( + dict(zip(key_names, key_values, strict=True), value=value) + ) + if with_nulls: + for key in key_names: + df.loc[df[key] == 1, key] = None + with cudf.option_context("mode.pandas_compatible", True): + got = df.groupby(key_names, sort=False).agg({"value": "sum"}) + expect = ( + df.to_pandas().groupby(key_names, sort=False).agg({"value": "sum"}) + ) + assert_eq(expect, got, check_index_type=not with_nulls) + + +def test_groupby_agg_mean_min(): + pdf = pd.DataFrame(np.ones((20, 3)), columns=["x", "y", "val"]) + gdf = cudf.DataFrame(pdf) + got_df = gdf.groupby(["x", "y"]).agg(["mean", "min"]) + expect_df = pdf.groupby(["x", "y"]).agg(["mean", "min"]) + assert_groupby_results_equal(got_df, expect_df) + + +def test_groupby_agg_min_max_dictargs(): + pdf = pd.DataFrame(np.ones((20, 5)), columns=["x", "y", "val", "a", "b"]) + gdf = cudf.DataFrame(pdf) + expect_df = pdf.groupby(["x", "y"]).agg({"a": "min", "b": "max"}) + got_df = gdf.groupby(["x", "y"]).agg({"a": "min", "b": "max"}) + assert_groupby_results_equal(expect_df, got_df) + + +def test_groupby_agg_min_max_dictlist(): + pdf = pd.DataFrame(np.ones((20, 5)), columns=["x", "y", "val", "a", "b"]) + gdf = cudf.DataFrame(pdf) + expect_df = pdf.groupby(["x", "y"]).agg( + {"a": ["min", "max"], "b": ["min", "max"]} + ) + got_df = gdf.groupby(["x", "y"]).agg( + {"a": ["min", "max"], "b": ["min", "max"]} + ) + assert_groupby_results_equal(got_df, expect_df) + + +def test_groupby_as_index_single_agg(as_index): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = gdf.groupby("y", as_index=as_index).agg({"x": "mean"}) + pdf = pdf.groupby("y", as_index=as_index).agg({"x": "mean"}) + assert_groupby_results_equal(pdf, gdf, as_index=as_index, by="y") + + +def test_groupby_default(): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = gdf.groupby("y").agg({"x": "mean"}) + pdf = pdf.groupby("y").agg({"x": "mean"}) + assert_groupby_results_equal(pdf, gdf) + + +def test_groupby_as_index_multiindex(as_index): + pdf = pd.DataFrame( + {"a": [1, 2, 1], "b": [3, 3, 3], "c": [2, 2, 3], "d": [3, 1, 2]} + ) + gdf = cudf.from_pandas(pdf) + + gdf = gdf.groupby(["a", "b"], as_index=as_index, sort=True).agg( + {"c": "mean"} + ) + pdf = pdf.groupby(["a", "b"], as_index=as_index, sort=True).agg( + {"c": "mean"} + ) + + if as_index: + 
assert_eq(pdf, gdf) + else: + # column names don't match - check just the values + for gcol, pcol in zip(gdf, pdf, strict=True): + np.testing.assert_array_equal( + gdf[gcol].to_numpy(), pdf[pcol].values + ) + + +@pytest.mark.parametrize( + "func", + [ + "mean", + "std", + "var", + "min", + "max", + "idxmin", + "idxmax", + "count", + "sum", + "prod", + ], +) +def test_groupby_2keys_agg(func): + # gdf (Note: lack of multiIndex) + nelem = 20 + pdf = pd.DataFrame(np.ones((nelem, 2)), columns=["x", "y"]) + gdf = cudf.DataFrame(pdf) + expect_df = pdf.groupby(["x", "y"]).agg(func) + got_df = gdf.groupby(["x", "y"]).agg(func) + + assert_groupby_results_equal(got_df, expect_df) + + +def test_series_groupby_agg(groupby_reduction_methods): + s = pd.Series([1, 2, 3]) + g = cudf.Series([1, 2, 3]) + sg = s.groupby(s // 2).agg(groupby_reduction_methods) + gg = g.groupby(g // 2).agg(groupby_reduction_methods) + assert_groupby_results_equal(sg, gg) + + +def test_groupby_agg_decimal(groupby_reduction_methods, request): + request.applymarker( + pytest.mark.xfail( + groupby_reduction_methods in ["prod", "mean"], + raises=pd.errors.DataError, + reason=f"{groupby_reduction_methods} not supported with Decimals in pandas", + ) + ) + request.applymarker( + pytest.mark.xfail( + groupby_reduction_methods in ["idxmax", "idxmin"] + and PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason=f"{groupby_reduction_methods} not supported with Decimals in an older version of pandas", + ) + ) + rng = np.random.default_rng(seed=0) + num_groups = 4 + nelem_per_group = 10 + # The number of digits after the decimal to use. + decimal_digits = 2 + # The number of digits before the decimal to use. + whole_digits = 2 + + scale = 10**whole_digits + nelem = num_groups * nelem_per_group + + # The unique is necessary because otherwise if there are duplicates idxmin + # and idxmax may return different results than pandas (see + # https://github.com/rapidsai/cudf/issues/7756). This is not relevant to + # the current version of the test, because idxmin and idxmax simply don't + # work with pandas Series composed of Decimal objects (see + # https://github.com/pandas-dev/pandas/issues/40685). However, if that is + # ever enabled, then this issue will crop up again so we may as well have + # it fixed now. + x = np.unique((rng.random(nelem) * scale).round(decimal_digits)) + y = np.unique((rng.random(nelem) * scale).round(decimal_digits)) + + if x.size < y.size: + total_elements = x.size + y = y[: x.size] + else: + total_elements = y.size + x = x[: y.size] + + # Note that this filtering can lead to one group with fewer elements, but + # that shouldn't be a problem and is probably useful to test. 
+ idx_col = np.tile(np.arange(num_groups), nelem_per_group)[:total_elements] + + decimal_x = pd.Series([decimal.Decimal(str(d)) for d in x]) + decimal_y = pd.Series([decimal.Decimal(str(d)) for d in y]) + + pdf = pd.DataFrame({"idx": idx_col, "x": decimal_x, "y": decimal_y}) + gdf = cudf.DataFrame( + { + "idx": idx_col, + "x": cudf.Series(decimal_x), + "y": cudf.Series(decimal_y), + } + ) + + expect_df = pdf.groupby("idx", sort=True).agg(groupby_reduction_methods) + got_df = gdf.groupby("idx", sort=True).agg(groupby_reduction_methods) + assert_eq(expect_df["x"], got_df["x"], check_dtype=False) + assert_eq(expect_df["y"], got_df["y"], check_dtype=False) + + +def test_groupby_use_agg_column_as_index(): + pdf = pd.DataFrame({"a": [1, 1, 1, 3, 5]}) + gdf = cudf.DataFrame({"a": [1, 1, 1, 3, 5]}) + gdf["a"] = [1, 1, 1, 3, 5] + pdg = pdf.groupby("a").agg({"a": "count"}) + gdg = gdf.groupby("a").agg({"a": "count"}) + assert_groupby_results_equal(pdg, gdg, check_dtype=False) + + +def test_groupby_list_then_string(): + gdf = cudf.DataFrame( + {"a": [0, 1, 0, 1, 2], "b": [11, 2, 15, 12, 2], "c": [6, 7, 6, 7, 6]} + ) + pdf = gdf.to_pandas() + gdg = gdf.groupby("a", as_index=True).agg( + {"b": ["min", "max"], "c": "max"} + ) + pdg = pdf.groupby("a", as_index=True).agg( + {"b": ["min", "max"], "c": "max"} + ) + assert_groupby_results_equal(gdg, pdg) + + +def test_groupby_different_unequal_length_column_aggregations(): + gdf = cudf.DataFrame( + {"a": [0, 1, 0, 1, 2], "b": [11, 2, 15, 12, 2], "c": [6, 7, 6, 7, 6]} + ) + pdf = gdf.to_pandas() + gdg = gdf.groupby("a", as_index=True).agg( + {"b": "min", "c": ["max", "min"]} + ) + pdg = pdf.groupby("a", as_index=True).agg( + {"b": "min", "c": ["max", "min"]} + ) + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_single_var_two_aggs(): + gdf = cudf.DataFrame( + {"a": [0, 1, 0, 1, 2], "b": [11, 2, 15, 12, 2], "c": [6, 7, 6, 7, 6]} + ) + pdf = gdf.to_pandas() + gdg = gdf.groupby("a", as_index=True).agg({"b": ["min", "max"]}) + pdg = pdf.groupby("a", as_index=True).agg({"b": ["min", "max"]}) + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_double_var_two_aggs(): + gdf = cudf.DataFrame( + {"a": [0, 1, 0, 1, 2], "b": [11, 2, 15, 12, 2], "c": [6, 7, 6, 7, 6]} + ) + pdf = gdf.to_pandas() + gdg = gdf.groupby(["a", "b"], as_index=True).agg({"c": ["min", "max"]}) + pdg = pdf.groupby(["a", "b"], as_index=True).agg({"c": ["min", "max"]}) + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_multi_agg_single_groupby_series(): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame( + { + "x": rng.integers(0, 5, size=100), + "y": rng.normal(size=100), + } + ) + gdf = cudf.from_pandas(pdf) + pdg = pdf.groupby("x").y.agg(["sum", "max"]) + gdg = gdf.groupby("x").y.agg(["sum", "max"]) + + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_multi_agg_multi_groupby(): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame( + { + "a": rng.integers(0, 5, 10), + "b": rng.integers(0, 5, 10), + "c": rng.integers(0, 5, 10), + "d": rng.integers(0, 5, 10), + } + ) + gdf = cudf.from_pandas(pdf) + pdg = pdf.groupby(["a", "b"]).agg(["sum", "max"]) + gdg = gdf.groupby(["a", "b"]).agg(["sum", "max"]) + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_datetime_multi_agg_multi_groupby(): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame( + { + "a": pd.date_range( + "2020-01-01", + freq="D", + periods=10, + ), + "b": rng.integers(0, 5, 10), + "c": rng.integers(0, 5, 10), + "d": rng.integers(0, 5, 10), + } + ) + gdf = 
cudf.from_pandas(pdf)
+    pdg = pdf.groupby(["a", "b"]).agg(["sum", "max"])
+    gdg = gdf.groupby(["a", "b"]).agg(["sum", "max"])
+
+    assert_groupby_results_equal(pdg, gdg)
+
+
+@pytest.mark.parametrize(
+    "agg",
+    [
+        ["min", "max", "count", "mean"],
+        ["mean", "var", "std"],
+        ["count", "mean", "var", "std"],
+    ],
+)
+def test_groupby_multi_agg_hash_groupby(agg):
+    gdf = cudf.DataFrame(
+        {"id": [0, 0, 1, 1, 2, 2, 0], "a": [0, 1, 2, 3, 4, 5, 6]}
+    )
+    pdf = gdf.to_pandas()
+    check_dtype = "count" not in agg
+    pdg = pdf.groupby("id").agg(agg)
+    gdg = gdf.groupby("id").agg(agg)
+    assert_groupby_results_equal(pdg, gdg, check_dtype=check_dtype)
+
+
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="previous version of pandas throws a warning",
+)
+def test_groupby_nulls_basic(groupby_reduction_methods, request):
+    pdf = pd.DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": [1, 2, 1, 2, 1, None]})
+    gdf = cudf.from_pandas(pdf)
+    assert_groupby_results_equal(
+        getattr(pdf.groupby("a"), groupby_reduction_methods)(),
+        getattr(gdf.groupby("a"), groupby_reduction_methods)(),
+    )
+
+    pdf = pd.DataFrame(
+        {
+            "a": [0, 0, 1, 1, 2, 2],
+            "b": [1, 2, 1, 2, 1, None],
+            "c": [1, 2, 1, None, 1, 2],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+    assert_groupby_results_equal(
+        getattr(pdf.groupby("a"), groupby_reduction_methods)(),
+        getattr(gdf.groupby("a"), groupby_reduction_methods)(),
+    )
+
+    pdf = pd.DataFrame(
+        {
+            "a": [0, 0, 1, 1, 2, 2],
+            "b": [1, 2, 1, 2, 1, None],
+            "c": [1, 2, None, None, 1, 2],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    request.applymarker(
+        pytest.mark.xfail(
+            groupby_reduction_methods in ["prod", "sum"],
+            reason="cuDF returns NaN instead of an actual value",
+        )
+    )
+    assert_groupby_results_equal(
+        getattr(pdf.groupby("a"), groupby_reduction_methods)(),
+        getattr(gdf.groupby("a"), groupby_reduction_methods)(),
+    )
+
+
+@pytest.mark.parametrize("agg", [lambda x: x.count(), "count"])
+@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]])
+def test_groupby_count(agg, by):
+    pdf = pd.DataFrame(
+        {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]}
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    expect = pdf.groupby(by).agg(agg)
+    got = gdf.groupby(by).agg(agg)
+
+    assert_groupby_results_equal(expect, got, check_dtype=True)
+
+
+@pytest.mark.parametrize("agg", [lambda x: x.median(), "median"])
+@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]])
+def test_groupby_median(agg, by):
+    pdf = pd.DataFrame(
+        {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]}
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    expect = pdf.groupby(by).agg(agg)
+    got = gdf.groupby(by).agg(agg)
+
+    assert_groupby_results_equal(expect, got, check_dtype=False)
+
+
+def test_multi_agg():
+    gdf = cudf.DataFrame(
+        {"a": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": ["a", "b", "c", "d"]}
+    )
+    pdf = gdf.to_pandas()
+    assert_groupby_results_equal(
+        pdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}),
+        gdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}),
+    )
+
+
+@pytest.mark.parametrize(
+    "agg",
+    (
+        [
+            *itertools.combinations(["count", "max", "min", "nunique"], 2),
+            {"b": "min", "c": "mean"},
+            {"b": "max", "c": "mean"},
+            {"b": "count", "c": "mean"},
+            {"b": "nunique", "c": "mean"},
+        ]
+    ),
+)
+def test_groupby_agg_combinations(agg):
+    pdf = pd.DataFrame(
+        {
+            "a": [1, 1, 2, 2, 3],
+            "b": ["a", "a", "b", "c", "d"],
+            "c": [1, 2, 3, 4, 5],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    assert_groupby_results_equal(
+        pdf.groupby("a").agg(agg),
+ gdf.groupby("a").agg(agg), + check_dtype=False, + ) + + +@pytest.mark.parametrize("list_agg", [list, "collect"]) +def test_groupby_list_simple(list_agg): + pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, None, 4, 5, 6]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").agg({"b": list}), + gdf.groupby("a").agg({"b": list_agg}), + check_dtype=False, + ) + + +@pytest.mark.parametrize("list_agg", [list, "collect"]) +def test_groupby_list_of_lists(list_agg): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 2, 2], + "b": [[1, 2], [3, None, 5], None, [], [7, 8], [9]], + } + ) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").agg({"b": list}), + gdf.groupby("a").agg({"b": list_agg}), + check_dtype=False, + ) + + +@pytest.mark.parametrize("list_agg", [list, "collect"]) +def test_groupby_list_of_structs(list_agg): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 2, 2], + "b": [ + {"c": "1", "d": 1}, + {"c": "2", "d": 2}, + {"c": "3", "d": 3}, + {"c": "4", "d": 4}, + {"c": "5", "d": 5}, + {"c": "6", "d": 6}, + ], + } + ) + gdf = cudf.from_pandas(pdf) + grouped = gdf.groupby("a").agg({"b": list_agg}) + assert_groupby_results_equal( + pdf.groupby("a").agg({"b": list}), + grouped, + check_dtype=True, + ) + assert grouped["b"].dtype.element_type == gdf["b"].dtype + + +@pytest.mark.parametrize("list_agg", [list, "collect"]) +def test_groupby_list_single_element(list_agg): + pdf = pd.DataFrame({"a": [1, 2], "b": [3, None]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").agg({"b": list}), + gdf.groupby("a").agg({"b": list_agg}), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "agg", [list, [list, "count"], {"b": list, "c": "sum"}] +) +def test_groupby_list_strings(agg): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": ["b", "a", None, "e", "d"], + "c": [1, 2, 3, 4, 5], + } + ) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").agg(agg), + gdf.groupby("a").agg(agg), + check_dtype=False, + ) + + +def test_groupby_list_columns_excluded(): + pdf = pd.DataFrame( + { + "a": [1, 1, 2, 2], + "b": [1, 2, 3, 4], + "c": [[1, 2], [3, 4], [5, 6], [7, 8]], + } + ) + gdf = cudf.from_pandas(pdf) + + pandas_result = pdf.groupby("a").mean(numeric_only=True) + pandas_agg_result = pdf.groupby("a").agg("mean", numeric_only=True) + + assert_groupby_results_equal( + pandas_result, + gdf.groupby("a").mean(numeric_only=True), + check_dtype=False, + ) + + assert_groupby_results_equal( + pandas_agg_result, + gdf.groupby("a").agg("mean"), + check_dtype=False, + ) + + +def test_groupby_mix_agg_scan(): + err_msg = "Cannot perform both aggregation and scan in one operation" + func = ["cumsum", "sum"] + gb = cudf.DataFrame(np.ones((10, 3)), columns=["x", "y", "z"]).groupby( + ["x", "y"], sort=True + ) + + gb.agg(func[0]) + gb.agg(func[1]) + gb.agg(func[1:]) + with pytest.raises(NotImplementedError, match=err_msg): + gb.agg(func) diff --git a/python/cudf/cudf/tests/groupby/test_apply.py b/python/cudf/cudf/tests/groupby/test_apply.py new file mode 100644 index 00000000000..1c340a34e2a --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_apply.py @@ -0,0 +1,141 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest +from numba import cuda + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_groupby_results_equal + + +@pytest.fixture(params=["cudf", "jit"]) +def engine(request): + return request.param + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", +) +def test_groupby_as_index_apply(as_index, engine): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = gdf.groupby("y", as_index=as_index).apply( + lambda df: df["x"].mean(), engine=engine + ) + kwargs = {"func": lambda df: df["x"].mean(), "include_groups": False} + pdf = pdf.groupby("y", as_index=as_index).apply(**kwargs) + assert_groupby_results_equal(pdf, gdf, as_index=as_index, by="y") + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply(): + rng = np.random.default_rng(seed=0) + nelem = 20 + df = cudf.DataFrame( + { + "key1": rng.integers(0, 3, nelem), + "key2": rng.integers(0, 2, nelem), + "val1": rng.random(nelem), + "val2": rng.random(nelem), + } + ) + + expect_grpby = df.to_pandas().groupby( + ["key1", "key2"], as_index=False, group_keys=False + ) + got_grpby = df.groupby(["key1", "key2"]) + + def foo(df): + df["out"] = df["val1"] + df["val2"] + return df + + expect = expect_grpby.apply(foo, include_groups=False) + got = got_grpby.apply(foo, include_groups=False) + assert_groupby_results_equal(expect, got) + + +def f1(df, k): + df["out"] = df["val1"] + df["val2"] + k + return df + + +def f2(df, k, L): + df["out"] = df["val1"] - df["val2"] + (k / L) + return df + + +def f3(df, k, L, m): + df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m + return df + + +@pytest.mark.parametrize( + "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_args(func, args): + rng = np.random.default_rng(seed=0) + nelem = 20 + df = cudf.DataFrame( + { + "key1": rng.integers(0, 3, nelem), + "key2": rng.integers(0, 2, nelem), + "val1": rng.random(nelem), + "val2": rng.random(nelem), + } + ) + + expect_grpby = df.to_pandas().groupby( + ["key1", "key2"], as_index=False, group_keys=False + ) + got_grpby = df.groupby(["key1", "key2"]) + expect = expect_grpby.apply(func, *args, include_groups=False) + got = got_grpby.apply(func, *args, include_groups=False) + assert_groupby_results_equal(expect, got) + + +def test_groupby_apply_grouped(): + df = cudf.DataFrame( + { + "key1": range(20), + "key2": range(20), + "val1": range(20), + "val2": range(20), + } + ) + + got_grpby = df.groupby(["key1", "key2"]) + + def foo(key1, val1, com1, com2): + for i in range(cuda.threadIdx.x, len(key1), cuda.blockDim.x): + com1[i] = key1[i] * 10000 + val1[i] + com2[i] = i + + got = got_grpby.apply_grouped( + foo, + incols=["key1", "val1"], + outcols={"com1": np.float64, "com2": np.int32}, + tpb=8, + ) + + got = got.to_pandas() + + expect = df.copy() + expect["com1"] = (expect["key1"] * 10000 + expect["key1"]).astype( + np.float64 + ) + expect["com2"] = np.zeros(20, dtype=np.int32) + + assert_groupby_results_equal(expect, got) diff --git a/python/cudf/cudf/tests/groupby/test_attributes.py 
b/python/cudf/cudf/tests/groupby/test_attributes.py new file mode 100644 index 00000000000..16e6741229d --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_attributes.py @@ -0,0 +1,126 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_groups(): + # https://github.com/rapidsai/cudf/issues/14955 + df = cudf.DataFrame({"a": [1, 2] * 2}, index=[0] * 4) + agg = df.groupby("a") + pagg = df.to_pandas().groupby("a") + for key in agg.groups: + np.testing.assert_array_equal( + pagg.indices[key], agg.indices[key].get() + ) + assert_eq(pagg.get_group(key), agg.get_group(key)) + + +@pytest.mark.parametrize( + "by", + [ + "a", + "b", + ["a"], + ["b"], + ["a", "b"], + ["b", "a"], + np.array([0, 0, 0, 1, 1, 1, 2]), + ], +) +def test_groupby_groups(by): + pdf = pd.DataFrame( + {"a": [1, 2, 1, 2, 1, 2, 3], "b": [1, 2, 3, 4, 5, 6, 7]} + ) + gdf = cudf.from_pandas(pdf) + + pdg = pdf.groupby(by) + gdg = gdf.groupby(by) + + for key in pdg.groups: + assert key in gdg.groups + assert_eq(pdg.groups[key], gdg.groups[key]) + + +@pytest.mark.parametrize( + "by", + [ + "a", + "b", + ["a"], + ["b"], + ["a", "b"], + ["b", "a"], + ["a", "c"], + ["a", "b", "c"], + ], +) +def test_groupby_groups_multi(by): + pdf = pd.DataFrame( + { + "a": [1, 2, 1, 2, 1, 2, 3], + "b": ["a", "b", "a", "b", "b", "c", "c"], + "c": [1, 2, 3, 4, 5, 6, 7], + } + ) + gdf = cudf.from_pandas(pdf) + + pdg = pdf.groupby(by) + gdg = gdf.groupby(by) + + for key in pdg.groups: + assert key in gdg.groups + assert_eq(pdg.groups[key], gdg.groups[key]) + + +def test_groupby_iterate_groups(): + rng = np.random.default_rng(seed=0) + nelem = 20 + df = cudf.DataFrame( + { + "key1": rng.integers(0, 3, nelem), + "key2": rng.integers(0, 2, nelem), + "val1": rng.random(nelem), + "val2": rng.random(nelem), + } + ) + + def assert_values_equal(arr): + np.testing.assert_array_equal(arr[0], arr) + + for name, grp in df.groupby(["key1", "key2"]): + pddf = grp.to_pandas() + for k in "key1,key2".split(","): + assert_values_equal(pddf[k].values) + + +@pytest.mark.parametrize( + "grouper", + [ + "a", + ["a"], + ["a", "b"], + np.array([0, 1, 1, 2, 3, 2]), + {0: "a", 1: "a", 2: "b", 3: "a", 4: "b", 5: "c"}, + lambda x: x + 1, + ["a", np.array([0, 1, 1, 2, 3, 2])], + ], +) +def test_grouping(grouper): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 2, 3], + "b": [1, 2, 1, 2, 1, 2], + "c": [1, 2, 3, 4, 5, 6], + } + ) + gdf = cudf.from_pandas(pdf) + + for pdf_group, gdf_group in zip( + pdf.groupby(grouper), gdf.groupby(grouper), strict=True + ): + assert pdf_group[0] == gdf_group[0] + assert_eq(pdf_group[1], gdf_group[1]) diff --git a/python/cudf/cudf/tests/groupby/test_computation.py b/python/cudf/cudf/tests/groupby/test_computation.py deleted file mode 100644 index 630fcdc4dce..00000000000 --- a/python/cudf/cudf/tests/groupby/test_computation.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - - -@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) -def test_rank_return_type_compatible_mode(method): - # in compatible mode, rank() always returns floats - pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5]}) - with cudf.option_context("mode.pandas_compatible", True): - df = cudf.from_pandas(pdf) - result = df.groupby("a").rank(method=method) - expect = pdf.groupby("a").rank(method=method) - assert_eq(expect, result) - assert result["b"].dtype == "float64" diff --git a/python/cudf/cudf/tests/groupby/test_constructors.py b/python/cudf/cudf/tests/groupby/test_constructors.py new file mode 100644 index 00000000000..503ec3d3500 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_constructors.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import pandas as pd +import pytest + +import cudf +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("data", [{"a": [1, 2]}, {"a": [1, 2], "b": [2, 3]}]) +def test_groupby_nonempty_no_keys(data): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + assert_exceptions_equal( + lambda: pdf.groupby([]), + lambda: gdf.groupby([]), + ) diff --git a/python/cudf/cudf/tests/groupby/test_cummulative.py b/python/cudf/cudf/tests/groupby/test_cummulative.py new file mode 100644 index 00000000000..1eab8a1b317 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_cummulative.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal + + +@pytest.mark.parametrize("index", [None, [1, 2, 3, 4]]) +def test_groupby_cumcount(index): + pdf = pd.DataFrame( + { + "a": [1, 1, 3, 4], + "b": ["bob", "bob", "alice", "cooper"], + "c": [1, 2, 3, 4], + }, + index=index, + ) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").cumcount(), + gdf.groupby("a").cumcount(), + check_dtype=False, + ) + + assert_groupby_results_equal( + pdf.groupby(["a", "b", "c"]).cumcount(), + gdf.groupby(["a", "b", "c"]).cumcount(), + check_dtype=False, + ) + + sr = pd.Series(range(len(pdf)), index=index) + assert_groupby_results_equal( + pdf.groupby(sr).cumcount(), + gdf.groupby(sr).cumcount(), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "func", ["cummin", "cummax", "cumcount", "cumsum", "cumprod"] +) +def test_groupby_2keys_scan(func): + nelem = 20 + pdf = pd.DataFrame(np.ones((nelem, 3)), columns=["x", "y", "val"]) + expect_df = pdf.groupby(["x", "y"], sort=True).agg(func) + gdf = cudf.from_pandas(pdf) + got_df = gdf.groupby(["x", "y"], sort=True).agg(func) + # pd.groupby.cumcount returns a series. 
+ if isinstance(expect_df, pd.Series): + expect_df = expect_df.to_frame("val") + + assert_groupby_results_equal(got_df, expect_df) + + expect_df = getattr(pdf.groupby(["x", "y"], sort=True), func)() + got_df = getattr(gdf.groupby(["x", "y"], sort=True), func)() + assert_groupby_results_equal(got_df, expect_df) + + expect_df = getattr(pdf.groupby(["x", "y"], sort=True)[["x"]], func)() + got_df = getattr(gdf.groupby(["x", "y"], sort=True)[["x"]], func)() + assert_groupby_results_equal(got_df, expect_df) + + expect_df = getattr(pdf.groupby(["x", "y"], sort=True)["y"], func)() + got_df = getattr(gdf.groupby(["x", "y"], sort=True)["y"], func)() + assert_groupby_results_equal(got_df, expect_df) + + +@pytest.mark.parametrize( + "with_nan", [False, True], ids=["just-NA", "also-NaN"] +) +@pytest.mark.parametrize( + "duplicate_index", [False, True], ids=["rangeindex", "dupindex"] +) +def test_groupby_scan_null_keys(with_nan, dropna, duplicate_index): + key_col = [None, 1, 2, None, 3, None, 3, 1, None, 1] + if with_nan: + df = pd.DataFrame( + {"key": pd.Series(key_col, dtype="float32"), "value": range(10)} + ) + else: + df = pd.DataFrame( + {"key": pd.Series(key_col, dtype="Int32"), "value": range(10)} + ) + + if duplicate_index: + # Non-default index with duplicates + df.index = [1, 2, 3, 1, 3, 2, 4, 1, 6, 10] + + cdf = cudf.from_pandas(df) + + expect = df.groupby("key", dropna=dropna).cumsum() + got = cdf.groupby("key", dropna=dropna).cumsum() + assert_groupby_results_equal(expect, got) diff --git a/python/cudf/cudf/tests/groupby/test_diff.py b/python/cudf/cudf/tests/groupby/test_diff.py new file mode 100644 index 00000000000..a9e60e0f4b5 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_diff.py @@ -0,0 +1,194 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
+ + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_groupby_results_equal +from cudf.testing.dataset_generator import rand_dataframe + + +@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +def test_groupby_diff_row(shift_perc, direction): + nelem = 20 + pdf = pd.DataFrame(np.ones((nelem, 4)), columns=["x", "y", "val", "val2"]) + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + expected = pdf.groupby(["x", "y"]).diff(periods=n_shift) + got = gdf.groupby(["x", "y"]).diff(periods=n_shift) + + assert_groupby_results_equal(expected, got) + + +@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +def test_groupby_diff_row_mixed_numerics(shift_perc, direction): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + expected = pdf.groupby(["0"]).diff(periods=n_shift) + got = gdf.groupby(["0"]).diff(periods=n_shift) + + assert_groupby_results_equal(expected, got) + + +def test_groupby_diff_row_zero_shift(): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + gdf = cudf.from_pandas(t.to_pandas()) + + expected = gdf + got = gdf.groupby(["0"]).shift(periods=0) + + assert_groupby_results_equal( + expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_groupby_fillna_multi_value(): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ms]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + key_col = "0" + value_cols = ["1", "2", "3", "4", "5", "6"] + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + + # fill the dataframe with the first non-null item in the column + fill_values = { + name: pdf[name].loc[pdf[name].first_valid_index()] + for name in value_cols + } + # cudf can't fillna with a pandas.Timedelta type + 
fill_values["4"] = fill_values["4"].to_numpy() + with pytest.warns(FutureWarning): + expect = pdf.groupby(key_col).fillna(value=fill_values) + with pytest.warns(FutureWarning): + got = gdf.groupby(key_col).fillna(value=fill_values) + + assert_groupby_results_equal(expect[value_cols], got[value_cols]) + + +# TODO: cudf.fillna does not support decimal column to column fill yet +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_groupby_fillna_multi_value_df(): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ms]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + key_col = "0" + value_cols = ["1", "2", "3", "4", "5"] + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + + # fill the dataframe with the first non-null item in the column + fill_values = { + name: pdf[name].loc[pdf[name].first_valid_index()] + for name in value_cols + } + # cudf can't fillna with a pandas.Timedelta type + fill_values["4"] = fill_values["4"].to_numpy() + fill_values = pd.DataFrame(fill_values, index=pdf.index) + with pytest.warns(FutureWarning): + expect = pdf.groupby(key_col).fillna(value=fill_values) + + fill_values = cudf.from_pandas(fill_values) + with pytest.warns(FutureWarning): + got = gdf.groupby(key_col).fillna(value=fill_values) + + assert_groupby_results_equal(expect[value_cols], got[value_cols]) diff --git a/python/cudf/cudf/tests/groupby/test_fillna.py b/python/cudf/cudf/tests/groupby/test_fillna.py new file mode 100644 index 00000000000..c8f45818357 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_fillna.py @@ -0,0 +1,191 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
+ + +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_groupby_results_equal +from cudf.testing.dataset_generator import rand_dataframe + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_groupby_fillna_multi_value(): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ms]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + key_col = "0" + value_cols = ["1", "2", "3", "4", "5", "6"] + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + + # fill the dataframe with the first non-null item in the column + fill_values = { + name: pdf[name].loc[pdf[name].first_valid_index()] + for name in value_cols + } + # cudf can't fillna with a pandas.Timedelta type + fill_values["4"] = fill_values["4"].to_numpy() + with pytest.warns(FutureWarning): + expect = pdf.groupby(key_col).fillna(value=fill_values) + with pytest.warns(FutureWarning): + got = gdf.groupby(key_col).fillna(value=fill_values) + + assert_groupby_results_equal(expect[value_cols], got[value_cols]) + + +# TODO: cudf.fillna does not support decimal column to column fill yet +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_groupby_fillna_multi_value_df(): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ms]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + key_col = "0" + value_cols = ["1", "2", "3", "4", "5"] + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + + # fill the dataframe with the first non-null item in the column + fill_values = { + name: pdf[name].loc[pdf[name].first_valid_index()] + for name in value_cols + } + # cudf can't fillna with a pandas.Timedelta type + fill_values["4"] = fill_values["4"].to_numpy() + fill_values = pd.DataFrame(fill_values, index=pdf.index) + with pytest.warns(FutureWarning): + expect = pdf.groupby(key_col).fillna(value=fill_values) + + fill_values = cudf.from_pandas(fill_values) + with pytest.warns(FutureWarning): + got = gdf.groupby(key_col).fillna(value=fill_values) + + assert_groupby_results_equal(expect[value_cols], got[value_cols]) + + +@pytest.mark.parametrize( + "by", + [pd.Series([1, 1, 2, 2, 3, 4]), lambda x: x % 2 == 0, pd.Grouper(level=0)], +) +@pytest.mark.parametrize( + "data", [[1, None, 2, None, 3, None], [1, 2, 3, 4, 5, 6]] +) +@pytest.mark.parametrize("args", [{"value": 42}, {"method": "ffill"}]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + 
reason="warning not present in older pandas versions", +) +def test_groupby_various_by_fillna(by, data, args): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + + with pytest.warns(FutureWarning): + expect = ps.groupby(by).fillna(**args) + if isinstance(by, pd.Grouper): + by = cudf.Grouper(level=by.level) + with pytest.warns(FutureWarning): + got = gs.groupby(by).fillna(**args) + + assert_groupby_results_equal(expect, got, check_dtype=False) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +def test_groupby_fillna_method(method): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "list", + "null_frequency": 0.4, + "cardinality": 10, + "lists_max_length": 10, + "nesting_max_depth": 3, + "value_type": "int64", + }, + {"dtype": "category", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + key_col = "0" + value_cols = ["1", "2", "3", "4", "5", "6", "7", "8"] + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + + with pytest.warns(FutureWarning): + expect = pdf.groupby(key_col).fillna(method=method) + with pytest.warns(FutureWarning): + got = gdf.groupby(key_col).fillna(method=method) + + assert_groupby_results_equal( + expect[value_cols], got[value_cols], sort=False + ) diff --git a/python/cudf/cudf/tests/groupby/test_indexing.py b/python/cudf/cudf/tests/groupby/test_get_group.py similarity index 100% rename from python/cudf/cudf/tests/groupby/test_indexing.py rename to python/cudf/cudf/tests/groupby/test_get_group.py diff --git a/python/cudf/cudf/tests/groupby/test_groupby_obj.py b/python/cudf/cudf/tests/groupby/test_groupby_obj.py deleted file mode 100644 index ab2b16d263c..00000000000 --- a/python/cudf/cudf/tests/groupby/test_groupby_obj.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -from numpy.testing import assert_array_equal - -import cudf -from cudf.testing import assert_eq - - -def test_groupby_14955(): - # https://github.com/rapidsai/cudf/issues/14955 - df = cudf.DataFrame({"a": [1, 2] * 2}, index=[0] * 4) - agg = df.groupby("a") - pagg = df.to_pandas().groupby("a") - for key in agg.groups: - assert_array_equal(pagg.indices[key], agg.indices[key].get()) - assert_eq(pagg.get_group(key), agg.get_group(key)) diff --git a/python/cudf/cudf/tests/groupby/test_nth.py b/python/cudf/cudf/tests/groupby/test_nth.py new file mode 100644 index 00000000000..1fb9d32f535 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_nth.py @@ -0,0 +1,25 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal + + +@pytest.mark.parametrize("n", [0, 2, 10]) +@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) +def test_groupby_nth(n, by): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 3], + "b": [1, 2, 2, 2, 1], + "c": [1, 2, None, 4, 5], + "d": ["a", "b", "c", "d", "e"], + } + ) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby(by).nth(n) + got = gdf.groupby(by).nth(n) + + assert_groupby_results_equal(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/groupby/test_nunique.py b/python/cudf/cudf/tests/groupby/test_nunique.py new file mode 100644 index 00000000000..742c35f874d --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_nunique.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal + + +@pytest.mark.parametrize("agg", [lambda x: x.nunique(), "nunique"]) +@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) +def test_groupby_nunique(agg, by): + pdf = pd.DataFrame( + {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]} + ) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby(by).nunique() + got = gdf.groupby(by).nunique() + + assert_groupby_results_equal(expect, got, check_dtype=False) + + +def test_nunique_dropna(dropna): + gdf = cudf.DataFrame( + { + "a": [1, 1, 2], + "b": [4, None, 5], + "c": [None, None, 7], + "d": [1, 1, 3], + } + ) + pdf = gdf.to_pandas() + + result = gdf.groupby("a")["b"].nunique(dropna=dropna) + expected = pdf.groupby("a")["b"].nunique(dropna=dropna) + assert_groupby_results_equal(result, expected, check_dtype=False) + + +def test_groupby_nunique_series(): + pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 1, 1, 2]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a")["b"].nunique(), + gdf.groupby("a")["b"].nunique(), + check_dtype=False, + ) diff --git a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py deleted file mode 100644 index 64bba6f4404..00000000000 --- a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2024-2025, NVIDIA CORPORATION. -import numpy as np -import pytest - -import cudf -from cudf.testing import assert_eq - - -@pytest.mark.parametrize("with_nulls", [False, True]) -def test_groupby_maintain_order_random(with_nulls): - nrows = 20 - nkeys = 3 - rng = np.random.default_rng(seed=0) - key_names = [f"key{key}" for key in range(nkeys)] - key_values = [rng.integers(100, size=nrows) for _ in key_names] - value = rng.integers(-100, 100, size=nrows) - df = cudf.DataFrame( - dict(zip(key_names, key_values, strict=True), value=value) - ) - if with_nulls: - for key in key_names: - df.loc[df[key] == 1, key] = None - with cudf.option_context("mode.pandas_compatible", True): - got = df.groupby(key_names, sort=False).agg({"value": "sum"}) - expect = ( - df.to_pandas().groupby(key_names, sort=False).agg({"value": "sum"}) - ) - assert_eq(expect, got, check_index_type=not with_nulls) diff --git a/python/cudf/cudf/tests/groupby/test_pipe.py b/python/cudf/cudf/tests/groupby/test_pipe.py new file mode 100644 index 00000000000..f6342380610 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_pipe.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+import pandas as pd + +import cudf +from cudf.testing import assert_groupby_results_equal + + +def test_groupby_pipe(): + pdf = pd.DataFrame({"A": "a b a b".split(), "B": [1, 2, 3, 4]}) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby("A").pipe(lambda x: x.max() - x.min()) + actual = gdf.groupby("A").pipe(lambda x: x.max() - x.min()) + + assert_groupby_results_equal(expected, actual) diff --git a/python/cudf/cudf/tests/groupby/test_rank.py b/python/cudf/cudf/tests/groupby/test_rank.py new file mode 100644 index 00000000000..60aee399ca0 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_rank.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq, assert_groupby_results_equal + + +@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) +def test_rank_return_type_compatible_mode(method): + # in compatible mode, rank() always returns floats + pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5]}) + with cudf.option_context("mode.pandas_compatible", True): + df = cudf.from_pandas(pdf) + result = df.groupby("a").rank(method=method) + expect = pdf.groupby("a").rank(method=method) + assert_eq(expect, result) + assert result["b"].dtype == "float64" + + +@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [False, True]) +def test_groupby_2keys_rank(method, ascending, na_option, pct): + nelem = 20 + pdf = pd.DataFrame( + { + "x": np.arange(nelem), + "y": np.arange(nelem), + "z": np.concatenate([np.arange(nelem - 10), np.full(10, np.nan)]), + } + ) + gdf = cudf.from_pandas(pdf) + expect_df = pdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + got_df = gdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + + assert_groupby_results_equal(got_df, expect_df, check_dtype=False) + + +def test_groupby_rank_fails(): + gdf = cudf.DataFrame( + {"x": [1, 2, 3, 4], "y": [1, 2, 3, 4], "z": [1, 2, 3, 4]} + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["x", "y"]).rank(method="min", axis=1) + gdf = cudf.DataFrame( + { + "a": [1, 1, 1, 2, 2, 2], + "b": [[1, 2], [3, None, 5], None, [], [7, 8], [9]], + } + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["a"]).rank(method="min", axis=1) diff --git a/python/cudf/cudf/tests/groupby/test_reductions.py b/python/cudf/cudf/tests/groupby/test_reductions.py new file mode 100644 index 00000000000..adbc5af309f --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_reductions.py @@ -0,0 +1,678 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal +from cudf.testing._utils import assert_exceptions_equal + + +def test_groupby_mean(): + pdf = pd.DataFrame(np.ones((20, 3)), columns=["x", "y", "val"]) + gdf = cudf.DataFrame(pdf) + got_df = gdf.groupby(["x", "y"]).mean() + expect_df = pdf.groupby(["x", "y"]).mean() + assert_groupby_results_equal(got_df, expect_df) + + +def test_groupby_mean_3level(): + pdf = pd.DataFrame(np.ones((20, 4)), columns=["x", "y", "val", "z"]) + gdf = cudf.DataFrame(pdf) + bys = list("xyz") + got_df = pdf.groupby(bys).mean() + expect_df = gdf.groupby(bys).mean() + assert_groupby_results_equal(got_df, expect_df) + + +def test_group_keys_true(): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) + gdf = gdf.groupby("y", group_keys=True).sum() + pdf = pdf.groupby("y", group_keys=True).sum() + assert_groupby_results_equal(pdf, gdf) + + +def test_groupby_getitem_getattr(as_index): + pdf = pd.DataFrame({"x": [1, 3, 1], "y": [1, 2, 3], "z": [1, 4, 5]}) + gdf = cudf.from_pandas(pdf) + assert_groupby_results_equal( + pdf.groupby("x", as_index=as_index)["y"].sum(), + gdf.groupby("x", as_index=as_index)["y"].sum(), + as_index=as_index, + by="x", + ) + assert_groupby_results_equal( + pdf.groupby("x", as_index=as_index).y.sum(), + gdf.groupby("x", as_index=as_index).y.sum(), + as_index=as_index, + by="x", + ) + assert_groupby_results_equal( + pdf.groupby("x", as_index=as_index)[["y"]].sum(), + gdf.groupby("x", as_index=as_index)[["y"]].sum(), + as_index=as_index, + by="x", + ) + assert_groupby_results_equal( + pdf.groupby(["x", "y"], as_index=as_index).sum(), + gdf.groupby(["x", "y"], as_index=as_index).sum(), + as_index=as_index, + by=["x", "y"], + ) + + +def test_groupby_cats(): + rng = np.random.default_rng(seed=0) + df = cudf.DataFrame( + {"cats": pd.Categorical(list("aabaacaab")), "vals": rng.random(9)} + ) + + cats = df["cats"].values_host + vals = df["vals"].to_numpy() + + grouped = df.groupby(["cats"], as_index=False).mean() + + got_vals = grouped["vals"] + + got_cats = grouped["cats"] + + for i in range(len(got_vals)): + expect = vals[cats == got_cats[i]].mean() + np.testing.assert_almost_equal(got_vals[i], expect) + + +def test_series_groupby(groupby_reduction_methods): + s = pd.Series([1, 2, 3]) + g = cudf.Series([1, 2, 3]) + sg = s.groupby(s // 2) + gg = g.groupby(g // 2) + sa = getattr(sg, groupby_reduction_methods)() + ga = getattr(gg, groupby_reduction_methods)() + assert_groupby_results_equal(sa, ga) + + +def test_groupby_level_zero(groupby_reduction_methods, request): + request.applymarker( + pytest.mark.xfail( + groupby_reduction_methods in ["idxmin", "idxmax"], + reason="gather needed for idxmin/idxmax", + ) + ) + pdf = pd.DataFrame({"x": [1, 2, 3]}, index=[2, 5, 5]) + gdf = cudf.DataFrame.from_pandas(pdf) + pdg = pdf.groupby(level=0) + gdg = gdf.groupby(level=0) + pdresult = getattr(pdg, groupby_reduction_methods)() + gdresult = getattr(gdg, groupby_reduction_methods)() + assert_groupby_results_equal( + pdresult, + gdresult, + ) + + +def test_groupby_series_level_zero(groupby_reduction_methods, request): + request.applymarker( + pytest.mark.xfail( + groupby_reduction_methods in ["idxmin", "idxmax"], + reason="gather needed for idxmin/idxmax", + ) + ) + pdf = pd.Series([1, 2, 3], index=[2, 5, 5]) + gdf = cudf.Series.from_pandas(pdf) + pdg = pdf.groupby(level=0) + gdg = gdf.groupby(level=0) + pdresult = getattr(pdg, 
groupby_reduction_methods)() + gdresult = getattr(gdg, groupby_reduction_methods)() + assert_groupby_results_equal(pdresult, gdresult) + + +def test_groupby_column_name(): + pdf = pd.DataFrame({"xx": [1.0, 2.0, 3.0], "yy": [1, 2, 3]}) + gdf = cudf.DataFrame.from_pandas(pdf) + g = gdf.groupby("yy") + p = pdf.groupby("yy") + gxx = g["xx"].sum() + pxx = p["xx"].sum() + assert_groupby_results_equal(pxx, gxx) + + gxx = g["xx"].count() + pxx = p["xx"].count() + assert_groupby_results_equal(pxx, gxx, check_dtype=False) + + gxx = g["xx"].min() + pxx = p["xx"].min() + assert_groupby_results_equal(pxx, gxx) + + gxx = g["xx"].max() + pxx = p["xx"].max() + assert_groupby_results_equal(pxx, gxx) + + gxx = g["xx"].idxmin() + pxx = p["xx"].idxmin() + assert_groupby_results_equal(pxx, gxx, check_dtype=False) + + gxx = g["xx"].idxmax() + pxx = p["xx"].idxmax() + assert_groupby_results_equal(pxx, gxx, check_dtype=False) + + gxx = g["xx"].mean() + pxx = p["xx"].mean() + assert_groupby_results_equal(pxx, gxx) + + +def test_groupby_column_numeral(): + pdf = pd.DataFrame({0: [1.0, 2.0, 3.0], 1: [1, 2, 3]}) + gdf = cudf.DataFrame.from_pandas(pdf) + p = pdf.groupby(1) + g = gdf.groupby(1) + pxx = p[0].sum() + gxx = g[0].sum() + assert_groupby_results_equal(pxx, gxx) + + pdf = pd.DataFrame({0.5: [1.0, 2.0, 3.0], 1.5: [1, 2, 3]}) + gdf = cudf.DataFrame.from_pandas(pdf) + p = pdf.groupby(1.5) + g = gdf.groupby(1.5) + pxx = p[0.5].sum() + gxx = g[0.5].sum() + assert_groupby_results_equal(pxx, gxx) + + +@pytest.mark.parametrize( + "series", + [ + [0, 1, 0], + [1, 1, 1], + [0, 1, 1], + [1, 2, 3], + [4, 3, 2], + [0, 2, 0], + pd.Series([0, 2, 0]), + pd.Series([0, 2, 0], index=[0, 2, 1]), + ], +) +def test_groupby_external_series(series): + pdf = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}) + gdf = cudf.DataFrame.from_pandas(pdf) + pxx = pdf.groupby(pd.Series(series)).x.sum() + gxx = gdf.groupby(cudf.Series(series)).x.sum() + assert_groupby_results_equal(pxx, gxx) + + +@pytest.mark.parametrize("series", [[0.0, 1.0], [1.0, 1.0, 1.0, 1.0]]) +def test_groupby_external_series_incorrect_length(series): + pdf = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}) + gdf = cudf.DataFrame.from_pandas(pdf) + pxx = pdf.groupby(pd.Series(series)).x.sum() + gxx = gdf.groupby(cudf.Series(series)).x.sum() + assert_groupby_results_equal(pxx, gxx) + + +@pytest.mark.parametrize( + "level", [0, 1, "a", "b", [0, 1], ["a", "b"], ["a", 1], -1, [-1, -2]] +) +def test_groupby_levels(level): + idx = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (2, 2)], names=("a", "b")) + pdf = pd.DataFrame({"c": [1, 2, 3], "d": [2, 3, 4]}, index=idx) + gdf = cudf.from_pandas(pdf) + assert_groupby_results_equal( + pdf.groupby(level=level).sum(), + gdf.groupby(level=level).sum(), + ) + + +def test_advanced_groupby_levels(): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 1], "z": [1, 1, 1]}) + gdf = cudf.from_pandas(pdf) + pdg = pdf.groupby(["x", "y"]).sum() + gdg = gdf.groupby(["x", "y"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdh = pdg.groupby(level=1).sum() + gdh = gdg.groupby(level=1).sum() + assert_groupby_results_equal(pdh, gdh) + pdg = pdf.groupby(["x", "y", "z"]).sum() + gdg = gdf.groupby(["x", "y", "z"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby(["z"]).sum() + gdg = gdf.groupby(["z"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby(["y", "z"]).sum() + gdg = gdf.groupby(["y", "z"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby(["x", "z"]).sum() + gdg = gdf.groupby(["x", 
"z"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby(["y"]).sum() + gdg = gdf.groupby(["y"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby(["x"]).sum() + gdg = gdf.groupby(["x"]).sum() + assert_groupby_results_equal(pdg, gdg) + pdh = pdg.groupby(level=0).sum() + gdh = gdg.groupby(level=0).sum() + assert_groupby_results_equal(pdh, gdh) + pdg = pdf.groupby(["x", "y"]).sum() + gdg = gdf.groupby(["x", "y"]).sum() + pdh = pdg.groupby(level=[0, 1]).sum() + gdh = gdg.groupby(level=[0, 1]).sum() + assert_groupby_results_equal(pdh, gdh) + pdh = pdg.groupby(level=[1, 0]).sum() + gdh = gdg.groupby(level=[1, 0]).sum() + assert_groupby_results_equal(pdh, gdh) + pdg = pdf.groupby(["x", "y"]).sum() + gdg = gdf.groupby(["x", "y"]).sum() + + assert_exceptions_equal( + lfunc=pdg.groupby, + rfunc=gdg.groupby, + lfunc_args_and_kwargs=([], {"level": 2}), + rfunc_args_and_kwargs=([], {"level": 2}), + ) + + +@pytest.mark.parametrize( + "func", + [ + lambda df: df.groupby(["x", "y", "z"]).sum(), + lambda df: df.groupby(["x", "y"]).sum(), + lambda df: df.groupby(["x", "y"]).agg("sum"), + lambda df: df.groupby(["y"]).sum(), + lambda df: df.groupby(["y"]).agg("sum"), + lambda df: df.groupby(["x"]).sum(), + lambda df: df.groupby(["x"]).agg("sum"), + lambda df: df.groupby(["x", "y"]).z.sum(), + lambda df: df.groupby(["x", "y"]).z.agg("sum"), + ], +) +def test_empty_groupby(func): + pdf = pd.DataFrame({"x": [], "y": [], "z": []}) + gdf = cudf.from_pandas(pdf) + assert_groupby_results_equal(func(pdf), func(gdf), check_index_type=False) + + +def test_groupby_unsupported_columns(): + rng = np.random.default_rng(seed=12) + pd_cat = pd.Categorical( + pd.Series(rng.choice(["a", "b", 1], 3), dtype="category") + ) + pdf = pd.DataFrame( + { + "x": [1, 2, 3], + "y": ["a", "b", "c"], + "z": ["d", "e", "f"], + "a": [3, 4, 5], + } + ) + pdf["b"] = pd_cat + gdf = cudf.from_pandas(pdf) + pdg = pdf.groupby("x").sum(numeric_only=True) + # cudf does not yet support numeric_only, so our default is False (unlike + # pandas, which defaults to inferring and throws a warning about it). 
+ gdg = gdf.groupby("x").sum(numeric_only=True) + assert_groupby_results_equal(pdg, gdg) + + +def test_list_of_series(): + pdf = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 1]}) + gdf = cudf.from_pandas(pdf) + pdg = pdf.groupby([pdf.x]).y.sum() + gdg = gdf.groupby([gdf.x]).y.sum() + assert_groupby_results_equal(pdg, gdg) + pdg = pdf.groupby([pdf.x, pdf.y]).y.sum() + gdg = gdf.groupby([gdf.x, gdf.y]).y.sum() + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_apply_basic_agg_single_column(): + gdf = cudf.DataFrame( + { + "key": [0, 0, 1, 1, 2, 2, 0], + "val": [0, 1, 2, 3, 4, 5, 6], + "mult": [0, 1, 2, 3, 4, 5, 6], + } + ) + pdf = gdf.to_pandas() + + gdg = gdf.groupby(["key", "val"]).mult.sum() + pdg = pdf.groupby(["key", "val"]).mult.sum() + assert_groupby_results_equal(pdg, gdg) + + +def test_groupby_nulls_in_index(): + pdf = pd.DataFrame({"a": [None, 2, 1, 1], "b": [1, 2, 3, 4]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").sum(), gdf.groupby("a").sum() + ) + + +def test_groupby_all_nulls_index(): + gdf = cudf.DataFrame( + { + "a": cudf.Series([None, None, None, None], dtype="object"), + "b": [1, 2, 3, 4], + } + ) + pdf = gdf.to_pandas() + assert_groupby_results_equal( + pdf.groupby("a").sum(), gdf.groupby("a").sum() + ) + + gdf = cudf.DataFrame( + {"a": cudf.Series([np.nan, np.nan, np.nan, np.nan]), "b": [1, 2, 3, 4]} + ) + pdf = gdf.to_pandas() + assert_groupby_results_equal( + pdf.groupby("a").sum(), gdf.groupby("a").sum() + ) + + +@pytest.mark.parametrize("sort", [True, False]) +def test_groupby_sort(sort): + pdf = pd.DataFrame({"a": [2, 2, 1, 1], "b": [1, 2, 3, 4]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a", sort=sort).sum(), + gdf.groupby("a", sort=sort).sum(), + check_like=not sort, + ) + + pdf = pd.DataFrame( + {"c": [-1, 2, 1, 4], "b": [1, 2, 3, 4], "a": [2, 2, 1, 1]} + ) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby(["c", "b"], sort=sort).sum(), + gdf.groupby(["c", "b"], sort=sort).sum(), + check_like=not sort, + ) + + ps = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=[2, 2, 2, 3, 3, 1, 1, 1]) + gs = cudf.from_pandas(ps) + + assert_groupby_results_equal( + ps.groupby(level=0, sort=sort).sum().to_frame(), + gs.groupby(level=0, sort=sort).sum().to_frame(), + check_like=not sort, + ) + + ps = pd.Series( + [1, 2, 3, 4, 5, 6, 7, 8], + index=pd.MultiIndex.from_product([(1, 2), ("a", "b"), (42, 84)]), + ) + gs = cudf.from_pandas(ps) + + assert_groupby_results_equal( + ps.groupby(level=0, sort=sort).sum().to_frame(), + gs.groupby(level=0, sort=sort).sum().to_frame(), + check_like=not sort, + ) + + +def test_groupby_cat(): + pdf = pd.DataFrame( + {"a": [1, 1, 2], "b": pd.Series(["b", "b", "a"], dtype="category")} + ) + gdf = cudf.from_pandas(pdf) + assert_groupby_results_equal( + pdf.groupby("a").count(), + gdf.groupby("a").count(), + check_dtype=False, + ) + + +def test_groupby_index_type(): + df = cudf.DataFrame() + df["string_col"] = ["a", "b", "c"] + df["counts"] = [1, 2, 3] + res = df.groupby(by="string_col").counts.sum() + assert res.index.dtype == cudf.dtype("object") + + +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize("q", [0.25, 0.4, 0.5, 0.7, 1]) +def test_groupby_quantile(request, interpolation, q): + request.applymarker( + pytest.mark.xfail( + condition=(q == 0.5 and interpolation == "nearest"), + reason=( + "Pandas NaN Rounding will fail nearest interpolation at 0.5" + ), + ) + ) + 
+
+    raw_data = {
+        "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9],
+        "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2],
+    }
+    # Pandas > 0.25 casts NaN in quantile operations to float64, so we
+    # fill with zeros here.
+    pdf = pd.DataFrame(raw_data).fillna(0)
+    gdf = cudf.DataFrame.from_pandas(pdf)
+
+    pdg = pdf.groupby("x")
+    gdg = gdf.groupby("x")
+
+    pdresult = pdg.quantile(q, interpolation=interpolation)
+    gdresult = gdg.quantile(q, interpolation=interpolation)
+
+    assert_groupby_results_equal(pdresult, gdresult)
+
+
+def test_groupby_std():
+    raw_data = {
+        "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2],
+        "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9],
+    }
+    pdf = pd.DataFrame(raw_data)
+    gdf = cudf.DataFrame.from_pandas(pdf)
+    pdg = pdf.groupby("x")
+    gdg = gdf.groupby("x")
+    pdresult = pdg.std()
+    gdresult = gdg.std()
+
+    assert_groupby_results_equal(pdresult, gdresult)
+
+
+def test_groupby_size():
+    pdf = pd.DataFrame(
+        {
+            "a": [1, 1, 3, 4],
+            "b": ["bob", "bob", "alice", "cooper"],
+            "c": [1, 2, 3, 4],
+        }
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    assert_groupby_results_equal(
+        pdf.groupby("a").size(),
+        gdf.groupby("a").size(),
+        check_dtype=False,
+    )
+
+    assert_groupby_results_equal(
+        pdf.groupby(["a", "b", "c"]).size(),
+        gdf.groupby(["a", "b", "c"]).size(),
+        check_dtype=False,
+    )
+
+    sr = pd.Series(range(len(pdf)))
+    assert_groupby_results_equal(
+        pdf.groupby(sr).size(),
+        gdf.groupby(sr).size(),
+        check_dtype=False,
+    )
+
+
+def test_groupby_datetime(as_index, groupby_reduction_methods):
+    pdf = pd.DataFrame(
+        {
+            "x": [1, 2, 3],
+            "y": [4, 5, 6],
+            "val": [7, 8, 9],
+            "datetime": pd.date_range("2020-01-01", periods=3),
+        }
+    )
+    gdf = cudf.DataFrame(pdf)
+    pdg = pdf.groupby("datetime", as_index=as_index)
+    gdg = gdf.groupby("datetime", as_index=as_index)
+    pdres = getattr(pdg, groupby_reduction_methods)()
+    gdres = getattr(gdg, groupby_reduction_methods)()
+    assert_groupby_results_equal(
+        pdres,
+        gdres,
+        as_index=as_index,
+        by=["datetime"],
+    )
+
+
+def test_groupby_dropna():
+    df = cudf.DataFrame({"a": [1, 1, None], "b": [1, 2, 3]})
+    expect = cudf.DataFrame(
+        {"b": [3, 3]}, index=cudf.Series([1, None], name="a")
+    )
+    got = df.groupby("a", dropna=False).sum()
+    assert_groupby_results_equal(expect, got)
+
+    df = cudf.DataFrame(
+        {"a": [1, 1, 1, None], "b": [1, None, 1, None], "c": [1, 2, 3, 4]}
+    )
+    idx = cudf.MultiIndex.from_frame(
+        df[["a", "b"]].drop_duplicates().sort_values(["a", "b"]),
+        names=["a", "b"],
+    )
+    expect = cudf.DataFrame({"c": [4, 2, 4]}, index=idx)
+    got = df.groupby(["a", "b"], dropna=False).sum()
+
+    assert_groupby_results_equal(expect, got)
+
+
+def test_groupby_dropna_getattr():
+    df = cudf.DataFrame()
+    df["id"] = [0, 1, 1, None, None, 3, 3]
+    df["val"] = [0, 1, 1, 2, 2, 3, 3]
+    got = df.groupby("id", dropna=False).val.sum()
+
+    expect = cudf.Series(
+        [0, 2, 6, 4], name="val", index=cudf.Series([0, 1, 3, None], name="id")
+    )
+
+    assert_groupby_results_equal(expect, got)
+
+
+def test_groupby_categorical_from_string():
+    gdf = cudf.DataFrame()
+    gdf["id"] = ["a", "b", "c"]
+    gdf["val"] = [0, 1, 2]
+    gdf["id"] = gdf["id"].astype("category")
+    assert_groupby_results_equal(
+        cudf.DataFrame({"val": gdf["val"]}).set_index(keys=gdf["id"]),
+        gdf.groupby("id").sum(),
+    )
+
+
+def test_groupby_arbitrary_length_series():
+    gdf = cudf.DataFrame({"a": [1, 1, 2], "b": [2, 3, 4]}, index=[4, 5, 6])
+    gsr = cudf.Series([1.0, 2.0, 2.0], index=[3, 4, 5])
+
+    pdf = gdf.to_pandas()
+    psr = gsr.to_pandas()
+
+    expect = pdf.groupby(psr).sum()
+    got = 
gdf.groupby(gsr).sum() + + assert_groupby_results_equal(expect, got) + + +def test_groupby_series_same_name_as_dataframe_column(): + gdf = cudf.DataFrame({"a": [1, 1, 2], "b": [2, 3, 4]}, index=[4, 5, 6]) + gsr = cudf.Series([1.0, 2.0, 2.0], name="a", index=[3, 4, 5]) + + pdf = gdf.to_pandas() + psr = gsr.to_pandas() + + expect = pdf.groupby(psr).sum() + got = gdf.groupby(gsr).sum() + + assert_groupby_results_equal(expect, got) + + +def test_group_by_series_and_column_name_in_by(): + gdf = cudf.DataFrame( + {"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}, index=[1, 2, 3] + ) + gsr0 = cudf.Series([0.0, 1.0, 2.0], name="a", index=[1, 2, 3]) + gsr1 = cudf.Series([0.0, 1.0, 3.0], name="b", index=[3, 4, 5]) + + pdf = gdf.to_pandas() + psr0 = gsr0.to_pandas() + psr1 = gsr1.to_pandas() + + expect = pdf.groupby(["x", psr0, psr1]).sum() + got = gdf.groupby(["x", gsr0, gsr1]).sum() + + assert_groupby_results_equal(expect, got) + + +def test_raise_data_error(): + pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + gdf = cudf.from_pandas(pdf) + + assert_exceptions_equal( + pdf.groupby("a").mean, + gdf.groupby("a").mean, + ) + + +def test_reset_index_after_empty_groupby(): + # GH #5475 + pdf = pd.DataFrame({"a": [1, 2, 3]}) + gdf = cudf.from_pandas(pdf) + + assert_groupby_results_equal( + pdf.groupby("a").sum().reset_index(), + gdf.groupby("a").sum().reset_index(), + as_index=False, + by="a", + ) + + +def test_groupby_attribute_error(): + err_msg = "Test error message" + + class TestGroupBy(cudf.core.groupby.GroupBy): + @property + def _groupby(self): + raise AttributeError(err_msg) + + a = cudf.DataFrame({"a": [1, 2], "b": [2, 3]}) + gb = TestGroupBy(a, a["a"]) + + with pytest.raises(AttributeError, match=err_msg): + gb.sum() + + +@pytest.mark.parametrize( + "pdf", + [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], +) +def test_groupby_no_keys(pdf): + gdf = cudf.from_pandas(pdf) + if isinstance(pdf, pd.DataFrame): + kwargs = {"check_column_type": False} + else: + kwargs = {} + assert_groupby_results_equal( + pdf.groupby([]).max(), + gdf.groupby([]).max(), + check_dtype=False, + check_index_type=False, # Int64 v/s Float64 + **kwargs, + ) diff --git a/python/cudf/cudf/tests/groupby/test_shift.py b/python/cudf/cudf/tests/groupby/test_shift.py new file mode 100644 index 00000000000..edb48826138 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_shift.py @@ -0,0 +1,204 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
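+#
+# GroupBy.shift tests: rows move by ``periods`` within each group and the
+# vacated slots are filled with nulls or an explicit ``fill_value``.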
+ + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal +from cudf.testing.dataset_generator import rand_dataframe + + +@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +@pytest.mark.parametrize("fill_value", [None, np.nan, 42]) +def test_groupby_shift_row(shift_perc, direction, fill_value): + nelem = 20 + pdf = pd.DataFrame(np.ones((nelem, 3)), columns=["x", "y", "val"]) + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + expected = pdf.groupby(["x", "y"]).shift( + periods=n_shift, fill_value=fill_value + ) + got = gdf.groupby(["x", "y"]).shift(periods=n_shift, fill_value=fill_value) + + assert_groupby_results_equal(expected, got) + + +@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +@pytest.mark.parametrize( + "fill_value", + [ + None, + pytest.param( + 0, + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/10608" + ), + ), + pytest.param( + 42, + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/10608" + ), + ), + ], +) +def test_groupby_shift_row_mixed_numerics(shift_perc, direction, fill_value): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + expected = pdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) + got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) + + assert_groupby_results_equal(expected, got) + + +# TODO: Shifting list columns is currently unsupported because we cannot +# construct a null list scalar in python. Support once it is added. 
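+# For context, a minimal sketch of the gap (illustrative frame, assuming a
+# working cudf install):
+#
+#     gdf = cudf.DataFrame({"g": [1, 1, 2, 2], "v": [10, 20, 30, 40]})
+#     gdf.groupby("g").shift(1)  # "v" gets a null at the head of each group
+#
+# Shift vacates one slot at an end of every group, and that slot must be
+# filled with a null (or ``fill_value``) of the column's own dtype; for a
+# list column that null scalar cannot currently be built from Python.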
+@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +def test_groupby_shift_row_mixed(shift_perc, direction): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + expected = pdf.groupby(["0"]).shift(periods=n_shift) + got = gdf.groupby(["0"]).shift(periods=n_shift) + + assert_groupby_results_equal(expected, got) + + +@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) +@pytest.mark.parametrize("direction", [1, -1]) +@pytest.mark.parametrize( + "fill_value", + [ + [ + 42, + "fill", + np.datetime64(123, "ns"), + np.timedelta64(456, "ns"), + ] + ], +) +def test_groupby_shift_row_mixed_fill(shift_perc, direction, fill_value): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + pdf = t.to_pandas() + gdf = cudf.from_pandas(pdf) + n_shift = int(nelem * shift_perc) * direction + + # Pandas does not support specifying different fill_value by column, so we + # simulate it column by column + expected = pdf.copy() + for col, single_fill in zip(pdf.iloc[:, 1:], fill_value, strict=True): + expected[col] = ( + pdf[col] + .groupby(pdf["0"]) + .shift(periods=n_shift, fill_value=single_fill) + ) + + got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) + + assert_groupby_results_equal( + expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] + ) + + +@pytest.mark.parametrize("fill_value", [None, 0, 42]) +def test_groupby_shift_row_zero_shift(fill_value): + nelem = 20 + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, + { + "dtype": "datetime64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + { + "dtype": "timedelta64[ns]", + "null_frequency": 0.4, + "cardinality": 10, + }, + ], + rows=nelem, + use_threads=False, + seed=0, + ) + gdf = cudf.from_pandas(t.to_pandas()) + + expected = gdf + got = gdf.groupby(["0"]).shift(periods=0, fill_value=fill_value) + + assert_groupby_results_equal( + expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] + ) diff --git a/python/cudf/cudf/tests/groupby/test_unique.py b/python/cudf/cudf/tests/groupby/test_unique.py new file mode 100644 index 00000000000..e515531e478 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_unique.py @@ -0,0 +1,31 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
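+#
+# GroupBy.unique tests: each group's distinct values are collected into a
+# list-like result and compared with pandas across supported dtypes.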
+import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_groupby_results_equal + + +@pytest.mark.parametrize( + "by,data", + [ + ([], []), + ([1, 1, 2, 2], [0, 0, 1, 1]), + ([1, 2, 3, 4], [0, 0, 0, 0]), + ([1, 2, 1, 2], [0, 1, 1, 1]), + ], +) +def test_groupby_unique(by, data, all_supported_types_as_str, request): + pdf = pd.DataFrame({"by": by, "data": data}) + pdf["data"] = pdf["data"].astype(all_supported_types_as_str) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby("by")["data"].unique() + got = gdf.groupby("by")["data"].unique() + request.applymarker( + pytest.mark.xfail( + len(by) == 0 and all_supported_types_as_str == "category", + reason="pandas returns Categorical, cuDF returns np.ndarray", + ) + ) + assert_groupby_results_equal(expect, got, check_dtype=len(by) > 0) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 552ac748e3e..5cab96d3db9 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1,19 +1,15 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. import collections -import datetime import itertools import operator import string import textwrap -from decimal import Decimal from functools import partial import numpy as np import pandas as pd import pytest -from numba import cuda -from numpy.testing import assert_array_equal import cudf from cudf import DataFrame, Series @@ -28,13 +24,8 @@ from cudf.core.udf.utils import UDFError, precompiled from cudf.testing import assert_eq from cudf.testing._utils import ( - DATETIME_TYPES, - SIGNED_TYPES, - TIMEDELTA_TYPES, - assert_exceptions_equal, expect_warning_if, ) -from cudf.testing.dataset_generator import rand_dataframe _now = np.datetime64("now") _tomorrow = _now + np.timedelta64(1, "D") @@ -106,310 +97,6 @@ def pdf(gdf): return gdf.to_pandas() -def test_groupby_mean(): - nelem = 20 - got_df = make_frame(DataFrame, nelem=nelem).groupby(["x", "y"]).mean() - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem).groupby(["x", "y"]).mean() - ) - assert_groupby_results_equal(got_df, expect_df) - - -def test_groupby_mean_3level(): - nelem = 20 - lvls = "z" - bys = list("xyz") - got_df = ( - make_frame(DataFrame, nelem=nelem, extra_levels=lvls) - .groupby(bys) - .mean() - ) - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem, extra_levels=lvls) - .groupby(bys) - .mean() - ) - assert_groupby_results_equal(got_df, expect_df) - - -def test_groupby_agg_mean_min(): - nelem = 20 - got_df = ( - make_frame(DataFrame, nelem=nelem) - .groupby(["x", "y"]) - .agg(["mean", "min"]) - ) - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem) - .groupby(["x", "y"]) - .agg(["mean", "min"]) - ) - assert_groupby_results_equal(got_df, expect_df) - - -def test_groupby_agg_min_max_dictargs(): - nelem = 20 - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem, extra_vals="ab") - .groupby(["x", "y"]) - .agg({"a": "min", "b": "max"}) - ) - got_df = ( - make_frame(DataFrame, nelem=nelem, extra_vals="ab") - .groupby(["x", "y"]) - .agg({"a": "min", "b": "max"}) - ) - assert_groupby_results_equal(expect_df, got_df) - - -def test_groupby_agg_min_max_dictlist(): - nelem = 20 - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem, extra_vals="ab") - .groupby(["x", "y"]) - .agg({"a": ["min", "max"], "b": ["min", "max"]}) - ) - got_df = ( - make_frame(DataFrame, nelem=nelem, extra_vals="ab") - .groupby(["x", "y"]) - .agg({"a": ["min", "max"], "b": ["min", "max"]}) - ) - assert_groupby_results_equal(got_df, expect_df) - - 
-@pytest.mark.parametrize("as_index", [True, False]) -def test_groupby_as_index_single_agg(pdf, gdf, as_index): - gdf = gdf.groupby("y", as_index=as_index).agg({"x": "mean"}) - pdf = pdf.groupby("y", as_index=as_index).agg({"x": "mean"}) - assert_groupby_results_equal(pdf, gdf, as_index=as_index, by="y") - - -@pytest.mark.parametrize("engine", ["cudf", "jit"]) -@pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_as_index_apply(pdf, gdf, as_index, engine): - gdf = gdf.groupby("y", as_index=as_index).apply( - lambda df: df["x"].mean(), engine=engine - ) - kwargs = {"func": lambda df: df["x"].mean(), "include_groups": False} - pdf = pdf.groupby("y", as_index=as_index).apply(**kwargs) - assert_groupby_results_equal(pdf, gdf, as_index=as_index, by="y") - - -@pytest.mark.parametrize("as_index", [True, False]) -def test_groupby_as_index_multiindex(pdf, gdf, as_index): - pdf = pd.DataFrame( - {"a": [1, 2, 1], "b": [3, 3, 3], "c": [2, 2, 3], "d": [3, 1, 2]} - ) - gdf = cudf.from_pandas(pdf) - - gdf = gdf.groupby(["a", "b"], as_index=as_index, sort=True).agg( - {"c": "mean"} - ) - pdf = pdf.groupby(["a", "b"], as_index=as_index, sort=True).agg( - {"c": "mean"} - ) - - if as_index: - assert_eq(pdf, gdf) - else: - # column names don't match - check just the values - for gcol, pcol in zip(gdf, pdf, strict=True): - assert_array_equal(gdf[gcol].to_numpy(), pdf[pcol].values) - - -def test_groupby_default(pdf, gdf): - gdf = gdf.groupby("y").agg({"x": "mean"}) - pdf = pdf.groupby("y").agg({"x": "mean"}) - assert_groupby_results_equal(pdf, gdf) - - -def test_group_keys_true(pdf, gdf): - gdf = gdf.groupby("y", group_keys=True).sum() - pdf = pdf.groupby("y", group_keys=True).sum() - assert_groupby_results_equal(pdf, gdf) - - -@pytest.mark.parametrize("as_index", [True, False]) -def test_groupby_getitem_getattr(as_index): - pdf = pd.DataFrame({"x": [1, 3, 1], "y": [1, 2, 3], "z": [1, 4, 5]}) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal( - pdf.groupby("x", as_index=as_index)["y"].sum(), - gdf.groupby("x", as_index=as_index)["y"].sum(), - as_index=as_index, - by="x", - ) - assert_groupby_results_equal( - pdf.groupby("x", as_index=as_index).y.sum(), - gdf.groupby("x", as_index=as_index).y.sum(), - as_index=as_index, - by="x", - ) - assert_groupby_results_equal( - pdf.groupby("x", as_index=as_index)[["y"]].sum(), - gdf.groupby("x", as_index=as_index)[["y"]].sum(), - as_index=as_index, - by="x", - ) - assert_groupby_results_equal( - pdf.groupby(["x", "y"], as_index=as_index).sum(), - gdf.groupby(["x", "y"], as_index=as_index).sum(), - as_index=as_index, - by=["x", "y"], - ) - - -def test_groupby_cats(): - rng = np.random.default_rng(seed=0) - df = DataFrame( - {"cats": pd.Categorical(list("aabaacaab")), "vals": rng.random(9)} - ) - - cats = df["cats"].values_host - vals = df["vals"].to_numpy() - - grouped = df.groupby(["cats"], as_index=False).mean() - - got_vals = grouped["vals"] - - got_cats = grouped["cats"] - - for i in range(len(got_vals)): - expect = vals[cats == got_cats[i]].mean() - np.testing.assert_almost_equal(got_vals[i], expect) - - -def test_groupby_iterate_groups(): - rng = np.random.default_rng(seed=0) - nelem = 20 - df = DataFrame( - { - "key1": rng.integers(0, 3, nelem), - "key2": rng.integers(0, 2, nelem), - "val1": rng.random(nelem), - "val2": rng.random(nelem), - } - ) - - def assert_values_equal(arr): - 
np.testing.assert_array_equal(arr[0], arr) - - for name, grp in df.groupby(["key1", "key2"]): - pddf = grp.to_pandas() - for k in "key1,key2".split(","): - assert_values_equal(pddf[k].values) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply(): - rng = np.random.default_rng(seed=0) - nelem = 20 - df = DataFrame( - { - "key1": rng.integers(0, 3, nelem), - "key2": rng.integers(0, 2, nelem), - "val1": rng.random(nelem), - "val2": rng.random(nelem), - } - ) - - expect_grpby = df.to_pandas().groupby( - ["key1", "key2"], as_index=False, group_keys=False - ) - got_grpby = df.groupby(["key1", "key2"]) - - def foo(df): - df["out"] = df["val1"] + df["val2"] - return df - - expect = expect_grpby.apply(foo, include_groups=False) - got = got_grpby.apply(foo, include_groups=False) - assert_groupby_results_equal(expect, got) - - -def f1(df, k): - df["out"] = df["val1"] + df["val2"] + k - return df - - -def f2(df, k, L): - df["out"] = df["val1"] - df["val2"] + (k / L) - return df - - -def f3(df, k, L, m): - df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m - return df - - -@pytest.mark.parametrize( - "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_args(func, args): - rng = np.random.default_rng(seed=0) - nelem = 20 - df = DataFrame( - { - "key1": rng.integers(0, 3, nelem), - "key2": rng.integers(0, 2, nelem), - "val1": rng.random(nelem), - "val2": rng.random(nelem), - } - ) - - expect_grpby = df.to_pandas().groupby( - ["key1", "key2"], as_index=False, group_keys=False - ) - got_grpby = df.groupby(["key1", "key2"]) - expect = expect_grpby.apply(func, *args, include_groups=False) - got = got_grpby.apply(func, *args, include_groups=False) - assert_groupby_results_equal(expect, got) - - -def test_groupby_apply_grouped(): - df = DataFrame() - nelem = 20 - df["key1"] = range(nelem) - df["key2"] = range(nelem) - df["val1"] = range(nelem) - df["val2"] = range(nelem) - - got_grpby = df.groupby(["key1", "key2"]) - - def foo(key1, val1, com1, com2): - for i in range(cuda.threadIdx.x, len(key1), cuda.blockDim.x): - com1[i] = key1[i] * 10000 + val1[i] - com2[i] = i - - got = got_grpby.apply_grouped( - foo, - incols=["key1", "val1"], - outcols={"com1": np.float64, "com2": np.int32}, - tpb=8, - ) - - got = got.to_pandas() - - expect = df.copy() - expect["com1"] = (expect["key1"] * 10000 + expect["key1"]).astype( - np.float64 - ) - expect["com2"] = np.zeros(nelem, dtype=np.int32) - - assert_groupby_results_equal(expect, got) - - @pytest.fixture(scope="module") def groupby_jit_data_small(): """ @@ -1000,1440 +687,150 @@ def pdf_func(df): assert_groupby_results_equal(expect, got) -@pytest.mark.parametrize( - "func", - [ - "mean", - "std", - "var", - "min", - "max", - "idxmin", - "idxmax", - "count", - "sum", - "prod", - ], +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", ) -def test_groupby_2keys_agg(func): - # gdf (Note: lack of multiIndex) - nelem = 20 - expect_df = ( - make_frame(pd.DataFrame, nelem=nelem).groupby(["x", "y"]).agg(func) +def test_groupby_apply_noempty_group(): + pdf = pd.DataFrame( + {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]} ) - got_df = make_frame(DataFrame, nelem=nelem).groupby(["x", "y"]).agg(func) + gdf = cudf.from_pandas(pdf) 
- check_dtype = func not in _index_type_aggs - assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) + expect = ( + pdf.groupby("a", group_keys=False) + .apply(lambda x: x.iloc[[0, 1]], include_groups=False) + .reset_index(drop=True) + ) + got = ( + gdf.groupby("a") + .apply(lambda x: x.iloc[[0, 1]], include_groups=False) + .reset_index(drop=True) + ) + assert_groupby_results_equal(expect, got) -@pytest.mark.parametrize("num_groups", [2, 20]) -@pytest.mark.parametrize("nelem_per_group", [1, 10]) -@pytest.mark.parametrize( - "func", - ["min", "max", "count", "sum"], - # TODO: Replace the above line with the one below once - # https://github.com/pandas-dev/pandas/issues/40685 is resolved. - # "func", ["min", "max", "idxmin", "idxmax", "count", "sum"], -) -def test_groupby_agg_decimal(num_groups, nelem_per_group, func): - rng = np.random.default_rng(seed=0) - # The number of digits after the decimal to use. - decimal_digits = 2 - # The number of digits before the decimal to use. - whole_digits = 2 - - scale = 10**whole_digits - nelem = num_groups * nelem_per_group - - # The unique is necessary because otherwise if there are duplicates idxmin - # and idxmax may return different results than pandas (see - # https://github.com/rapidsai/cudf/issues/7756). This is not relevant to - # the current version of the test, because idxmin and idxmax simply don't - # work with pandas Series composed of Decimal objects (see - # https://github.com/pandas-dev/pandas/issues/40685). However, if that is - # ever enabled, then this issue will crop up again so we may as well have - # it fixed now. - x = np.unique((rng.random(nelem) * scale).round(decimal_digits)) - y = np.unique((rng.random(nelem) * scale).round(decimal_digits)) - - if x.size < y.size: - total_elements = x.size - y = y[: x.size] - else: - total_elements = y.size - x = x[: y.size] +def create_test_groupby_apply_return_scalars_params(): + def f0(x): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / 10 + return full - # Note that this filtering can lead to one group with fewer elements, but - # that shouldn't be a problem and is probably useful to test. 
- idx_col = np.tile(np.arange(num_groups), nelem_per_group)[:total_elements] + def f1(x, k): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / k + return full - decimal_x = pd.Series([Decimal(str(d)) for d in x]) - decimal_y = pd.Series([Decimal(str(d)) for d in y]) + def f2(x, k, L): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) + return full - pdf = pd.DataFrame({"idx": idx_col, "x": decimal_x, "y": decimal_y}) - gdf = DataFrame( - { - "idx": idx_col, - "x": cudf.Series(decimal_x), - "y": cudf.Series(decimal_y), - } - ) + def f3(x, k, L, m): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) % m + return full - expect_df = pdf.groupby("idx", sort=True).agg(func) - got_df = gdf.groupby("idx", sort=True).agg(func) - assert_eq(expect_df["x"], got_df["x"], check_dtype=False) - assert_eq(expect_df["y"], got_df["y"], check_dtype=False) + return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] @pytest.mark.parametrize( - "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"] + "func,args", create_test_groupby_apply_return_scalars_params() ) -def test_series_groupby(agg): - s = pd.Series([1, 2, 3]) - g = Series([1, 2, 3]) - sg = s.groupby(s // 2) - gg = g.groupby(g // 2) - sa = getattr(sg, agg)() - ga = getattr(gg, agg)() - check_dtype = agg not in _index_type_aggs - assert_groupby_results_equal(sa, ga, check_dtype=check_dtype) - - -@pytest.mark.parametrize( - "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"] +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", ) -def test_series_groupby_agg(agg): - s = pd.Series([1, 2, 3]) - g = Series([1, 2, 3]) - sg = s.groupby(s // 2).agg(agg) - gg = g.groupby(g // 2).agg(agg) - check_dtype = agg not in _index_type_aggs - assert_groupby_results_equal(sg, gg, check_dtype=check_dtype) +def test_groupby_apply_return_scalars(func, args): + pdf = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5], + "B": [ + 0.01, + np.nan, + 0.03, + 0.04, + np.nan, + 0.06, + 0.07, + 0.08, + 0.09, + 1.0, + ], + } + ) + gdf = cudf.from_pandas(pdf) + expected = pdf.groupby("A").apply(func, *args, include_groups=False) + actual = gdf.groupby("A").apply(func, *args, include_groups=False) -@pytest.mark.parametrize( - "agg", - [ - "min", - "max", - "count", - "sum", - "prod", - "mean", - pytest.param( - "idxmin", - marks=pytest.mark.xfail(reason="gather needed for idxmin"), - ), - pytest.param( - "idxmax", - marks=pytest.mark.xfail(reason="gather needed for idxmax"), - ), - ], -) -def test_groupby_level_zero(agg): - pdf = pd.DataFrame({"x": [1, 2, 3]}, index=[2, 5, 5]) - gdf = DataFrame.from_pandas(pdf) - pdg = pdf.groupby(level=0) - gdg = gdf.groupby(level=0) - pdresult = getattr(pdg, agg)() - gdresult = getattr(gdg, agg)() - check_dtype = agg not in _index_type_aggs - assert_groupby_results_equal(pdresult, gdresult, check_dtype=check_dtype) + assert_groupby_results_equal(expected, actual) -@pytest.mark.parametrize( - "agg", - [ - "min", - "max", - "count", - "sum", - "prod", - "mean", - pytest.param( - "idxmin", - marks=pytest.mark.xfail(reason="gather needed for idxmin"), - ), - pytest.param( - "idxmax", - marks=pytest.mark.xfail(reason="gather needed for idxmax"), - ), - ], -) -def test_groupby_series_level_zero(agg): - pdf = pd.Series([1, 2, 3], index=[2, 5, 5]) - gdf = Series.from_pandas(pdf) - pdg = pdf.groupby(level=0) - gdg = gdf.groupby(level=0) - pdresult = getattr(pdg, agg)() - 
gdresult = getattr(gdg, agg)() - check_dtype = agg not in _index_type_aggs - assert_groupby_results_equal(pdresult, gdresult, check_dtype=check_dtype) - - -def test_groupby_column_name(): - pdf = pd.DataFrame({"xx": [1.0, 2.0, 3.0], "yy": [1, 2, 3]}) - gdf = DataFrame.from_pandas(pdf) - g = gdf.groupby("yy") - p = pdf.groupby("yy") - gxx = g["xx"].sum() - pxx = p["xx"].sum() - assert_groupby_results_equal(pxx, gxx) - - gxx = g["xx"].count() - pxx = p["xx"].count() - assert_groupby_results_equal(pxx, gxx, check_dtype=False) - - gxx = g["xx"].min() - pxx = p["xx"].min() - assert_groupby_results_equal(pxx, gxx) - - gxx = g["xx"].max() - pxx = p["xx"].max() - assert_groupby_results_equal(pxx, gxx) - - gxx = g["xx"].idxmin() - pxx = p["xx"].idxmin() - assert_groupby_results_equal(pxx, gxx, check_dtype=False) - - gxx = g["xx"].idxmax() - pxx = p["xx"].idxmax() - assert_groupby_results_equal(pxx, gxx, check_dtype=False) - - gxx = g["xx"].mean() - pxx = p["xx"].mean() - assert_groupby_results_equal(pxx, gxx) - - -def test_groupby_column_numeral(): - pdf = pd.DataFrame({0: [1.0, 2.0, 3.0], 1: [1, 2, 3]}) - gdf = DataFrame.from_pandas(pdf) - p = pdf.groupby(1) - g = gdf.groupby(1) - pxx = p[0].sum() - gxx = g[0].sum() - assert_groupby_results_equal(pxx, gxx) - - pdf = pd.DataFrame({0.5: [1.0, 2.0, 3.0], 1.5: [1, 2, 3]}) - gdf = DataFrame.from_pandas(pdf) - p = pdf.groupby(1.5) - g = gdf.groupby(1.5) - pxx = p[0.5].sum() - gxx = g[0.5].sum() - assert_groupby_results_equal(pxx, gxx) +def create_test_groupby_apply_return_series_dataframe_params(): + def f0(x): + return x - x.max() + def f1(x): + return x.min() - x.max() -@pytest.mark.parametrize( - "series", - [ - [0, 1, 0], - [1, 1, 1], - [0, 1, 1], - [1, 2, 3], - [4, 3, 2], - [0, 2, 0], - pd.Series([0, 2, 0]), - pd.Series([0, 2, 0], index=[0, 2, 1]), - ], -) -def test_groupby_external_series(series): - pdf = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}) - gdf = DataFrame.from_pandas(pdf) - pxx = pdf.groupby(pd.Series(series)).x.sum() - gxx = gdf.groupby(cudf.Series(series)).x.sum() - assert_groupby_results_equal(pxx, gxx) + def f2(x): + return x.min() + def f3(x, k): + return x - x.max() + k -@pytest.mark.parametrize("series", [[0.0, 1.0], [1.0, 1.0, 1.0, 1.0]]) -def test_groupby_external_series_incorrect_length(series): - pdf = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}) - gdf = DataFrame.from_pandas(pdf) - pxx = pdf.groupby(pd.Series(series)).x.sum() - gxx = gdf.groupby(cudf.Series(series)).x.sum() - assert_groupby_results_equal(pxx, gxx) + def f4(x, k, L): + return x.min() - x.max() + (k / L) + def f5(x, k, L, m): + return m * x.min() + (k / L) -@pytest.mark.parametrize( - "level", [0, 1, "a", "b", [0, 1], ["a", "b"], ["a", 1], -1, [-1, -2]] -) -def test_groupby_levels(level): - idx = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (2, 2)], names=("a", "b")) - pdf = pd.DataFrame({"c": [1, 2, 3], "d": [2, 3, 4]}, index=idx) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal( - pdf.groupby(level=level).sum(), - gdf.groupby(level=level).sum(), - ) - - -def test_advanced_groupby_levels(): - pdf = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 1], "z": [1, 1, 1]}) - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby(["x", "y"]).sum() - gdg = gdf.groupby(["x", "y"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdh = pdg.groupby(level=1).sum() - gdh = gdg.groupby(level=1).sum() - assert_groupby_results_equal(pdh, gdh) - pdg = pdf.groupby(["x", "y", "z"]).sum() - gdg = gdf.groupby(["x", "y", "z"]).sum() - assert_groupby_results_equal(pdg, 
gdg) - pdg = pdf.groupby(["z"]).sum() - gdg = gdf.groupby(["z"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdg = pdf.groupby(["y", "z"]).sum() - gdg = gdf.groupby(["y", "z"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdg = pdf.groupby(["x", "z"]).sum() - gdg = gdf.groupby(["x", "z"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdg = pdf.groupby(["y"]).sum() - gdg = gdf.groupby(["y"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdg = pdf.groupby(["x"]).sum() - gdg = gdf.groupby(["x"]).sum() - assert_groupby_results_equal(pdg, gdg) - pdh = pdg.groupby(level=0).sum() - gdh = gdg.groupby(level=0).sum() - assert_groupby_results_equal(pdh, gdh) - pdg = pdf.groupby(["x", "y"]).sum() - gdg = gdf.groupby(["x", "y"]).sum() - pdh = pdg.groupby(level=[0, 1]).sum() - gdh = gdg.groupby(level=[0, 1]).sum() - assert_groupby_results_equal(pdh, gdh) - pdh = pdg.groupby(level=[1, 0]).sum() - gdh = gdg.groupby(level=[1, 0]).sum() - assert_groupby_results_equal(pdh, gdh) - pdg = pdf.groupby(["x", "y"]).sum() - gdg = gdf.groupby(["x", "y"]).sum() - - assert_exceptions_equal( - lfunc=pdg.groupby, - rfunc=gdg.groupby, - lfunc_args_and_kwargs=([], {"level": 2}), - rfunc_args_and_kwargs=([], {"level": 2}), - ) - - -@pytest.mark.parametrize( - "func", - [ - lambda df: df.groupby(["x", "y", "z"]).sum(), - lambda df: df.groupby(["x", "y"]).sum(), - lambda df: df.groupby(["x", "y"]).agg("sum"), - lambda df: df.groupby(["y"]).sum(), - lambda df: df.groupby(["y"]).agg("sum"), - lambda df: df.groupby(["x"]).sum(), - lambda df: df.groupby(["x"]).agg("sum"), - lambda df: df.groupby(["x", "y"]).z.sum(), - lambda df: df.groupby(["x", "y"]).z.agg("sum"), - ], -) -def test_empty_groupby(func): - pdf = pd.DataFrame({"x": [], "y": [], "z": []}) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal(func(pdf), func(gdf), check_index_type=False) - - -def test_groupby_unsupported_columns(): - rng = np.random.default_rng(seed=12) - pd_cat = pd.Categorical( - pd.Series(rng.choice(["a", "b", 1], 3), dtype="category") - ) - pdf = pd.DataFrame( - { - "x": [1, 2, 3], - "y": ["a", "b", "c"], - "z": ["d", "e", "f"], - "a": [3, 4, 5], - } - ) - pdf["b"] = pd_cat - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby("x").sum(numeric_only=True) - # cudf does not yet support numeric_only, so our default is False (unlike - # pandas, which defaults to inferring and throws a warning about it). 
- gdg = gdf.groupby("x").sum(numeric_only=True) - assert_groupby_results_equal(pdg, gdg) - - -def test_list_of_series(): - pdf = pd.DataFrame({"x": [1, 2, 3], "y": [1, 2, 1]}) - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby([pdf.x]).y.sum() - gdg = gdf.groupby([gdf.x]).y.sum() - assert_groupby_results_equal(pdg, gdg) - pdg = pdf.groupby([pdf.x, pdf.y]).y.sum() - gdg = gdf.groupby([gdf.x, gdf.y]).y.sum() - pytest.skip() - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_use_agg_column_as_index(): - pdf = pd.DataFrame() - pdf["a"] = [1, 1, 1, 3, 5] - gdf = cudf.DataFrame() - gdf["a"] = [1, 1, 1, 3, 5] - pdg = pdf.groupby("a").agg({"a": "count"}) - gdg = gdf.groupby("a").agg({"a": "count"}) - assert_groupby_results_equal(pdg, gdg, check_dtype=False) - - -def test_groupby_list_then_string(): - gdf = cudf.DataFrame() - gdf["a"] = [0, 1, 0, 1, 2] - gdf["b"] = [11, 2, 15, 12, 2] - gdf["c"] = [6, 7, 6, 7, 6] - pdf = gdf.to_pandas() - gdg = gdf.groupby("a", as_index=True).agg( - {"b": ["min", "max"], "c": "max"} - ) - pdg = pdf.groupby("a", as_index=True).agg( - {"b": ["min", "max"], "c": "max"} - ) - assert_groupby_results_equal(gdg, pdg) - - -def test_groupby_different_unequal_length_column_aggregations(): - gdf = cudf.DataFrame() - gdf["a"] = [0, 1, 0, 1, 2] - gdf["b"] = [11, 2, 15, 12, 2] - gdf["c"] = [11, 2, 15, 12, 2] - pdf = gdf.to_pandas() - gdg = gdf.groupby("a", as_index=True).agg( - {"b": "min", "c": ["max", "min"]} - ) - pdg = pdf.groupby("a", as_index=True).agg( - {"b": "min", "c": ["max", "min"]} - ) - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_single_var_two_aggs(): - gdf = cudf.DataFrame() - gdf["a"] = [0, 1, 0, 1, 2] - gdf["b"] = [11, 2, 15, 12, 2] - gdf["c"] = [11, 2, 15, 12, 2] - pdf = gdf.to_pandas() - gdg = gdf.groupby("a", as_index=True).agg({"b": ["min", "max"]}) - pdg = pdf.groupby("a", as_index=True).agg({"b": ["min", "max"]}) - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_double_var_two_aggs(): - gdf = cudf.DataFrame() - gdf["a"] = [0, 1, 0, 1, 2] - gdf["b"] = [11, 2, 15, 12, 2] - gdf["c"] = [11, 2, 15, 12, 2] - pdf = gdf.to_pandas() - gdg = gdf.groupby(["a", "b"], as_index=True).agg({"c": ["min", "max"]}) - pdg = pdf.groupby(["a", "b"], as_index=True).agg({"c": ["min", "max"]}) - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_apply_basic_agg_single_column(): - gdf = DataFrame() - gdf["key"] = [0, 0, 1, 1, 2, 2, 0] - gdf["val"] = [0, 1, 2, 3, 4, 5, 6] - gdf["mult"] = gdf["key"] * gdf["val"] - pdf = gdf.to_pandas() - - gdg = gdf.groupby(["key", "val"]).mult.sum() - pdg = pdf.groupby(["key", "val"]).mult.sum() - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_multi_agg_single_groupby_series(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "x": rng.integers(0, 5, size=10000), - "y": rng.normal(size=10000), - } - ) - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby("x").y.agg(["sum", "max"]) - gdg = gdf.groupby("x").y.agg(["sum", "max"]) - - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_multi_agg_multi_groupby(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "a": rng.integers(0, 5, 10), - "b": rng.integers(0, 5, 10), - "c": rng.integers(0, 5, 10), - "d": rng.integers(0, 5, 10), - } - ) - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby(["a", "b"]).agg(["sum", "max"]) - gdg = gdf.groupby(["a", "b"]).agg(["sum", "max"]) - assert_groupby_results_equal(pdg, gdg) - - -def test_groupby_datetime_multi_agg_multi_groupby(): - rng = 
np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "a": pd.date_range( - datetime.datetime.now(), - datetime.datetime.now() + datetime.timedelta(9), - freq="D", - ), - "b": rng.integers(0, 5, 10), - "c": rng.integers(0, 5, 10), - "d": rng.integers(0, 5, 10), - } - ) - gdf = cudf.from_pandas(pdf) - pdg = pdf.groupby(["a", "b"]).agg(["sum", "max"]) - gdg = gdf.groupby(["a", "b"]).agg(["sum", "max"]) - - assert_groupby_results_equal(pdg, gdg) + return [ + (f0, ()), + (f1, ()), + (f2, ()), + (f3, (42,)), + (f4, (42, 119)), + (f5, (41, 119, 212.1)), + ] @pytest.mark.parametrize( - "agg", - [ - ["min", "max", "count", "mean"], - ["mean", "var", "std"], - ["count", "mean", "var", "std"], - ], + "func,args", create_test_groupby_apply_return_series_dataframe_params() ) -def test_groupby_multi_agg_hash_groupby(agg): - coll_dict = {letter: float for letter in string.ascii_lowercase} - coll_dict["id"] = int - gdf = cudf.datasets.timeseries( - start="2000", - end="2000-01-2", - dtypes=coll_dict, - freq="1s", - seed=1, - ).reset_index(drop=True) - pdf = gdf.to_pandas() - check_dtype = "count" not in agg - pdg = pdf.groupby("id").agg(agg) - gdg = gdf.groupby("id").agg(agg) - assert_groupby_results_equal(pdg, gdg, check_dtype=check_dtype) - - @pytest.mark.skipif( PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="previous verion of pandas throws a warning", -) -@pytest.mark.parametrize( - "agg", ["min", "max", "idxmax", "idxmin", "sum", "prod", "count", "mean"] + reason="Include groups missing on old versions of pandas", ) -def test_groupby_nulls_basic(agg): - check_dtype = agg not in _index_type_aggs - - pdf = pd.DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": [1, 2, 1, 2, 1, None]}) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal( - getattr(pdf.groupby("a"), agg)(), - getattr(gdf.groupby("a"), agg)(), - check_dtype=check_dtype, - ) - - pdf = pd.DataFrame( - { - "a": [0, 0, 1, 1, 2, 2], - "b": [1, 2, 1, 2, 1, None], - "c": [1, 2, 1, None, 1, 2], - } - ) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal( - getattr(pdf.groupby("a"), agg)(), - getattr(gdf.groupby("a"), agg)(), - check_dtype=check_dtype, - ) - - pdf = pd.DataFrame( - { - "a": [0, 0, 1, 1, 2, 2], - "b": [1, 2, 1, 2, 1, None], - "c": [1, 2, None, None, 1, 2], - } - ) - gdf = cudf.from_pandas(pdf) - - # TODO: fillna() used here since we don't follow - # Pandas' null semantics. Should we change it? 
- - assert_groupby_results_equal( - getattr(pdf.groupby("a"), agg)().fillna(0), - getattr(gdf.groupby("a"), agg)().fillna(0 if agg != "prod" else 1), - check_dtype=check_dtype, - ) - - -def test_groupby_nulls_in_index(): - pdf = pd.DataFrame({"a": [None, 2, 1, 1], "b": [1, 2, 3, 4]}) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").sum(), gdf.groupby("a").sum() - ) - - -def test_groupby_all_nulls_index(): - gdf = cudf.DataFrame( - { - "a": cudf.Series([None, None, None, None], dtype="object"), - "b": [1, 2, 3, 4], - } - ) - pdf = gdf.to_pandas() - assert_groupby_results_equal( - pdf.groupby("a").sum(), gdf.groupby("a").sum() - ) - - gdf = cudf.DataFrame( - {"a": cudf.Series([np.nan, np.nan, np.nan, np.nan]), "b": [1, 2, 3, 4]} - ) - pdf = gdf.to_pandas() - assert_groupby_results_equal( - pdf.groupby("a").sum(), gdf.groupby("a").sum() - ) - - -@pytest.mark.parametrize("sort", [True, False]) -def test_groupby_sort(sort): - pdf = pd.DataFrame({"a": [2, 2, 1, 1], "b": [1, 2, 3, 4]}) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.groupby("a", sort=sort).sum(), - gdf.groupby("a", sort=sort).sum(), - check_like=not sort, - ) - +def test_groupby_apply_return_series_dataframe(func, args): pdf = pd.DataFrame( - {"c": [-1, 2, 1, 4], "b": [1, 2, 3, 4], "a": [2, 2, 1, 1]} + {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]} ) gdf = cudf.from_pandas(pdf) - assert_eq( - pdf.groupby(["c", "b"], sort=sort).sum(), - gdf.groupby(["c", "b"], sort=sort).sum(), - check_like=not sort, - ) - - ps = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=[2, 2, 2, 3, 3, 1, 1, 1]) - gs = cudf.from_pandas(ps) - - assert_eq( - ps.groupby(level=0, sort=sort).sum().to_frame(), - gs.groupby(level=0, sort=sort).sum().to_frame(), - check_like=not sort, - ) - - ps = pd.Series( - [1, 2, 3, 4, 5, 6, 7, 8], - index=pd.MultiIndex.from_product([(1, 2), ("a", "b"), (42, 84)]), - ) - gs = cudf.from_pandas(ps) - - assert_eq( - ps.groupby(level=0, sort=sort).sum().to_frame(), - gs.groupby(level=0, sort=sort).sum().to_frame(), - check_like=not sort, - ) - - -def test_groupby_cat(): - pdf = pd.DataFrame( - {"a": [1, 1, 2], "b": pd.Series(["b", "b", "a"], dtype="category")} - ) - gdf = cudf.from_pandas(pdf) - assert_groupby_results_equal( - pdf.groupby("a").count(), - gdf.groupby("a").count(), - check_dtype=False, + expected = pdf.groupby(["key"], group_keys=False).apply( + func, *args, include_groups=False ) + actual = gdf.groupby(["key"]).apply(func, *args, include_groups=False) - -def test_groupby_index_type(): - df = cudf.DataFrame() - df["string_col"] = ["a", "b", "c"] - df["counts"] = [1, 2, 3] - res = df.groupby(by="string_col").counts.sum() - assert res.index.dtype == cudf.dtype("object") + assert_groupby_results_equal(expected, actual) @pytest.mark.parametrize( - "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] + "pdf", + [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], ) -@pytest.mark.parametrize("q", [0.25, 0.4, 0.5, 0.7, 1]) -def test_groupby_quantile(request, interpolation, q): - request.applymarker( - pytest.mark.xfail( - condition=(q == 0.5 and interpolation == "nearest"), - reason=( - "Pandas NaN Rounding will fail nearest interpolation at 0.5" - ), - ) - ) - - raw_data = { - "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9], - "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2], - } - # Pandas>0.25 now casts NaN in quantile operations as a float64 - # # so we are filling with zeros. 
- pdf = pd.DataFrame(raw_data).fillna(0) - gdf = DataFrame.from_pandas(pdf) - - pdg = pdf.groupby("x") - gdg = gdf.groupby("x") - - pdresult = pdg.quantile(q, interpolation=interpolation) - gdresult = gdg.quantile(q, interpolation=interpolation) - - assert_groupby_results_equal(pdresult, gdresult) - - -def test_groupby_std(): - raw_data = { - "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2], - "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9], - } - pdf = pd.DataFrame(raw_data) - gdf = DataFrame.from_pandas(pdf) - pdg = pdf.groupby("x") - gdg = gdf.groupby("x") - pdresult = pdg.std() - gdresult = gdg.std() - - assert_groupby_results_equal(pdresult, gdresult) - - -def test_groupby_size(): - pdf = pd.DataFrame( - { - "a": [1, 1, 3, 4], - "b": ["bob", "bob", "alice", "cooper"], - "c": [1, 2, 3, 4], - } - ) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").size(), - gdf.groupby("a").size(), - check_dtype=False, - ) - - assert_groupby_results_equal( - pdf.groupby(["a", "b", "c"]).size(), - gdf.groupby(["a", "b", "c"]).size(), - check_dtype=False, - ) - - sr = pd.Series(range(len(pdf))) - assert_groupby_results_equal( - pdf.groupby(sr).size(), - gdf.groupby(sr).size(), - check_dtype=False, - ) - - -@pytest.mark.parametrize("index", [None, [1, 2, 3, 4]]) -def test_groupby_cumcount(index): - pdf = pd.DataFrame( - { - "a": [1, 1, 3, 4], - "b": ["bob", "bob", "alice", "cooper"], - "c": [1, 2, 3, 4], - }, - index=index, - ) +def test_groupby_apply_no_keys(pdf): gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").cumcount(), - gdf.groupby("a").cumcount(), - check_dtype=False, - ) - - assert_groupby_results_equal( - pdf.groupby(["a", "b", "c"]).cumcount(), - gdf.groupby(["a", "b", "c"]).cumcount(), - check_dtype=False, - ) - - sr = pd.Series(range(len(pdf)), index=index) - assert_groupby_results_equal( - pdf.groupby(sr).cumcount(), - gdf.groupby(sr).cumcount(), - check_dtype=False, - ) - - -@pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.parametrize( - "agg", ["min", "max", "idxmin", "idxmax", "mean", "count"] -) -def test_groupby_datetime(request, as_index, agg): - nelem = 20 - if agg == "mean" and as_index is True: - request.applymarker( - pytest.mark.xfail(reason="Invalid type/aggregation combination") - ) - check_dtype = agg not in ("mean", "count", "idxmin", "idxmax") - pdf = make_frame(pd.DataFrame, nelem=nelem, with_datetime=True) - gdf = make_frame(cudf.DataFrame, nelem=nelem, with_datetime=True) - pdg = pdf.groupby("datetime", as_index=as_index) - gdg = gdf.groupby("datetime", as_index=as_index) - if as_index is False: - pdres = getattr(pdg, agg)() - gdres = getattr(gdg, agg)() + if isinstance(pdf, pd.DataFrame): + kwargs = {"check_column_type": False} else: - pdres = pdg.agg({"datetime": agg}) - gdres = gdg.agg({"datetime": agg}) - assert_groupby_results_equal( - pdres, - gdres, - check_dtype=check_dtype, - as_index=as_index, - by=["datetime"], - ) - - -def test_groupby_dropna(): - df = cudf.DataFrame({"a": [1, 1, None], "b": [1, 2, 3]}) - expect = cudf.DataFrame( - {"b": [3, 3]}, index=cudf.Series([1, None], name="a") - ) - got = df.groupby("a", dropna=False).sum() - assert_groupby_results_equal(expect, got) - - df = cudf.DataFrame( - {"a": [1, 1, 1, None], "b": [1, None, 1, None], "c": [1, 2, 3, 4]} - ) - idx = cudf.MultiIndex.from_frame( - df[["a", "b"]].drop_duplicates().sort_values(["a", "b"]), - names=["a", "b"], - ) - expect = cudf.DataFrame({"c": [4, 2, 4]}, index=idx) - got = df.groupby(["a", "b"], dropna=False).sum() - - 
assert_groupby_results_equal(expect, got) - - -def test_groupby_dropna_getattr(): - df = cudf.DataFrame() - df["id"] = [0, 1, 1, None, None, 3, 3] - df["val"] = [0, 1, 1, 2, 2, 3, 3] - got = df.groupby("id", dropna=False).val.sum() - - expect = cudf.Series( - [0, 2, 6, 4], name="val", index=cudf.Series([0, 1, 3, None], name="id") - ) - - assert_groupby_results_equal(expect, got) - - -def test_groupby_categorical_from_string(): - gdf = cudf.DataFrame() - gdf["id"] = ["a", "b", "c"] - gdf["val"] = [0, 1, 2] - gdf["id"] = gdf["id"].astype("category") - assert_groupby_results_equal( - cudf.DataFrame({"val": gdf["val"]}).set_index(keys=gdf["id"]), - gdf.groupby("id").sum(), - ) - - -def test_groupby_arbitrary_length_series(): - gdf = cudf.DataFrame({"a": [1, 1, 2], "b": [2, 3, 4]}, index=[4, 5, 6]) - gsr = cudf.Series([1.0, 2.0, 2.0], index=[3, 4, 5]) - - pdf = gdf.to_pandas() - psr = gsr.to_pandas() - - expect = pdf.groupby(psr).sum() - got = gdf.groupby(gsr).sum() - - assert_groupby_results_equal(expect, got) - - -def test_groupby_series_same_name_as_dataframe_column(): - gdf = cudf.DataFrame({"a": [1, 1, 2], "b": [2, 3, 4]}, index=[4, 5, 6]) - gsr = cudf.Series([1.0, 2.0, 2.0], name="a", index=[3, 4, 5]) - - pdf = gdf.to_pandas() - psr = gsr.to_pandas() - - expect = pdf.groupby(psr).sum() - got = gdf.groupby(gsr).sum() - - assert_groupby_results_equal(expect, got) - - -def test_group_by_series_and_column_name_in_by(): - gdf = cudf.DataFrame( - {"x": [1.0, 2.0, 3.0], "y": [1, 2, 1]}, index=[1, 2, 3] - ) - gsr0 = cudf.Series([0.0, 1.0, 2.0], name="a", index=[1, 2, 3]) - gsr1 = cudf.Series([0.0, 1.0, 3.0], name="b", index=[3, 4, 5]) - - pdf = gdf.to_pandas() - psr0 = gsr0.to_pandas() - psr1 = gsr1.to_pandas() - - expect = pdf.groupby(["x", psr0, psr1]).sum() - got = gdf.groupby(["x", gsr0, gsr1]).sum() - - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize( - "grouper", - [ - "a", - ["a"], - ["a", "b"], - np.array([0, 1, 1, 2, 3, 2]), - {0: "a", 1: "a", 2: "b", 3: "a", 4: "b", 5: "c"}, - lambda x: x + 1, - ["a", np.array([0, 1, 1, 2, 3, 2])], - ], -) -def test_grouping(grouper): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2, 3], - "b": [1, 2, 1, 2, 1, 2], - "c": [1, 2, 3, 4, 5, 6], - } - ) - gdf = cudf.from_pandas(pdf) - - for pdf_group, gdf_group in zip( - pdf.groupby(grouper), gdf.groupby(grouper), strict=True - ): - assert pdf_group[0] == gdf_group[0] - assert_eq(pdf_group[1], gdf_group[1]) - - -@pytest.mark.parametrize("agg", [lambda x: x.count(), "count"]) -@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) -def test_groupby_count(agg, by): - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]} - ) - gdf = cudf.from_pandas(pdf) - - expect = pdf.groupby(by).agg(agg) - got = gdf.groupby(by).agg(agg) - - assert_groupby_results_equal(expect, got, check_dtype=True) - - -@pytest.mark.parametrize("agg", [lambda x: x.median(), "median"]) -@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) -def test_groupby_median(agg, by): - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]} - ) - gdf = cudf.from_pandas(pdf) - - expect = pdf.groupby(by).agg(agg) - got = gdf.groupby(by).agg(agg) - - assert_groupby_results_equal(expect, got, check_dtype=False) - - -@pytest.mark.parametrize("agg", [lambda x: x.nunique(), "nunique"]) -@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) -def test_groupby_nunique(agg, by): - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 
1], "c": [1, 2, None, 4, 5]} - ) - gdf = cudf.from_pandas(pdf) - - expect = pdf.groupby(by).nunique() - got = gdf.groupby(by).nunique() - - assert_groupby_results_equal(expect, got, check_dtype=False) - - -@pytest.mark.parametrize("dropna", [True, False]) -def test_nunique_dropna(dropna): - gdf = cudf.DataFrame( - { - "a": [1, 1, 2], - "b": [4, None, 5], - "c": [None, None, 7], - "d": [1, 1, 3], - } - ) - pdf = gdf.to_pandas() - - result = gdf.groupby("a")["b"].nunique(dropna=dropna) - expected = pdf.groupby("a")["b"].nunique(dropna=dropna) - assert_groupby_results_equal(result, expected, check_dtype=False) - - -@pytest.mark.parametrize( - "n", - [0, 1, 2, 10], -) -@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]]) -def test_groupby_nth(n, by): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 3], - "b": [1, 2, 2, 2, 1], - "c": [1, 2, None, 4, 5], - "d": ["a", "b", "c", "d", "e"], - } - ) - gdf = cudf.from_pandas(pdf) - - expect = pdf.groupby(by).nth(n) - got = gdf.groupby(by).nth(n) - - assert_groupby_results_equal(expect, got, check_dtype=False) - - -def test_raise_data_error(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) - gdf = cudf.from_pandas(pdf) - - assert_exceptions_equal( - pdf.groupby("a").mean, - gdf.groupby("a").mean, - ) - - -def test_multi_agg(): - gdf = cudf.DataFrame( - {"a": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": ["a", "b", "c", "d"]} - ) - pdf = gdf.to_pandas() - assert_groupby_results_equal( - pdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}), - gdf.groupby("a").agg({"b": ["count", "mean"], "c": ["count"]}), - ) - - -@pytest.mark.parametrize( - "agg", - ( - [ - *itertools.combinations(["count", "max", "min", "nunique"], 2), - {"b": "min", "c": "mean"}, - {"b": "max", "c": "mean"}, - {"b": "count", "c": "mean"}, - {"b": "nunique", "c": "mean"}, - ] - ), -) -def test_groupby_agg_combinations(agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 2, 2, 3], - "b": ["a", "a", "b", "c", "d"], - "c": [1, 2, 3, 4, 5], - } - ) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").agg(agg), - gdf.groupby("a").agg(agg), - check_dtype=False, - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_noempty_group(): - pdf = pd.DataFrame( - {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]} - ) - gdf = cudf.from_pandas(pdf) - - expect = ( - pdf.groupby("a", group_keys=False) - .apply(lambda x: x.iloc[[0, 1]], include_groups=False) - .reset_index(drop=True) - ) - got = ( - gdf.groupby("a") - .apply(lambda x: x.iloc[[0, 1]], include_groups=False) - .reset_index(drop=True) - ) - assert_groupby_results_equal(expect, got) - - -def test_reset_index_after_empty_groupby(): - # GH #5475 - pdf = pd.DataFrame({"a": [1, 2, 3]}) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").sum().reset_index(), - gdf.groupby("a").sum().reset_index(), - as_index=False, - by="a", - ) - - -def test_groupby_attribute_error(): - err_msg = "Test error message" - - class TestGroupBy(cudf.core.groupby.GroupBy): - @property - def _groupby(self): - raise AttributeError(err_msg) - - a = cudf.DataFrame({"a": [1, 2], "b": [2, 3]}) - gb = TestGroupBy(a, a["a"]) - - with pytest.raises(AttributeError, match=err_msg): - gb.sum() - - -@pytest.mark.parametrize( - "by", - [ - "a", - "b", - ["a"], - ["b"], - ["a", "b"], - ["b", "a"], - np.array([0, 0, 0, 1, 1, 1, 2]), - ], -) -def test_groupby_groups(by): - pdf = 
pd.DataFrame( - {"a": [1, 2, 1, 2, 1, 2, 3], "b": [1, 2, 3, 4, 5, 6, 7]} - ) - gdf = cudf.from_pandas(pdf) - - pdg = pdf.groupby(by) - gdg = gdf.groupby(by) - - for key in pdg.groups: - assert key in gdg.groups - assert_eq(pdg.groups[key], gdg.groups[key]) - - -@pytest.mark.parametrize( - "by", - [ - "a", - "b", - ["a"], - ["b"], - ["a", "b"], - ["b", "a"], - ["a", "c"], - ["a", "b", "c"], - ], -) -def test_groupby_groups_multi(by): - pdf = pd.DataFrame( - { - "a": [1, 2, 1, 2, 1, 2, 3], - "b": ["a", "b", "a", "b", "b", "c", "c"], - "c": [1, 2, 3, 4, 5, 6, 7], - } - ) - gdf = cudf.from_pandas(pdf) - - pdg = pdf.groupby(by) - gdg = gdf.groupby(by) - - for key in pdg.groups: - assert key in gdg.groups - assert_eq(pdg.groups[key], gdg.groups[key]) - - -def test_groupby_nunique_series(): - pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 1, 1, 2]}) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a")["b"].nunique(), - gdf.groupby("a")["b"].nunique(), - check_dtype=False, - ) - - -@pytest.mark.parametrize("list_agg", [list, "collect"]) -def test_groupby_list_simple(list_agg): - pdf = pd.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, None, 4, 5, 6]}) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").agg({"b": list}), - gdf.groupby("a").agg({"b": list_agg}), - check_dtype=False, - ) - - -@pytest.mark.parametrize("list_agg", [list, "collect"]) -def test_groupby_list_of_lists(list_agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2, 2], - "b": [[1, 2], [3, None, 5], None, [], [7, 8], [9]], - } - ) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").agg({"b": list}), - gdf.groupby("a").agg({"b": list_agg}), - check_dtype=False, - ) - - -@pytest.mark.parametrize("list_agg", [list, "collect"]) -def test_groupby_list_of_structs(list_agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2, 2], - "b": [ - {"c": "1", "d": 1}, - {"c": "2", "d": 2}, - {"c": "3", "d": 3}, - {"c": "4", "d": 4}, - {"c": "5", "d": 5}, - {"c": "6", "d": 6}, - ], - } - ) - gdf = cudf.from_pandas(pdf) - grouped = gdf.groupby("a").agg({"b": list_agg}) - assert_groupby_results_equal( - pdf.groupby("a").agg({"b": list}), - grouped, - check_dtype=True, - ) - assert grouped["b"].dtype.element_type == gdf["b"].dtype - - -@pytest.mark.parametrize("list_agg", [list, "collect"]) -def test_groupby_list_single_element(list_agg): - pdf = pd.DataFrame({"a": [1, 2], "b": [3, None]}) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").agg({"b": list}), - gdf.groupby("a").agg({"b": list_agg}), - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "agg", [list, [list, "count"], {"b": list, "c": "sum"}] -) -def test_groupby_list_strings(agg): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": ["b", "a", None, "e", "d"], - "c": [1, 2, 3, 4, 5], - } - ) - gdf = cudf.from_pandas(pdf) - - assert_groupby_results_equal( - pdf.groupby("a").agg(agg), - gdf.groupby("a").agg(agg), - check_dtype=False, - ) - - -def test_groupby_list_columns_excluded(): - pdf = pd.DataFrame( - { - "a": [1, 1, 2, 2], - "b": [1, 2, 3, 4], - "c": [[1, 2], [3, 4], [5, 6], [7, 8]], - } - ) - gdf = cudf.from_pandas(pdf) - - pandas_result = pdf.groupby("a").mean(numeric_only=True) - pandas_agg_result = pdf.groupby("a").agg("mean", numeric_only=True) - - assert_groupby_results_equal( - pandas_result, - gdf.groupby("a").mean(numeric_only=True), - check_dtype=False, - ) - - assert_groupby_results_equal( - pandas_agg_result, - 
gdf.groupby("a").agg("mean"), - check_dtype=False, - ) - - -def test_groupby_pipe(): - pdf = pd.DataFrame({"A": "a b a b".split(), "B": [1, 2, 3, 4]}) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("A").pipe(lambda x: x.max() - x.min()) - actual = gdf.groupby("A").pipe(lambda x: x.max() - x.min()) - - assert_groupby_results_equal(expected, actual) - - -def create_test_groupby_apply_return_scalars_params(): - def f0(x): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = ticker / 10 - return full - - def f1(x, k): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = ticker / k - return full - - def f2(x, k, L): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = L * (ticker / k) - return full - - def f3(x, k, L, m): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = L * (ticker / k) % m - return full - - return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] - - -@pytest.mark.parametrize( - "func,args", create_test_groupby_apply_return_scalars_params() -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_return_scalars(func, args): - pdf = pd.DataFrame( - { - "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5], - "B": [ - 0.01, - np.nan, - 0.03, - 0.04, - np.nan, - 0.06, - 0.07, - 0.08, - 0.09, - 1.0, - ], - } - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("A").apply(func, *args, include_groups=False) - actual = gdf.groupby("A").apply(func, *args, include_groups=False) - - assert_groupby_results_equal(expected, actual) - - -def create_test_groupby_apply_return_series_dataframe_params(): - def f0(x): - return x - x.max() - - def f1(x): - return x.min() - x.max() - - def f2(x): - return x.min() - - def f3(x, k): - return x - x.max() + k - - def f4(x, k, L): - return x.min() - x.max() + (k / L) - - def f5(x, k, L, m): - return m * x.min() + (k / L) - - return [ - (f0, ()), - (f1, ()), - (f2, ()), - (f3, (42,)), - (f4, (42, 119)), - (f5, (41, 119, 212.1)), - ] - - -@pytest.mark.parametrize( - "func,args", create_test_groupby_apply_return_series_dataframe_params() -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_return_series_dataframe(func, args): - pdf = pd.DataFrame( - {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]} - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby(["key"], group_keys=False).apply( - func, *args, include_groups=False - ) - actual = gdf.groupby(["key"]).apply(func, *args, include_groups=False) - - assert_groupby_results_equal(expected, actual) - - -@pytest.mark.parametrize( - "pdf", - [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], -) -def test_groupby_no_keys(pdf): - gdf = cudf.from_pandas(pdf) - if isinstance(pdf, pd.DataFrame): - kwargs = {"check_column_type": False} - else: - kwargs = {} - assert_groupby_results_equal( - pdf.groupby([]).max(), - gdf.groupby([]).max(), - check_dtype=False, - check_index_type=False, # Int64 v/s Float64 - **kwargs, - ) - - -@pytest.mark.parametrize( - "pdf", - [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], -) -def test_groupby_apply_no_keys(pdf): - gdf = cudf.from_pandas(pdf) - if isinstance(pdf, pd.DataFrame): - kwargs = {"check_column_type": False} - else: - kwargs = {} + kwargs = {} assert_groupby_results_equal( pdf.groupby([], group_keys=False).apply(lambda x: x.max()), gdf.groupby([]).apply(lambda x: x.max()), @@ 
-2442,623 +839,6 @@ def test_groupby_apply_no_keys(pdf): ) -@pytest.mark.parametrize( - "pdf", - [pd.DataFrame({"a": [1, 2]}), pd.DataFrame({"a": [1, 2], "b": [2, 3]})], -) -def test_groupby_nonempty_no_keys(pdf): - gdf = cudf.from_pandas(pdf) - assert_exceptions_equal( - lambda: pdf.groupby([]), - lambda: gdf.groupby([]), - ) - - -@pytest.mark.parametrize( - "by,data", - [ - pytest.param( - [], - [], - marks=pytest.mark.xfail(reason="dtype always cast to object"), - ), - ([1, 1, 2, 2], [0, 0, 1, 1]), - ([1, 2, 3, 4], [0, 0, 0, 0]), - ([1, 2, 1, 2], [0, 1, 1, 1]), - ], -) -@pytest.mark.parametrize( - "dtype", - SIGNED_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["string", "category"], -) -def test_groupby_unique(by, data, dtype): - pdf = pd.DataFrame({"by": by, "data": data}) - pdf["data"] = pdf["data"].astype(dtype) - gdf = cudf.from_pandas(pdf) - - expect = pdf.groupby("by")["data"].unique() - got = gdf.groupby("by")["data"].unique() - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize( - "func", ["cummin", "cummax", "cumcount", "cumsum", "cumprod"] -) -def test_groupby_2keys_scan(func): - nelem = 20 - pdf = make_frame(pd.DataFrame, nelem=nelem) - expect_df = pdf.groupby(["x", "y"], sort=True).agg(func) - gdf = cudf.from_pandas(pdf) - got_df = gdf.groupby(["x", "y"], sort=True).agg(func) - # pd.groupby.cumcount returns a series. - if isinstance(expect_df, pd.Series): - expect_df = expect_df.to_frame("val") - - check_dtype = func not in _index_type_aggs - assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) - - expect_df = getattr(pdf.groupby(["x", "y"], sort=True), func)() - got_df = getattr(gdf.groupby(["x", "y"], sort=True), func)() - assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) - - expect_df = getattr(pdf.groupby(["x", "y"], sort=True)[["x"]], func)() - got_df = getattr(gdf.groupby(["x", "y"], sort=True)[["x"]], func)() - assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) - - expect_df = getattr(pdf.groupby(["x", "y"], sort=True)["y"], func)() - got_df = getattr(gdf.groupby(["x", "y"], sort=True)["y"], func)() - assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) - - -@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) -@pytest.mark.parametrize("pct", [False, True]) -def test_groupby_2keys_rank(method, ascending, na_option, pct): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - pdf = t.to_pandas() - pdf.columns = ["x", "y", "z"] - gdf = cudf.from_pandas(pdf) - expect_df = pdf.groupby(["x", "y"], sort=True).rank( - method=method, ascending=ascending, na_option=na_option, pct=pct - ) - got_df = gdf.groupby(["x", "y"], sort=True).rank( - method=method, ascending=ascending, na_option=na_option, pct=pct - ) - - assert_groupby_results_equal(got_df, expect_df, check_dtype=False) - - -def test_groupby_rank_fails(): - gdf = cudf.DataFrame( - {"x": [1, 2, 3, 4], "y": [1, 2, 3, 4], "z": [1, 2, 3, 4]} - ) - with pytest.raises(NotImplementedError): - gdf.groupby(["x", "y"]).rank(method="min", axis=1) - gdf = cudf.DataFrame( - { - "a": [1, 1, 1, 2, 2, 2], - "b": [[1, 2], [3, None, 5], None, [], [7, 8], 
[9]], - } - ) - with pytest.raises(NotImplementedError): - gdf.groupby(["a"]).rank(method="min", axis=1) - - -@pytest.mark.parametrize( - "with_nan", [False, True], ids=["just-NA", "also-NaN"] -) -@pytest.mark.parametrize("dropna", [False, True], ids=["keepna", "dropna"]) -@pytest.mark.parametrize( - "duplicate_index", [False, True], ids=["rangeindex", "dupindex"] -) -def test_groupby_scan_null_keys(with_nan, dropna, duplicate_index): - key_col = [None, 1, 2, None, 3, None, 3, 1, None, 1] - if with_nan: - df = pd.DataFrame( - {"key": pd.Series(key_col, dtype="float32"), "value": range(10)} - ) - else: - df = pd.DataFrame( - {"key": pd.Series(key_col, dtype="Int32"), "value": range(10)} - ) - - if duplicate_index: - # Non-default index with duplicates - df.index = [1, 2, 3, 1, 3, 2, 4, 1, 6, 10] - - cdf = cudf.from_pandas(df) - - expect = df.groupby("key", dropna=dropna).cumsum() - got = cdf.groupby("key", dropna=dropna).cumsum() - assert_eq(expect, got) - - -def test_groupby_mix_agg_scan(): - err_msg = "Cannot perform both aggregation and scan in one operation" - func = ["cumsum", "sum"] - gb = make_frame(DataFrame, nelem=10).groupby(["x", "y"], sort=True) - - gb.agg(func[0]) - gb.agg(func[1]) - gb.agg(func[1:]) - with pytest.raises(NotImplementedError, match=err_msg): - gb.agg(func) - - -@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -@pytest.mark.parametrize("fill_value", [None, np.nan, 42]) -def test_groupby_shift_row(shift_perc, direction, fill_value): - nelem = 20 - pdf = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"]) - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - expected = pdf.groupby(["x", "y"]).shift( - periods=n_shift, fill_value=fill_value - ) - got = gdf.groupby(["x", "y"]).shift(periods=n_shift, fill_value=fill_value) - - assert_groupby_results_equal( - expected[["val", "val2"]], got[["val", "val2"]] - ) - - -@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -@pytest.mark.parametrize( - "fill_value", - [ - None, - pytest.param( - 0, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/10608" - ), - ), - pytest.param( - 42, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/10608" - ), - ), - ], -) -def test_groupby_shift_row_mixed_numerics(shift_perc, direction, fill_value): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - expected = pdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) - got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] - ) - - -# TODO: Shifting list columns is currently unsupported because we cannot -# construct a null list scalar in python. Support once it is added. 
-@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_shift_row_mixed(shift_perc, direction): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - expected = pdf.groupby(["0"]).shift(periods=n_shift) - got = gdf.groupby(["0"]).shift(periods=n_shift) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] - ) - - -@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -@pytest.mark.parametrize( - "fill_value", - [ - [ - 42, - "fill", - np.datetime64(123, "ns"), - np.timedelta64(456, "ns"), - ] - ], -) -def test_groupby_shift_row_mixed_fill(shift_perc, direction, fill_value): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - # Pandas does not support specifying different fill_value by column, so we - # simulate it column by column - expected = pdf.copy() - for col, single_fill in zip(pdf.iloc[:, 1:], fill_value, strict=True): - expected[col] = ( - pdf[col] - .groupby(pdf["0"]) - .shift(periods=n_shift, fill_value=single_fill) - ) - - got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] - ) - - -@pytest.mark.parametrize("fill_value", [None, 0, 42]) -def test_groupby_shift_row_zero_shift(fill_value): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - gdf = cudf.from_pandas(t.to_pandas()) - - expected = gdf - got = gdf.groupby(["0"]).shift(periods=0, fill_value=fill_value) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] - ) - - -@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_diff_row(shift_perc, direction): - nelem = 20 - pdf = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"]) - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - expected = pdf.groupby(["x", "y"]).diff(periods=n_shift) - got = gdf.groupby(["x", "y"]).diff(periods=n_shift) - - 
assert_groupby_results_equal( - expected[["val", "val2"]], got[["val", "val2"]] - ) - - -@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5]) -@pytest.mark.parametrize("direction", [1, -1]) -def test_groupby_diff_row_mixed_numerics(shift_perc, direction): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - n_shift = int(nelem * shift_perc) * direction - - expected = pdf.groupby(["0"]).diff(periods=n_shift) - got = gdf.groupby(["0"]).diff(periods=n_shift) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4", "5"]], got[["1", "2", "3", "4", "5"]] - ) - - -def test_groupby_diff_row_zero_shift(): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - gdf = cudf.from_pandas(t.to_pandas()) - - expected = gdf - got = gdf.groupby(["0"]).shift(periods=0) - - assert_groupby_results_equal( - expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -def test_groupby_fillna_multi_value(): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ms]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - key_col = "0" - value_cols = ["1", "2", "3", "4", "5", "6"] - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - - # fill the dataframe with the first non-null item in the column - fill_values = { - name: pdf[name].loc[pdf[name].first_valid_index()] - for name in value_cols - } - # cudf can't fillna with a pandas.Timedelta type - fill_values["4"] = fill_values["4"].to_numpy() - with pytest.warns(FutureWarning): - expect = pdf.groupby(key_col).fillna(value=fill_values) - with pytest.warns(FutureWarning): - got = gdf.groupby(key_col).fillna(value=fill_values) - - assert_groupby_results_equal(expect[value_cols], got[value_cols]) - - -# TODO: cudf.fillna does not support decimal column to column fill yet -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -def test_groupby_fillna_multi_value_df(): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": 
"int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ms]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - key_col = "0" - value_cols = ["1", "2", "3", "4", "5"] - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - - # fill the dataframe with the first non-null item in the column - fill_values = { - name: pdf[name].loc[pdf[name].first_valid_index()] - for name in value_cols - } - # cudf can't fillna with a pandas.Timedelta type - fill_values["4"] = fill_values["4"].to_numpy() - fill_values = pd.DataFrame(fill_values, index=pdf.index) - with pytest.warns(FutureWarning): - expect = pdf.groupby(key_col).fillna(value=fill_values) - - fill_values = cudf.from_pandas(fill_values) - with pytest.warns(FutureWarning): - got = gdf.groupby(key_col).fillna(value=fill_values) - - assert_groupby_results_equal(expect[value_cols], got[value_cols]) - - -@pytest.mark.parametrize( - "by", - [pd.Series([1, 1, 2, 2, 3, 4]), lambda x: x % 2 == 0, pd.Grouper(level=0)], -) -@pytest.mark.parametrize( - "data", [[1, None, 2, None, 3, None], [1, 2, 3, 4, 5, 6]] -) -@pytest.mark.parametrize("args", [{"value": 42}, {"method": "ffill"}]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -def test_groupby_various_by_fillna(by, data, args): - ps = pd.Series(data) - gs = cudf.from_pandas(ps) - - with pytest.warns(FutureWarning): - expect = ps.groupby(by).fillna(**args) - if isinstance(by, pd.Grouper): - by = cudf.Grouper(level=by.level) - with pytest.warns(FutureWarning): - got = gs.groupby(by).fillna(**args) - - assert_groupby_results_equal(expect, got, check_dtype=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize("method", ["ffill", "bfill"]) -def test_groupby_fillna_method(method): - nelem = 20 - t = rand_dataframe( - dtypes_meta=[ - {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, - {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10}, - { - "dtype": "datetime64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "timedelta64[ns]", - "null_frequency": 0.4, - "cardinality": 10, - }, - { - "dtype": "list", - "null_frequency": 0.4, - "cardinality": 10, - "lists_max_length": 10, - "nesting_max_depth": 3, - "value_type": "int64", - }, - {"dtype": "category", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "decimal64", "null_frequency": 0.4, "cardinality": 10}, - {"dtype": "str", "null_frequency": 0.4, "cardinality": 10}, - ], - rows=nelem, - use_threads=False, - seed=0, - ) - key_col = "0" - value_cols = ["1", "2", "3", "4", "5", "6", "7", "8"] - pdf = t.to_pandas() - gdf = cudf.from_pandas(pdf) - - with pytest.warns(FutureWarning): - expect = pdf.groupby(key_col).fillna(method=method) - with pytest.warns(FutureWarning): - got = gdf.groupby(key_col).fillna(method=method) - - assert_groupby_results_equal( - expect[value_cols], got[value_cols], sort=False - ) - - @pytest.mark.parametrize( "data", [ From 9ea44e8ab91f0fb50a4f1c9d55c3d4f4bfc32dd6 Mon Sep 17 
00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:51:43 -0700 Subject: [PATCH 124/366] Move test_offset/repr.py to new cudf classic testing directory (#19677) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19677 --- python/cudf/cudf/tests/dataframe/test_repr.py | 484 ++++++ .../tests/dateoffset/test_constructors.py | 55 + .../indexes/categoricalindex/test_repr.py | 46 + .../cudf/tests/indexes/index/test_repr.py | 128 ++ .../tests/indexes/intervalindex/__init__.py | 0 .../tests/indexes/intervalindex/test_repr.py | 20 + .../tests/indexes/multiindex/test_repr.py | 281 +++ .../tests/indexes/timedeltaindex/test_repr.py | 69 + python/cudf/cudf/tests/series/test_repr.py | 521 ++++++ python/cudf/cudf/tests/test_offset.py | 61 - python/cudf/cudf/tests/test_repr.py | 1518 ----------------- 11 files changed, 1604 insertions(+), 1579 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/test_repr.py create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/test_repr.py create mode 100644 python/cudf/cudf/tests/indexes/index/test_repr.py create mode 100644 python/cudf/cudf/tests/indexes/intervalindex/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/intervalindex/test_repr.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/test_repr.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/test_repr.py create mode 100644 python/cudf/cudf/tests/series/test_repr.py delete mode 100644 python/cudf/cudf/tests/test_offset.py delete mode 100644 python/cudf/cudf/tests/test_repr.py diff --git a/python/cudf/cudf/tests/dataframe/test_repr.py b/python/cudf/cudf/tests/dataframe/test_repr.py new file mode 100644 index 00000000000..be2fddb8436 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_repr.py @@ -0,0 +1,484 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
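+
+# NOTE: cudf renders missing values as <NA> in repr output, so the comparisons
+# in this module normalize pandas' "NaN"/"None" tokens to <NA> before
+# asserting equality against the cudf repr.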
+
+import textwrap
+
+import numpy as np
+import pandas as pd
+import pytest
+from hypothesis import given, settings, strategies as st
+
+import cudf
+
+
+@pytest.mark.parametrize("ncols", [1, 2, 10])
+def test_null_dataframe(ncols):
+    dtype_categories = [
+        "float32",
+        "float64",
+        "datetime64[ns]",
+        "str",
+        "category",
+    ]
+    rng = np.random.default_rng(seed=0)
+    size = 20
+    data = cudf.DataFrame()
+    for dtype in dtype_categories:
+        sr = cudf.Series(rng.integers(0, 128, size)).astype(dtype)
+        sr[rng.choice([False, True], size=size)] = None
+        data[dtype] = sr
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+    with pd.option_context("display.max_columns", int(ncols)):
+        pdf_repr = repr(pdf).replace("NaN", "<NA>").replace("None", "<NA>")
+        assert pdf_repr.split() == repr(gdf).split()
+
+
+@pytest.mark.parametrize("nrows", [5, 10, 15])
+@pytest.mark.parametrize("ncols", [5, 10, 15])
+@pytest.mark.parametrize("size", [20, 21])
+def test_full_dataframe_20(all_supported_types_as_str, size, nrows, ncols):
+    rng = np.random.default_rng(seed=0)
+    pdf = pd.DataFrame(
+        {idx: rng.integers(0, 100, size) for idx in range(size)}
+    ).astype(all_supported_types_as_str)
+    gdf = cudf.from_pandas(pdf)
+
+    with pd.option_context(
+        "display.max_rows", int(nrows), "display.max_columns", int(ncols)
+    ):
+        assert repr(pdf) == repr(gdf)
+        assert pdf._repr_html_() == gdf._repr_html_()
+        assert pdf._repr_latex_() == gdf._repr_latex_()
+
+
+@given(
+    st.lists(
+        st.integers(-9223372036854775808, 9223372036854775807),
+        min_size=1,
+        max_size=1000,
+    )
+)
+@settings(deadline=None, max_examples=20)
+def test_integer_dataframe(x):
+    gdf = cudf.DataFrame({"x": x})
+    pdf = gdf.to_pandas()
+    with pd.option_context("display.max_columns", 1):
+        assert repr(gdf) == repr(pdf)
+        assert repr(gdf.T) == repr(pdf.T)
+
+
+@given(st.lists(st.floats()))
+@settings(deadline=None, max_examples=20)
+def test_float_dataframe(x):
+    gdf = cudf.DataFrame({"x": cudf.Series(x, dtype=float, nan_as_null=False)})
+    pdf = gdf.to_pandas()
+    assert repr(gdf) == repr(pdf)
+
+
+def test_mixed_dataframe():
+    data = {
+        "Integer": np.array([2345, 11987, 9027, 9027]),
+        "Date": np.array(
+            ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"]
+        ),
+        "Float": np.array([9.001, 8.343, 6, 2.781]),
+        "Integer2": np.array([2345, 106, 2088, 789277]),
+        "Category": np.array(["M", "F", "F", "F"]),
+        "String": np.array(["Alpha", "Beta", "Gamma", "Delta"]),
+        "Boolean": np.array([True, False, True, False]),
+    }
+    mixed_gdf = cudf.DataFrame(data)
+    mixed_pdf = pd.DataFrame(data)
+    assert repr(mixed_gdf) == repr(mixed_pdf)
+
+
+def test_MI():
+    rng = np.random.default_rng(seed=0)
+    gdf = cudf.DataFrame(
+        {
+            "a": rng.integers(0, 4, 10),
+            "b": rng.integers(0, 4, 10),
+            "c": rng.integers(0, 4, 10),
+        }
+    )
+    levels = [["a", "b", "c", "d"], ["w", "x", "y", "z"], ["m", "n"]]
+    codes = [
+        [0, 0, 0, 0, 1, 1, 2, 2, 3, 3],
+        [0, 1, 2, 3, 0, 1, 2, 3, 0, 1],
+        [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
+    ]
+    with pd.option_context("display.max_rows", 999, "display.max_columns", 0):
+        gdf = gdf.set_index(cudf.MultiIndex(levels=levels, codes=codes))
+        pdf = gdf.to_pandas()
+        assert repr(gdf) == repr(pdf)
+        assert repr(gdf.index) == repr(pdf.index)
+        assert repr(gdf.T) == repr(pdf.T)
+
+
+@pytest.mark.parametrize("nrows", [0, 1, 3, 5, 10])
+@pytest.mark.parametrize("ncols", [0, 1, 2, 3])
+def test_groupby_MI(nrows, ncols):
+    gdf = cudf.DataFrame(
+        {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}
+    )
+    pdf = gdf.to_pandas()
+    gdg = gdf.groupby(["a", "b"],
sort=True).count() + pdg = pdf.groupby(["a", "b"], sort=True).count() + with pd.option_context( + "display.max_rows", nrows, "display.max_columns", ncols + ): + assert repr(gdg) == repr(pdg) + assert repr(gdg.index) == repr(pdg.index) + assert repr(gdg.T) == repr(pdg.T) + + +@pytest.mark.parametrize( + "gdf", + [ + lambda: cudf.DataFrame({"a": range(10000)}), + lambda: cudf.DataFrame({"a": range(10000), "b": range(10000)}), + lambda: cudf.DataFrame({"a": range(20), "b": range(20)}), + lambda: cudf.DataFrame( + { + "a": range(20), + "b": range(20), + "c": ["abc", "def", "xyz", "def", "pqr"] * 4, + } + ), + lambda: cudf.DataFrame(index=[1, 2, 3]), + lambda: cudf.DataFrame(index=range(10000)), + lambda: cudf.DataFrame(columns=["a", "b", "c", "d"]), + lambda: cudf.DataFrame(columns=["a"], index=range(10000)), + lambda: cudf.DataFrame( + columns=["a", "col2", "...col n"], index=range(10000) + ), + lambda: cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), + lambda: cudf.DataFrame( + columns=["a", "b", "c", "d"], + index=cudf.Series(range(10000)).astype("str"), + ), + ], +) +@pytest.mark.parametrize( + "slc", + [ + slice(2500, 5000), + slice(2500, 2501), + slice(5000), + slice(1, 10), + slice(10, 20), + slice(15, 2400), + ], +) +@pytest.mark.parametrize("max_seq_items", [1, 10, 60, 10000, None]) +@pytest.mark.parametrize("max_rows", [1, 10, 60, 10000, None]) +def test_dataframe_sliced(gdf, slc, max_seq_items, max_rows): + gdf = gdf() + with pd.option_context( + "display.max_seq_items", max_seq_items, "display.max_rows", max_rows + ): + pdf = gdf.to_pandas() + + sliced_gdf = gdf[slc] + sliced_pdf = pdf[slc] + + expected_repr = repr(sliced_pdf).replace("None", "") + actual_repr = repr(sliced_gdf) + + assert expected_repr == actual_repr + + +@pytest.mark.parametrize( + "df,pandas_special_case", + [ + (pd.DataFrame({"a": [1, 2, 3]}, index=[10, 20, None]), False), + ( + pd.DataFrame( + { + "a": [1, None, 3], + "string_col": ["hello", "world", "rapids"], + }, + index=[None, "a", "b"], + ), + True, + ), + (pd.DataFrame([], index=[None, "a", "b"]), False), + (pd.DataFrame({"aa": [None, None]}, index=[None, None]), False), + (pd.DataFrame({"aa": [1, 2, 3]}, index=[None, None, None]), False), + ( + pd.DataFrame( + {"aa": [None, 2, 3]}, + index=np.array([1, None, None], dtype="datetime64[ns]"), + ), + False, + ), + ( + pd.DataFrame( + {"aa": [None, 2, 3]}, + index=np.array([100, None, None], dtype="datetime64[ns]"), + ), + False, + ), + ( + pd.DataFrame( + {"aa": [None, None, None]}, + index=np.array([None, None, None], dtype="datetime64[ns]"), + ), + False, + ), + ( + pd.DataFrame( + {"aa": [1, None, 3]}, + index=np.array([10, 15, None], dtype="datetime64[ns]"), + ), + False, + ), + ( + pd.DataFrame( + {"a": [1, 2, None], "v": [10, None, 22], "p": [100, 200, 300]} + ).set_index(["a", "v"]), + False, + ), + ( + pd.DataFrame( + { + "a": [1, 2, None], + "v": ["n", "c", "a"], + "p": [None, None, None], + } + ).set_index(["a", "v"]), + False, + ), + ( + pd.DataFrame( + { + "a": np.array([1, None, None], dtype="datetime64[ns]"), + "v": ["n", "c", "a"], + "p": [None, None, None], + } + ).set_index(["a", "v"]), + False, + ), + ], +) +def test_dataframe_null_index_repr(df, pandas_special_case): + pdf = df + gdf = cudf.from_pandas(pdf) + + expected_repr = repr(pdf).replace("NaN", "").replace("None", "") + actual_repr = repr(gdf) + + if pandas_special_case: + # Pandas inconsistently print Index null values + # as `None` at some places and `NaN` at few other places + # Whereas cudf is consistent with 
strings `null` values + # to be printed as `None` everywhere. + actual_repr = repr(gdf).replace("None", "") + + assert expected_repr.split() == actual_repr.split() + + +@pytest.mark.parametrize( + "df,expected_repr", + [ + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [1000000, 200000, 3000000], dtype="timedelta64[s]" + ) + } + ), + textwrap.dedent( + """ + a + 0 11 days 13:46:40 + 1 2 days 07:33:20 + 2 34 days 17:20:00 + """ + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[s]", + ), + "b": [10, 11, 22, 33, 44, 55, 66], + } + ), + textwrap.dedent( + """ + a b + 0 1579 days 08:54:14 10 + 1 NaT 11 + 2 2839 days 15:29:05 22 + 3 2586 days 00:33:31 33 + 4 NaT 44 + 5 42066 days 12:52:14 55 + 6 0 days 06:27:14 66 + """ + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[s]", + index=["a", "b", "c", "d", "e", "f", "g"], + ) + } + ), + textwrap.dedent( + """ + a + a 1579 days 08:54:14 + b NaT + c 2839 days 15:29:05 + d 2586 days 00:33:31 + e NaT + f 42066 days 12:52:14 + g 0 days 06:27:14 + """ + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [1, 2, 3, 4, 5, 6, 7], + index=cudf.Index( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[ms]", + ), + ) + } + ), + textwrap.dedent( + """ + a + 1 days 13:54:17.654 1 + NaT 2 + 2 days 20:09:05.345 3 + 2 days 14:03:52.411 4 + NaT 5 + 42 days 01:35:48.734 6 + 0 days 00:00:23.234 7 + """ + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + ["a", "f", "q", "e", "w", "e", "t"], + index=cudf.Index( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[ns]", + ), + ) + } + ), + textwrap.dedent( + """ + a + 0 days 00:00:00.136457654 a + NaT f + 0 days 00:00:00.245345345 q + 0 days 00:00:00.223432411 e + NaT w + 0 days 00:00:03.634548734 e + 0 days 00:00:00.000023234 t + """ + ), + ), + ], +) +def test_timedelta_dataframe_repr(df, expected_repr): + actual_repr = repr(df()) + + assert actual_repr.split() == expected_repr.split() + + +def test_categorical_dataframe_with_nan_repr(): + series = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + df = cudf.DataFrame({"a": series}) + expected_repr = textwrap.dedent( + """ + a + 0 1.0 + 1 2.0 + 2 NaN + 3 10.0 + 4 NaN + 5 + """ + ) + + assert repr(df).split() == expected_repr.split() + + +def test_repr_struct_after_concat(): + df = cudf.DataFrame( + { + "a": cudf.Series( + [ + {"sa": 2056831253}, + {"sa": -1463792165}, + {"sa": 1735783038}, + {"sa": 103774433}, + {"sa": -1413247520}, + ] + * 13 + ), + "b": cudf.Series( + [ + {"sa": {"ssa": 1140062029}}, + None, + {"sa": {"ssa": 1998862860}}, + {"sa": None}, + {"sa": {"ssa": -395088502}}, + ] + * 13 + ), + } + ) + pdf = df.to_pandas() + + assert repr(df) == repr(pdf) diff --git a/python/cudf/cudf/tests/dateoffset/test_constructors.py b/python/cudf/cudf/tests/dateoffset/test_constructors.py index 56338f44773..08cfff7fbaa 100644 --- a/python/cudf/cudf/tests/dateoffset/test_constructors.py +++ b/python/cudf/cudf/tests/dateoffset/test_constructors.py @@ -1,10 +1,65 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+import re
+
+import numpy as np
 import pandas as pd
+import pytest
 
 import cudf
 
 
+@pytest.mark.parametrize("period", [1.5, 0.5, "string", "1", "1.0"])
+@pytest.mark.parametrize("freq", ["years", "months"])
+def test_construction_invalid(period, freq):
+    kwargs = {freq: period}
+    with pytest.raises(ValueError):
+        cudf.DateOffset(**kwargs)
+
+
+@pytest.mark.parametrize(
+    "unit", ["nanoseconds", "microseconds", "milliseconds", "seconds"]
+)
+def test_construct_max_offset(unit):
+    cudf.DateOffset(**{unit: np.iinfo("int64").max})
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        {"seconds": np.iinfo("int64").max + 1},
+        {"seconds": np.iinfo("int64").max, "minutes": 1},
+        {"minutes": np.iinfo("int64").max},
+    ],
+)
+def test_offset_construction_overflow(kwargs):
+    with pytest.raises(NotImplementedError):
+        cudf.DateOffset(**kwargs)
+
+
+@pytest.mark.parametrize(
+    "unit",
+    [
+        "years",
+        "months",
+        "weeks",
+        "days",
+        "hours",
+        "minutes",
+        "seconds",
+        "milliseconds",
+        "microseconds",
+        "nanoseconds",
+    ],
+)
+@pytest.mark.parametrize("period", [0.5, -0.5, 0.71])
+def test_offset_no_fractional_periods(unit, period):
+    with pytest.raises(
+        ValueError, match=re.escape("Non-integer periods not supported")
+    ):
+        cudf.DateOffset(**{unit: period})
+
+
 def test_dateoffset_instance_subclass_check():
     assert not issubclass(pd.DateOffset, cudf.DateOffset)
     assert not isinstance(pd.DateOffset(), cudf.DateOffset)
diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/test_repr.py b/python/cudf/cudf/tests/indexes/categoricalindex/test_repr.py
new file mode 100644
index 00000000000..e0c87d1dd0c
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/categoricalindex/test_repr.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+
+
+def test_categorical_index_with_nan_repr():
+    cat_index = cudf.Index(
+        cudf.Series(
+            [1, 2, np.nan, 10, np.nan, None], nan_as_null=False
+        ).astype("category")
+    )
+
+    expected_repr = (
+        "CategoricalIndex([1.0, 2.0, NaN, 10.0, NaN, <NA>], "
+        "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')"
+    )
+
+    assert repr(cat_index) == expected_repr
+
+    sliced_expected_repr = (
+        "CategoricalIndex([NaN, 10.0, NaN, <NA>], "
+        "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')"
+    )
+
+    assert repr(cat_index[2:]) == sliced_expected_repr
+
+
+def test_unique_categories_repr():
+    pi = pd.CategoricalIndex(range(10_000))
+    gi = cudf.CategoricalIndex(range(10_000))
+    expected_repr = repr(pi)
+    actual_repr = repr(gi)
+    assert expected_repr == actual_repr
+
+
+@pytest.mark.parametrize("ordered", [True, False])
+def test_categorical_index_ordered(ordered):
+    pi = pd.CategoricalIndex(range(10), ordered=ordered)
+    gi = cudf.CategoricalIndex(range(10), ordered=ordered)
+
+    assert repr(pi) == repr(gi)
diff --git a/python/cudf/cudf/tests/indexes/index/test_repr.py b/python/cudf/cudf/tests/indexes/index/test_repr.py
new file mode 100644
index 00000000000..a9d769d5344
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/test_repr.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize("length", [0, 1, 10, 100, 1000])
+def test_numeric_index_repr(length, numeric_types_as_str):
+    rng = np.random.default_rng(seed=0)
+    data = rng.integers(0, high=100, size=length).astype(numeric_types_as_str)
+    pidx = pd.Index(data)
+    gidx = cudf.Index(data)
+
+    assert repr(pidx) == repr(gidx)
+
+
+@pytest.mark.parametrize(
+    "index,expected_repr",
+    [
+        (
+            lambda: cudf.Index([1, 2, 3, None]),
+            "Index([1, 2, 3, <NA>], dtype='int64')",
+        ),
+        (
+            lambda: cudf.Index([None, 2.2, 3.324342, None]),
+            "Index([<NA>, 2.2, 3.324342, <NA>], dtype='float64')",
+        ),
+        (
+            lambda: cudf.Index([None, None, None], name="hello"),
+            "Index([<NA>, <NA>, <NA>], dtype='object', name='hello')",
+        ),
+        (
+            lambda: cudf.Index(
+                [None, None, None], dtype="float", name="hello"
+            ),
+            "Index([<NA>, <NA>, <NA>], dtype='float64', name='hello')",
+        ),
+        (
+            lambda: cudf.Index([None], dtype="float64", name="hello"),
+            "Index([<NA>], dtype='float64', name='hello')",
+        ),
+        (
+            lambda: cudf.Index([None], dtype="int8", name="hello"),
+            "Index([<NA>], dtype='int8', name='hello')",
+        ),
+        (
+            lambda: cudf.Index([None] * 50, dtype="object"),
+            "Index([<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, "
+            "<NA>, <NA>, <NA>,\n       <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, "
+            "<NA>, <NA>, <NA>, <NA>, <NA>,\n       <NA>, <NA>, <NA>, <NA>, "
+            "<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>,\n       <NA>, "
+            "<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, "
+            "<NA>,\n       <NA>, <NA>],\n      dtype='object')",
+        ),
+        (
+            lambda: cudf.Index([None] * 20, dtype="uint32"),
+            "Index([<NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, "
+            "<NA>,\n       <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, <NA>, "
+            "<NA>,\n       <NA>, <NA>],\n      dtype='uint32')",
+        ),
+        (
+            lambda: cudf.Index(
+                [None, 111, 22, 33, None, 23, 34, 2343, None], dtype="int16"
+            ),
+            "Index([<NA>, 111, 22, 33, <NA>, 23, 34, 2343, <NA>], "
+            "dtype='int16')",
+        ),
+        (
+            lambda: cudf.Index([1, 2, 3, None], dtype="category"),
+            "CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], "
+            "ordered=False, dtype='category')",
+        ),
+        (
+            lambda: cudf.Index([None, None], dtype="category"),
+            "CategoricalIndex([<NA>, <NA>], categories=[], ordered=False, "
+            "dtype='category')",
+        ),
+        (
+            lambda: cudf.Index(
+                np.array([10, 20, 30, None], dtype="datetime64[ns]")
+            ),
+            "DatetimeIndex([1970-01-01 00:00:00.000000010, "
+            "1970-01-01 00:00:00.000000020,"
+            "\n               1970-01-01 00:00:00.000000030, NaT],\n              "
+            "dtype='datetime64[ns]')",
+        ),
+        (
+            lambda: cudf.Index(
+                np.array([10, 20, 30, None], dtype="datetime64[s]")
+            ),
+            "DatetimeIndex([1970-01-01 00:00:10, "
+            "1970-01-01 00:00:20, 1970-01-01 00:00:30,\n"
+            "               NaT],\n              dtype='datetime64[s]')",
+        ),
+        (
+            lambda: cudf.Index(
+                np.array([10, 20, 30, None], dtype="datetime64[us]")
+            ),
+            "DatetimeIndex([1970-01-01 00:00:00.000010, "
+            "1970-01-01 00:00:00.000020,\n               "
+            "1970-01-01 00:00:00.000030, NaT],\n              "
+            "dtype='datetime64[us]')",
+        ),
+        (
+            lambda: cudf.Index(
+                np.array([10, 20, 30, None], dtype="datetime64[ms]")
+            ),
+            "DatetimeIndex([1970-01-01 00:00:00.010, "
+            "1970-01-01 00:00:00.020,\n               "
+            "1970-01-01 00:00:00.030, NaT],\n              "
+            "dtype='datetime64[ms]')",
+        ),
+        (
+            lambda: cudf.Index(np.array([None] * 10, dtype="datetime64[ms]")),
+            "DatetimeIndex([NaT, NaT, NaT, NaT, NaT, NaT, NaT, NaT, "
+            "NaT, NaT], dtype='datetime64[ms]')",
+        ),
+    ],
+)
+def test_index_null(index, expected_repr):
+    index = index()
+    actual_repr = repr(index)
+
+    assert expected_repr == actual_repr
diff --git a/python/cudf/cudf/tests/indexes/intervalindex/__init__.py b/python/cudf/cudf/tests/indexes/intervalindex/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/tests/indexes/intervalindex/test_repr.py b/python/cudf/cudf/tests/indexes/intervalindex/test_repr.py
new file mode 100644
index 00000000000..66034d9961c
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/intervalindex/test_repr.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
+
+
+import numpy as np
+import pandas as pd
+
+import cudf
+
+
+def test_interval_index_repr():
+    pi = pd.Index(
+        [
+            np.nan,
+            pd.Interval(2.0, 3.0, closed="right"),
+            pd.Interval(3.0, 4.0, closed="right"),
+        ]
+    )
+    gi = cudf.from_pandas(pi)
+
+    assert repr(pi) == repr(gi)
diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_repr.py b/python/cudf/cudf/tests/indexes/multiindex/test_repr.py
new file mode 100644
index 00000000000..70106a9376b
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/multiindex/test_repr.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import textwrap
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize(
+    "pmi",
+    [
+        pd.MultiIndex.from_tuples(
+            [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")]
+        ),
+        pd.MultiIndex.from_tuples(
+            [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] * 10
+        ),
+        pd.MultiIndex.from_tuples([(1, "red", 102, "sdf")]),
+        pd.MultiIndex.from_tuples(
+            [
+                ("abc", 0.234, 1),
+                ("a", -0.34, 0),
+                ("ai", 111, 4385798),
+                ("rapids", 0, 34534534),
+            ],
+            names=["alphabets", "floats", "ints"],
+        ),
+    ],
+)
+@pytest.mark.parametrize("max_seq_items", [None, 1, 2, 5, 10, 100])
+def test_multiindex_repr(pmi, max_seq_items):
+    with pd.option_context("display.max_seq_items", max_seq_items):
+        gmi = cudf.from_pandas(pmi)
+
+        assert repr(gmi) == repr(pmi)
+
+
+@pytest.mark.parametrize(
+    "gdi, expected_repr",
+    [
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": [None, 1, 2, 3],
+                    "b": ["abc", None, "xyz", None],
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["a", "b"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(<NA>, 'abc'),
+                            (   1,  <NA>),
+                            (   2, 'xyz'),
+                            (   3,  <NA>)],
+                           names=['a', 'b'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": cudf.Series([None, np.nan, 2, 3], nan_as_null=False),
+                    "b": ["abc", None, "xyz", None],
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["a", "b"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(<NA>, 'abc'),
+                            ( nan,  <NA>),
+                            ( 2.0, 'xyz'),
+                            ( 3.0,  <NA>)],
+                           names=['a', 'b'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"),
+                    "b": ["abc", None, "xyz", None],
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["a", "b"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(                          'NaT', 'abc'),
+                            ('1970-01-01 00:00:00.000000001',  <NA>),
+                            ('1970-01-01 00:00:00.000000002', 'xyz'),
+                            ('1970-01-01 00:00:00.000000003',  <NA>)],
+                           names=['a', 'b'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"),
+                    "b": ["abc", None, "xyz", None],
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["a", "b", "c"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(                          'NaT', 'abc', 0.345),
+                            ('1970-01-01 00:00:00.000000001',  <NA>,  <NA>),
+                            ('1970-01-01 00:00:00.000000002', 'xyz', 100.0),
+                            ('1970-01-01 00:00:00.000000003',  <NA>,  10.0)],
+                           names=['a', 'b', 'c'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": ["abc", None, "xyz", None],
+                    "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"),
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["a", "b", "c"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([('abc',                          NaT, 0.345),
+                            ( <NA>, '0 days 00:00:00.000000001',  <NA>),
+                            ('xyz', '0 days 00:00:00.000000002', 100.0),
+                            ( <NA>, '0 days 00:00:00.000000003',  10.0)],
+                           names=['a', 'b', 'c'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": ["abc", None, "xyz", None],
+                    "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"),
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["c", "a"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(0.345, 'abc'),
+                            ( <NA>,  <NA>),
+                            (100.0, 'xyz'),
+                            ( 10.0,  <NA>)],
+                           names=['c', 'a'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": [None, None, None, None],
+                    "b": cudf.Series(
+                        [None, None, None, None], dtype="timedelta64[ns]"
+                    ),
+                    "c": [0.345, np.nan, 100, 10],
+                }
+            )
+            .set_index(["b", "a"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(NaT, <NA>),
+                            (NaT, <NA>),
+                            (NaT, <NA>),
+                            (NaT, <NA>)],
+                           names=['b', 'a'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": [1, 2, None, 3, 5],
+                    "b": [
+                        "abc",
+                        "def, hi, bye",
+                        None,
+                        ", one, two, three, four",
+                        None,
+                    ],
+                    "c": cudf.Series(
+                        [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False
+                    ),
+                    "d": [None, 100, 2000324, None, None],
+                }
+            )
+            .set_index(["a", "b", "c", "d"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(   1,                     'abc',   0.3232,    <NA>),
+                            (   2,            'def, hi, bye',      nan,     100),
+                            (<NA>,                      <NA>,      1.0, 2000324),
+                            (   3, ', one, two, three, four',     <NA>,    <NA>),
+                            (   5,                      <NA>, -0.34534,    <NA>)],
+                           names=['a', 'b', 'c', 'd'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": [1, 2, None, 3, 5],
+                    "b": [
+                        "abc",
+                        "def, hi, bye",
+                        None,
+                        ", one, two, three, four",
+                        None,
+                    ],
+                    "c": cudf.Series(
+                        [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False
+                    ),
+                    "d": [None, 100, 2000324, None, None],
+                }
+            )
+            .set_index(["b", "a", "c", "d"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([(                    'abc',    1,   0.3232,    <NA>),
+                            (           'def, hi, bye',    2,      nan,     100),
+                            (                     <NA>, <NA>,      1.0, 2000324),
+                            (', one, two, three, four',    3,     <NA>,    <NA>),
+                            (                     <NA>,    5, -0.34534,    <NA>)],
+                           names=['b', 'a', 'c', 'd'])
+                """
+            ),
+        ),
+        (
+            lambda: cudf.DataFrame(
+                {
+                    "a": ["(abc", "2", None, "3", "5"],
+                    "b": [
+                        "abc",
+                        "def, hi, bye",
+                        None,
+                        ", one, two, three, four",
+                        None,
+                    ],
+                    "c": cudf.Series(
+                        [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False
+                    ),
+                    "d": [None, 100, 2000324, None, None],
+                }
+            )
+            .set_index(["a", "b", "c", "d"])
+            .index,
+            textwrap.dedent(
+                """
+                MultiIndex([('(abc',                     'abc',   0.3232,    <NA>),
+                            (   '2',            'def, hi, bye',      nan,     100),
+                            (  <NA>,                      <NA>,      1.0, 2000324),
+                            (   '3', ', one, two, three, four',     <NA>,    <NA>),
+                            (   '5',                      <NA>, -0.34534,    <NA>)],
+                           names=['a', 'b', 'c', 'd'])
+                """
+            ),
+        ),
+    ],
+)
+def test_multiindex_null_repr(gdi, expected_repr):
+    actual_repr = repr(gdi())
+
+    assert actual_repr.split() == expected_repr.split()
diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/test_repr.py b/python/cudf/cudf/tests/indexes/timedeltaindex/test_repr.py
new file mode 100644
index 00000000000..3b82bee2d93
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/timedeltaindex/test_repr.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
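+
+# NOTE: TimedeltaIndex reprs mirror pandas' "D days HH:MM:SS[.fff]" rendering,
+# with NaT standing in for missing values in the expected strings below.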
+ + +import pytest + +import cudf + + +@pytest.mark.parametrize( + "index, expected_repr", + [ + ( + lambda: cudf.Index( + [1000000, 200000, 3000000], dtype="timedelta64[ms]" + ), + "TimedeltaIndex(['0 days 00:16:40', " + "'0 days 00:03:20', '0 days 00:50:00'], " + "dtype='timedelta64[ms]')", + ), + ( + lambda: cudf.Index( + [None, None, None, None, None], dtype="timedelta64[us]" + ), + "TimedeltaIndex([NaT, NaT, NaT, NaT, NaT], " + "dtype='timedelta64[us]')", + ), + ( + lambda: cudf.Index( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[us]", + ), + "TimedeltaIndex([0 days 00:02:16.457654, NaT, " + "0 days 00:04:05.345345, " + "0 days 00:03:43.432411, NaT," + " 0 days 01:00:34.548734, 0 days 00:00:00.023234]," + " dtype='timedelta64[us]')", + ), + ( + lambda: cudf.Index( + [ + 136457654, + None, + 245345345, + 223432411, + None, + 3634548734, + 23234, + ], + dtype="timedelta64[s]", + ), + "TimedeltaIndex([1579 days 08:54:14, NaT, 2839 days 15:29:05," + " 2586 days 00:33:31, NaT, 42066 days 12:52:14, " + "0 days 06:27:14]," + " dtype='timedelta64[s]')", + ), + ], +) +def test_timedelta_index_repr(index, expected_repr): + actual_repr = repr(index()) + + assert actual_repr.split() == expected_repr.split() diff --git a/python/cudf/cudf/tests/series/test_repr.py b/python/cudf/cudf/tests/series/test_repr.py new file mode 100644 index 00000000000..2e1e9888ff4 --- /dev/null +++ b/python/cudf/cudf/tests/series/test_repr.py @@ -0,0 +1,521 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import textwrap + +import cupy as cp +import numpy as np +import pandas as pd +import pytest +from hypothesis import given, settings, strategies as st + +import cudf +from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes + + +@pytest.mark.parametrize("nrows", [0, 5, 10]) +def test_null_series(nrows, all_supported_types_as_str, request): + request.applymarker( + pytest.mark.xfail( + all_supported_types_as_str in {"bool", "timedelta64[ms]"}, + reason=f"cuDF repr doesn't match pandas repr for {all_supported_types_as_str}", + ) + ) + rng = np.random.default_rng(seed=0) + size = 5 + sr = cudf.Series(rng.integers(1, 9, size)).astype( + all_supported_types_as_str + ) + sr[rng.choice([False, True], size=size)] = None + if all_supported_types_as_str != "category" and cudf.dtype( + all_supported_types_as_str + ).kind in {"u", "i"}: + ps = pd.Series( + sr._column.data_array_view(mode="read").copy_to_host(), + dtype=np_dtypes_to_pandas_dtypes.get( + cudf.dtype(all_supported_types_as_str), + cudf.dtype(all_supported_types_as_str), + ), + ) + ps[sr.isnull().to_pandas()] = pd.NA + else: + ps = sr.to_pandas() + + with pd.option_context("display.max_rows", int(nrows)): + psrepr = repr(ps).replace("NaN", "").replace("None", "") + if "UInt" in psrepr: + psrepr = psrepr.replace("UInt", "uint") + elif "Int" in psrepr: + psrepr = psrepr.replace("Int", "int") + assert psrepr.split() == repr(sr).split() + + +@pytest.mark.parametrize("nrows", [None, 0, 2, 10, 20, 21]) +def test_full_series(nrows, all_supported_types_as_str): + size = 20 + rng = np.random.default_rng(seed=0) + ps = pd.Series(rng.integers(0, 100, size)).astype( + all_supported_types_as_str + ) + sr = cudf.from_pandas(ps) + with pd.option_context("display.max_rows", nrows): + assert repr(ps) == repr(sr) + + +@given( + st.lists( + st.integers(-9223372036854775808, 9223372036854775807), max_size=1000 + ) +) +@settings(deadline=None, max_examples=20) +def test_integer_series(x): + sr = cudf.Series(x, dtype=int) + ps = 
pd.Series(data=x, dtype=int)
+
+    assert repr(sr) == repr(ps)
+
+
+@given(st.lists(st.floats()))
+@settings(deadline=None, max_examples=20)
+def test_float_series(x):
+    sr = cudf.Series(x, dtype=float, nan_as_null=False)
+    ps = pd.Series(data=x, dtype=float)
+    assert repr(sr) == repr(ps)
+
+
+@pytest.mark.parametrize(
+    "sr,pandas_special_case",
+    [
+        (pd.Series([1, 2, 3], index=[10, 20, None]), False),
+        (pd.Series([1, None, 3], name="a", index=[None, "a", "b"]), True),
+        (pd.Series(None, index=[None, "a", "b"], dtype="float"), True),
+        (pd.Series([None, None], name="aa", index=[None, None]), False),
+        (pd.Series([1, 2, 3], index=[None, None, None]), False),
+        (
+            pd.Series(
+                [None, 2, 3],
+                index=np.array([1, None, None], dtype="datetime64[ns]"),
+            ),
+            False,
+        ),
+        (
+            pd.Series(
+                [None, None, None],
+                index=np.array([None, None, None], dtype="datetime64[ns]"),
+            ),
+            False,
+        ),
+        (
+            pd.Series(
+                [1, None, 3],
+                index=np.array([10, 15, None], dtype="datetime64[ns]"),
+            ),
+            False,
+        ),
+        (
+            pd.DataFrame(
+                {"a": [1, 2, None], "v": [10, None, 22], "p": [100, 200, 300]}
+            ).set_index(["a", "v"])["p"],
+            False,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "a": [1, 2, None],
+                    "v": ["n", "c", "a"],
+                    "p": [None, None, None],
+                }
+            ).set_index(["a", "v"])["p"],
+            False,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "a": np.array([1, None, None], dtype="datetime64[ns]"),
+                    "v": ["n", "c", "a"],
+                    "p": [None, None, None],
+                }
+            ).set_index(["a", "v"])["p"],
+            False,
+        ),
+    ],
+)
+def test_series_null_index_repr(sr, pandas_special_case):
+    psr = sr
+    gsr = cudf.from_pandas(psr)
+
+    expected_repr = repr(psr).replace("NaN", "").replace("None", "")
+    actual_repr = repr(gsr)
+
+    if pandas_special_case:
+        # Pandas inconsistently prints Index null values as `None` in
+        # some places and as `NaN` in others, whereas cudf consistently
+        # prints string `null` values as `None` everywhere.
+ actual_repr = repr(gsr).replace("None", "") + assert expected_repr.split() == actual_repr.split() + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [ + 136457654, + 134736784, + 245345345, + 223432411, + 2343241, + 3634548734, + 23234, + ], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +@pytest.mark.parametrize("dtype", ["timedelta64[s]", "timedelta64[us]"]) +def test_timedelta_series_s_us_repr(data, dtype): + sr = cudf.Series(data, dtype=dtype) + psr = sr.to_pandas() + + expected = repr(psr).replace("timedelta64[ns]", dtype) + actual = repr(sr) + + assert expected.split() == actual.split() + + +@pytest.mark.parametrize( + "ser, expected_repr", + [ + ( + lambda: cudf.Series([], dtype="timedelta64[ns]"), + textwrap.dedent( + """ + Series([], dtype: timedelta64[ns]) + """ + ), + ), + ( + lambda: cudf.Series([], dtype="timedelta64[ms]"), + textwrap.dedent( + """ + Series([], dtype: timedelta64[ms]) + """ + ), + ), + ( + lambda: cudf.Series( + [1000000, 200000, 3000000], dtype="timedelta64[ns]" + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.001000 + 1 0 days 00:00:00.000200 + 2 0 days 00:00:00.003000 + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [1000000, 200000, 3000000], dtype="timedelta64[ms]" + ), + textwrap.dedent( + """ + 0 0 days 00:16:40 + 1 0 days 00:03:20 + 2 0 days 00:50:00 + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [1000000, 200000, None], dtype="timedelta64[ns]" + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.001000000 + 1 0 days 00:00:00.000200000 + 2 NaT + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [1000000, 200000, None], dtype="timedelta64[ms]" + ), + textwrap.dedent( + """ + 0 0 days 00:16:40 + 1 0 days 00:03:20 + 2 NaT + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [None, None, None, None, None], dtype="timedelta64[ns]" + ), + textwrap.dedent( + """ + 0 NaT + 1 NaT + 2 NaT + 3 NaT + 4 NaT + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [None, None, None, None, None], dtype="timedelta64[ms]" + ), + textwrap.dedent( + """ + 0 NaT + 1 NaT + 2 NaT + 3 NaT + 4 NaT + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ns]" + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.000000012 + 1 0 days 00:00:00.000000012 + 2 0 days 00:00:00.000000022 + 3 0 days 00:00:00.000000343 + 4 0 days 00:00:00.004353534 + 5 0 days 00:00:00.000435342 + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ms]" + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.012000 + 1 0 days 00:00:00.012000 + 2 0 days 00:00:00.022000 + 3 0 days 00:00:00.343000 + 4 0 days 01:12:33.534000 + 5 0 days 00:07:15.342000 + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + dtype="timedelta64[ns]", + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.000000001 + 1 0 days 00:00:00.000001132 + 2 0 days 00:00:00.023223231 + 3 0 days 
00:00:00.000000233 + 4 0 days 00:00:00 + 5 0 days 00:00:00.000000332 + 6 0 days 00:00:00.000000323 + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + dtype="timedelta64[ms]", + ), + textwrap.dedent( + """ + 0 0 days 00:00:00.001000 + 1 0 days 00:00:01.132000 + 2 0 days 06:27:03.231000 + 3 0 days 00:00:00.233000 + 4 0 days 00:00:00 + 5 0 days 00:00:00.332000 + 6 0 days 00:00:00.323000 + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [ + 13645765432432, + 134736784, + 245345345, + 223432411, + 999992343241, + 3634548734, + 23234, + ], + dtype="timedelta64[ms]", + ), + textwrap.dedent( + """ + 0 157937 days 02:23:52.432000 + 1 1 days 13:25:36.784000 + 2 2 days 20:09:05.345000 + 3 2 days 14:03:52.411000 + 4 11573 days 23:39:03.241000 + 5 42 days 01:35:48.734000 + 6 0 days 00:00:23.234000 + dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [ + 13645765432432, + 134736784, + 245345345, + 223432411, + 999992343241, + 3634548734, + 23234, + ], + dtype="timedelta64[ns]", + ), + textwrap.dedent( + """ + 0 0 days 03:47:25.765432432 + 1 0 days 00:00:00.134736784 + 2 0 days 00:00:00.245345345 + 3 0 days 00:00:00.223432411 + 4 0 days 00:16:39.992343241 + 5 0 days 00:00:03.634548734 + 6 0 days 00:00:00.000023234 + dtype: timedelta64[ns] + """ + ), + ), + ( + lambda: cudf.Series( + [ + 13645765432432, + 134736784, + 245345345, + 223432411, + 999992343241, + 3634548734, + 23234, + ], + dtype="timedelta64[ms]", + name="abc", + ), + textwrap.dedent( + """ + 0 157937 days 02:23:52.432000 + 1 1 days 13:25:36.784000 + 2 2 days 20:09:05.345000 + 3 2 days 14:03:52.411000 + 4 11573 days 23:39:03.241000 + 5 42 days 01:35:48.734000 + 6 0 days 00:00:23.234000 + Name: abc, dtype: timedelta64[ms] + """ + ), + ), + ( + lambda: cudf.Series( + [ + 13645765432432, + 134736784, + 245345345, + 223432411, + 999992343241, + 3634548734, + 23234, + ], + dtype="timedelta64[ns]", + index=["a", "b", "z", "x", "y", "l", "m"], + name="hello", + ), + textwrap.dedent( + """ + a 0 days 03:47:25.765432432 + b 0 days 00:00:00.134736784 + z 0 days 00:00:00.245345345 + x 0 days 00:00:00.223432411 + y 0 days 00:16:39.992343241 + l 0 days 00:00:03.634548734 + m 0 days 00:00:00.000023234 + Name: hello, dtype: timedelta64[ns] + """ + ), + ), + ], +) +def test_timedelta_series_ns_ms_repr(ser, expected_repr): + expected = expected_repr + actual = repr(ser()) + + assert expected.split() == actual.split() + + +def test_categorical_series_with_nan_repr(): + series = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + + expected_repr = textwrap.dedent( + """ + 0 1.0 + 1 2.0 + 2 NaN + 3 10.0 + 4 NaN + 5 + dtype: category + Categories (4, float64): [1.0, 2.0, 10.0, NaN] + """ + ) + + assert repr(series).split() == expected_repr.split() + + sliced_expected_repr = textwrap.dedent( + """ + 2 NaN + 3 10.0 + 4 NaN + 5 + dtype: category + Categories (4, float64): [1.0, 2.0, 10.0, NaN] + """ + ) + + assert repr(series[2:]).split() == sliced_expected_repr.split() + + +def test_empty_series_name(): + ps = pd.Series([], name="abc", dtype="int") + gs = cudf.from_pandas(ps) + + assert repr(ps) == repr(gs) diff --git a/python/cudf/cudf/tests/test_offset.py b/python/cudf/cudf/tests/test_offset.py deleted file mode 100644 index 1ed04616f04..00000000000 --- a/python/cudf/cudf/tests/test_offset.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. 
- -import re - -import numpy as np -import pytest - -from cudf import DateOffset - -INT64MAX = np.iinfo("int64").max - - -@pytest.mark.parametrize("period", [1.5, 0.5, "string", "1", "1.0"]) -@pytest.mark.parametrize("freq", ["years", "months"]) -def test_construction_invalid(period, freq): - kwargs = {freq: period} - with pytest.raises(ValueError): - DateOffset(**kwargs) - - -@pytest.mark.parametrize( - "unit", ["nanoseconds", "microseconds", "milliseconds", "seconds"] -) -def test_construct_max_offset(unit): - DateOffset(**{unit: np.iinfo("int64").max}) - - -@pytest.mark.parametrize( - "kwargs", - [ - {"seconds": INT64MAX + 1}, - {"seconds": INT64MAX, "minutes": 1}, - {"minutes": INT64MAX}, - ], -) -def test_offset_construction_overflow(kwargs): - with pytest.raises(NotImplementedError): - DateOffset(**kwargs) - - -@pytest.mark.parametrize( - "unit", - [ - "years", - "months", - "weeks", - "days", - "hours", - "minutes", - "seconds", - "milliseconds", - "microseconds", - "nanoseconds", - ], -) -@pytest.mark.parametrize("period", [0.5, -0.5, 0.71]) -def test_offset_no_fractional_periods(unit, period): - with pytest.raises( - ValueError, match=re.escape("Non-integer periods not supported") - ): - DateOffset(**{unit: period}) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py deleted file mode 100644 index 89fa6a0bb78..00000000000 --- a/python/cudf/cudf/tests/test_repr.py +++ /dev/null @@ -1,1518 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. - -import textwrap - -import cupy as cp -import numpy as np -import pandas as pd -import pytest -from hypothesis import given, settings, strategies as st - -import cudf -from cudf.testing import _utils as utils -from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes - - -@pytest.fixture( - params=[ - "uint16", - "int64", - "float64", - "str", - "category", - "datetime64[ns]", - ] -) -def dtype(request): - return request.param - - -@pytest.mark.parametrize("nrows", [0, 5, 10]) -def test_null_series(nrows, dtype): - rng = np.random.default_rng(seed=0) - size = 5 - sr = cudf.Series(rng.integers(1, 9, size)).astype(dtype) - sr[rng.choice([False, True], size=size)] = None - if dtype != "category" and cudf.dtype(dtype).kind in {"u", "i"}: - ps = pd.Series( - sr._column.data_array_view(mode="read").copy_to_host(), - dtype=np_dtypes_to_pandas_dtypes.get( - cudf.dtype(dtype), cudf.dtype(dtype) - ), - ) - ps[sr.isnull().to_pandas()] = pd.NA - else: - ps = sr.to_pandas() - - with pd.option_context("display.max_rows", int(nrows)): - psrepr = repr(ps).replace("NaN", "").replace("None", "") - if "UInt" in psrepr: - psrepr = psrepr.replace("UInt", "uint") - elif "Int" in psrepr: - psrepr = psrepr.replace("Int", "int") - assert psrepr.split() == repr(sr).split() - - -@pytest.mark.parametrize("ncols", [1, 2, 3, 4, 5, 10]) -def test_null_dataframe(ncols): - dtype_categories = [ - "float32", - "float64", - "datetime64[ns]", - "str", - "category", - ] - rng = np.random.default_rng(seed=0) - size = 20 - gdf = cudf.DataFrame() - for dtype in dtype_categories: - sr = cudf.Series(rng.integers(0, 128, size)).astype(dtype) - sr[rng.choice([False, True], size=size)] = None - gdf[dtype] = sr - pdf = gdf.to_pandas() - with pd.option_context("display.max_columns", int(ncols)): - pdf_repr = repr(pdf).replace("NaN", "").replace("None", "") - assert pdf_repr.split() == repr(gdf).split() - - -@pytest.mark.parametrize("nrows", [None, 0, 1, 2, 9, 10, 11, 19, 20, 21]) -def test_full_series(nrows, dtype): - size = 20 - rng = 
np.random.default_rng(seed=0) - ps = pd.Series(rng.integers(0, 100, size)).astype(dtype) - sr = cudf.from_pandas(ps) - with pd.option_context("display.max_rows", nrows): - assert repr(ps) == repr(sr) - - -@pytest.mark.parametrize("nrows", [5, 10, 15]) -@pytest.mark.parametrize("ncols", [5, 10, 15]) -@pytest.mark.parametrize("size", [20, 21]) -def test_full_dataframe_20(dtype, size, nrows, ncols): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - {idx: rng.integers(0, 100, size) for idx in range(size)} - ).astype(dtype) - gdf = cudf.from_pandas(pdf) - - with pd.option_context( - "display.max_rows", int(nrows), "display.max_columns", int(ncols) - ): - assert repr(pdf) == repr(gdf) - assert pdf._repr_html_() == gdf._repr_html_() - assert pdf._repr_latex_() == gdf._repr_latex_() - - -@given( - st.lists( - st.integers(-9223372036854775808, 9223372036854775807), - min_size=1, - max_size=1000, - ) -) -@settings(deadline=None, max_examples=20) -def test_integer_dataframe(x): - gdf = cudf.DataFrame({"x": x}) - pdf = gdf.to_pandas() - with pd.option_context("display.max_columns", 1): - assert repr(gdf) == repr(pdf) - assert repr(gdf.T) == repr(pdf.T) - - -@given( - st.lists( - st.integers(-9223372036854775808, 9223372036854775807), max_size=1000 - ) -) -@settings(deadline=None, max_examples=20) -def test_integer_series(x): - sr = cudf.Series(x, dtype=int) - ps = pd.Series(data=x, dtype=int) - - assert repr(sr) == repr(ps) - - -@given(st.lists(st.floats())) -@settings(deadline=None, max_examples=20) -def test_float_dataframe(x): - gdf = cudf.DataFrame({"x": cudf.Series(x, dtype=float, nan_as_null=False)}) - pdf = gdf.to_pandas() - assert repr(gdf) == repr(pdf) - - -@given(st.lists(st.floats())) -@settings(deadline=None, max_examples=20) -def test_float_series(x): - sr = cudf.Series(x, dtype=float, nan_as_null=False) - ps = pd.Series(data=x, dtype=float) - assert repr(sr) == repr(ps) - - -@pytest.fixture -def mixed_pdf(): - pdf = pd.DataFrame() - pdf["Integer"] = np.array([2345, 11987, 9027, 9027]) - pdf["Date"] = np.array( - ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"] - ) - pdf["Float"] = np.array([9.001, 8.343, 6, 2.781]) - pdf["Integer2"] = np.array([2345, 106, 2088, 789277]) - pdf["Category"] = np.array(["M", "F", "F", "F"]) - pdf["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - pdf["Boolean"] = np.array([True, False, True, False]) - return pdf - - -@pytest.fixture -def mixed_gdf(mixed_pdf): - return cudf.from_pandas(mixed_pdf) - - -def test_mixed_dataframe(mixed_pdf, mixed_gdf): - assert repr(mixed_gdf) == repr(mixed_pdf) - - -def test_mixed_series(mixed_pdf, mixed_gdf): - for col in mixed_gdf.columns: - assert repr(mixed_gdf[col]) == repr(mixed_pdf[col]) - - -def test_MI(): - rng = np.random.default_rng(seed=0) - gdf = cudf.DataFrame( - { - "a": rng.integers(0, 4, 10), - "b": rng.integers(0, 4, 10), - "c": rng.integers(0, 4, 10), - } - ) - levels = [["a", "b", "c", "d"], ["w", "x", "y", "z"], ["m", "n"]] - codes = [ - [0, 0, 0, 0, 1, 1, 2, 2, 3, 3], - [0, 1, 2, 3, 0, 1, 2, 3, 0, 1], - [0, 1, 0, 1, 0, 1, 0, 1, 0, 1], - ] - with pd.option_context("display.max_rows", 999, "display.max_columns", 0): - gdf = gdf.set_index(cudf.MultiIndex(levels=levels, codes=codes)) - pdf = gdf.to_pandas() - assert repr(gdf) == repr(pdf) - assert repr(gdf.index) == repr(pdf.index) - assert repr(gdf.T) == repr(pdf.T) - - -@pytest.mark.parametrize("nrows", [0, 1, 3, 5, 10]) -@pytest.mark.parametrize("ncols", [0, 1, 2, 3]) -def test_groupby_MI(nrows, ncols): - gdf = cudf.DataFrame( - 
{"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} - ) - pdf = gdf.to_pandas() - gdg = gdf.groupby(["a", "b"], sort=True).count() - pdg = pdf.groupby(["a", "b"], sort=True).count() - with pd.option_context( - "display.max_rows", nrows, "display.max_columns", ncols - ): - assert repr(gdg) == repr(pdg) - assert repr(gdg.index) == repr(pdg.index) - assert repr(gdg.T) == repr(pdg.T) - - -@pytest.mark.parametrize("dtype", utils.NUMERIC_TYPES) -@pytest.mark.parametrize("length", [0, 1, 10, 100, 1000]) -def test_generic_index(length, dtype): - rng = np.random.default_rng(seed=0) - psr = pd.Series( - range(length), - index=rng.integers(0, high=100, size=length).astype(dtype), - dtype="float64" if length == 0 else None, - ) - gsr = cudf.Series.from_pandas(psr) - - assert repr(psr.index) == repr(gsr.index) - - -@pytest.mark.parametrize( - "gdf", - [ - lambda: cudf.DataFrame({"a": range(10000)}), - lambda: cudf.DataFrame({"a": range(10000), "b": range(10000)}), - lambda: cudf.DataFrame({"a": range(20), "b": range(20)}), - lambda: cudf.DataFrame( - { - "a": range(20), - "b": range(20), - "c": ["abc", "def", "xyz", "def", "pqr"] * 4, - } - ), - lambda: cudf.DataFrame(index=[1, 2, 3]), - lambda: cudf.DataFrame(index=range(10000)), - lambda: cudf.DataFrame(columns=["a", "b", "c", "d"]), - lambda: cudf.DataFrame(columns=["a"], index=range(10000)), - lambda: cudf.DataFrame( - columns=["a", "col2", "...col n"], index=range(10000) - ), - lambda: cudf.DataFrame(index=cudf.Series(range(10000)).astype("str")), - lambda: cudf.DataFrame( - columns=["a", "b", "c", "d"], - index=cudf.Series(range(10000)).astype("str"), - ), - ], -) -@pytest.mark.parametrize( - "slice", - [ - slice(2500, 5000), - slice(2500, 2501), - slice(5000), - slice(1, 10), - slice(10, 20), - slice(15, 2400), - ], -) -@pytest.mark.parametrize("max_seq_items", [1, 10, 60, 10000, None]) -@pytest.mark.parametrize("max_rows", [1, 10, 60, 10000, None]) -def test_dataframe_sliced(gdf, slice, max_seq_items, max_rows): - gdf = gdf() - with pd.option_context( - "display.max_seq_items", max_seq_items, "display.max_rows", max_rows - ): - pdf = gdf.to_pandas() - - sliced_gdf = gdf[slice] - sliced_pdf = pdf[slice] - - expected_repr = repr(sliced_pdf).replace("None", "") - actual_repr = repr(sliced_gdf) - - assert expected_repr == actual_repr - - -@pytest.mark.parametrize( - "index,expected_repr", - [ - ( - lambda: cudf.Index([1, 2, 3, None]), - "Index([1, 2, 3, ], dtype='int64')", - ), - ( - lambda: cudf.Index([None, 2.2, 3.324342, None]), - "Index([, 2.2, 3.324342, ], dtype='float64')", - ), - ( - lambda: cudf.Index([None, None, None], name="hello"), - "Index([, , ], dtype='object', name='hello')", - ), - ( - lambda: cudf.Index( - [None, None, None], dtype="float", name="hello" - ), - "Index([, , ], dtype='float64', name='hello')", - ), - ( - lambda: cudf.Index([None], dtype="float64", name="hello"), - "Index([], dtype='float64', name='hello')", - ), - ( - lambda: cudf.Index([None], dtype="int8", name="hello"), - "Index([], dtype='int8', name='hello')", - ), - ( - lambda: cudf.Index([None] * 50, dtype="object"), - "Index([, , , , , , , , , " - ", , ,\n , , , , , , , " - ", , , , ,\n , , , , " - ", , , , , , , ,\n , " - ", , , , , , , , , , " - ",\n , ],\n dtype='object')", - ), - ( - lambda: cudf.Index([None] * 20, dtype="uint32"), - "Index([, , , , , , , , " - ",\n , , , , , , , , " - ",\n , ],\n dtype='uint32')", - ), - ( - lambda: cudf.Index( - [None, 111, 22, 33, None, 23, 34, 2343, None], dtype="int16" - ), - "Index([, 111, 22, 33, , 23, 34, 
2343, ], " - "dtype='int16')", - ), - ( - lambda: cudf.Index([1, 2, 3, None], dtype="category"), - "CategoricalIndex([1, 2, 3, ], categories=[1, 2, 3], " - "ordered=False, dtype='category')", - ), - ( - lambda: cudf.Index([None, None], dtype="category"), - "CategoricalIndex([, ], categories=[], ordered=False, " - "dtype='category')", - ), - ( - lambda: cudf.Index( - np.array([10, 20, 30, None], dtype="datetime64[ns]") - ), - "DatetimeIndex([1970-01-01 00:00:00.000000010, " - "1970-01-01 00:00:00.000000020," - "\n 1970-01-01 00:00:00.000000030, NaT],\n " - "dtype='datetime64[ns]')", - ), - ( - lambda: cudf.Index( - np.array([10, 20, 30, None], dtype="datetime64[s]") - ), - "DatetimeIndex([1970-01-01 00:00:10, " - "1970-01-01 00:00:20, 1970-01-01 00:00:30,\n" - " NaT],\n dtype='datetime64[s]')", - ), - ( - lambda: cudf.Index( - np.array([10, 20, 30, None], dtype="datetime64[us]") - ), - "DatetimeIndex([1970-01-01 00:00:00.000010, " - "1970-01-01 00:00:00.000020,\n " - "1970-01-01 00:00:00.000030, NaT],\n " - "dtype='datetime64[us]')", - ), - ( - lambda: cudf.Index( - np.array([10, 20, 30, None], dtype="datetime64[ms]") - ), - "DatetimeIndex([1970-01-01 00:00:00.010, " - "1970-01-01 00:00:00.020,\n " - "1970-01-01 00:00:00.030, NaT],\n " - "dtype='datetime64[ms]')", - ), - ( - lambda: cudf.Index(np.array([None] * 10, dtype="datetime64[ms]")), - "DatetimeIndex([NaT, NaT, NaT, NaT, NaT, NaT, NaT, NaT, " - "NaT, NaT], dtype='datetime64[ms]')", - ), - ], -) -def test_generic_index_null(index, expected_repr): - index = index() - actual_repr = repr(index) - - assert expected_repr == actual_repr - - -@pytest.mark.parametrize( - "df,pandas_special_case", - [ - (pd.DataFrame({"a": [1, 2, 3]}, index=[10, 20, None]), False), - ( - pd.DataFrame( - { - "a": [1, None, 3], - "string_col": ["hello", "world", "rapids"], - }, - index=[None, "a", "b"], - ), - True, - ), - (pd.DataFrame([], index=[None, "a", "b"]), False), - (pd.DataFrame({"aa": [None, None]}, index=[None, None]), False), - (pd.DataFrame({"aa": [1, 2, 3]}, index=[None, None, None]), False), - ( - pd.DataFrame( - {"aa": [None, 2, 3]}, - index=np.array([1, None, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.DataFrame( - {"aa": [None, 2, 3]}, - index=np.array([100, None, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.DataFrame( - {"aa": [None, None, None]}, - index=np.array([None, None, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.DataFrame( - {"aa": [1, None, 3]}, - index=np.array([10, 15, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.DataFrame( - {"a": [1, 2, None], "v": [10, None, 22], "p": [100, 200, 300]} - ).set_index(["a", "v"]), - False, - ), - ( - pd.DataFrame( - { - "a": [1, 2, None], - "v": ["n", "c", "a"], - "p": [None, None, None], - } - ).set_index(["a", "v"]), - False, - ), - ( - pd.DataFrame( - { - "a": np.array([1, None, None], dtype="datetime64[ns]"), - "v": ["n", "c", "a"], - "p": [None, None, None], - } - ).set_index(["a", "v"]), - False, - ), - ], -) -def test_dataframe_null_index_repr(df, pandas_special_case): - pdf = df - gdf = cudf.from_pandas(pdf) - - expected_repr = repr(pdf).replace("NaN", "").replace("None", "") - actual_repr = repr(gdf) - - if pandas_special_case: - # Pandas inconsistently print Index null values - # as `None` at some places and `NaN` at few other places - # Whereas cudf is consistent with strings `null` values - # to be printed as `None` everywhere. 
- actual_repr = repr(gdf).replace("None", "") - - assert expected_repr.split() == actual_repr.split() - - -@pytest.mark.parametrize( - "sr,pandas_special_case", - [ - (pd.Series([1, 2, 3], index=[10, 20, None]), False), - (pd.Series([1, None, 3], name="a", index=[None, "a", "b"]), True), - (pd.Series(None, index=[None, "a", "b"], dtype="float"), True), - (pd.Series([None, None], name="aa", index=[None, None]), False), - (pd.Series([1, 2, 3], index=[None, None, None]), False), - ( - pd.Series( - [None, 2, 3], - index=np.array([1, None, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.Series( - [None, None, None], - index=np.array([None, None, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.Series( - [1, None, 3], - index=np.array([10, 15, None], dtype="datetime64[ns]"), - ), - False, - ), - ( - pd.DataFrame( - {"a": [1, 2, None], "v": [10, None, 22], "p": [100, 200, 300]} - ).set_index(["a", "v"])["p"], - False, - ), - ( - pd.DataFrame( - { - "a": [1, 2, None], - "v": ["n", "c", "a"], - "p": [None, None, None], - } - ).set_index(["a", "v"])["p"], - False, - ), - ( - pd.DataFrame( - { - "a": np.array([1, None, None], dtype="datetime64[ns]"), - "v": ["n", "c", "a"], - "p": [None, None, None], - } - ).set_index(["a", "v"])["p"], - False, - ), - ], -) -def test_series_null_index_repr(sr, pandas_special_case): - psr = sr - gsr = cudf.from_pandas(psr) - - expected_repr = repr(psr).replace("NaN", "").replace("None", "") - actual_repr = repr(gsr) - - if pandas_special_case: - # Pandas inconsistently print Index null values - # as `None` at some places and `NaN` at few other places - # Whereas cudf is consistent with strings `null` values - # to be printed as `None` everywhere. - actual_repr = repr(gsr).replace("None", "") - assert expected_repr.split() == actual_repr.split() - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [ - 136457654, - 134736784, - 245345345, - 223432411, - 2343241, - 3634548734, - 23234, - ], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ], -) -@pytest.mark.parametrize("dtype", ["timedelta64[s]", "timedelta64[us]"]) -def test_timedelta_series_s_us_repr(data, dtype): - sr = cudf.Series(data, dtype=dtype) - psr = sr.to_pandas() - - expected = repr(psr).replace("timedelta64[ns]", dtype) - actual = repr(sr) - - assert expected.split() == actual.split() - - -@pytest.mark.parametrize( - "ser, expected_repr", - [ - ( - lambda: cudf.Series([], dtype="timedelta64[ns]"), - textwrap.dedent( - """ - Series([], dtype: timedelta64[ns]) - """ - ), - ), - ( - lambda: cudf.Series([], dtype="timedelta64[ms]"), - textwrap.dedent( - """ - Series([], dtype: timedelta64[ms]) - """ - ), - ), - ( - lambda: cudf.Series( - [1000000, 200000, 3000000], dtype="timedelta64[ns]" - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.001000 - 1 0 days 00:00:00.000200 - 2 0 days 00:00:00.003000 - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [1000000, 200000, 3000000], dtype="timedelta64[ms]" - ), - textwrap.dedent( - """ - 0 0 days 00:16:40 - 1 0 days 00:03:20 - 2 0 days 00:50:00 - dtype: timedelta64[ms] - """ - ), - ), 
- ( - lambda: cudf.Series( - [1000000, 200000, None], dtype="timedelta64[ns]" - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.001000000 - 1 0 days 00:00:00.000200000 - 2 NaT - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [1000000, 200000, None], dtype="timedelta64[ms]" - ), - textwrap.dedent( - """ - 0 0 days 00:16:40 - 1 0 days 00:03:20 - 2 NaT - dtype: timedelta64[ms] - """ - ), - ), - ( - lambda: cudf.Series( - [None, None, None, None, None], dtype="timedelta64[ns]" - ), - textwrap.dedent( - """ - 0 NaT - 1 NaT - 2 NaT - 3 NaT - 4 NaT - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [None, None, None, None, None], dtype="timedelta64[ms]" - ), - textwrap.dedent( - """ - 0 NaT - 1 NaT - 2 NaT - 3 NaT - 4 NaT - dtype: timedelta64[ms] - """ - ), - ), - ( - lambda: cudf.Series( - [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ns]" - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.000000012 - 1 0 days 00:00:00.000000012 - 2 0 days 00:00:00.000000022 - 3 0 days 00:00:00.000000343 - 4 0 days 00:00:00.004353534 - 5 0 days 00:00:00.000435342 - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [12, 12, 22, 343, 4353534, 435342], dtype="timedelta64[ms]" - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.012000 - 1 0 days 00:00:00.012000 - 2 0 days 00:00:00.022000 - 3 0 days 00:00:00.343000 - 4 0 days 01:12:33.534000 - 5 0 days 00:07:15.342000 - dtype: timedelta64[ms] - """ - ), - ), - ( - lambda: cudf.Series( - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - dtype="timedelta64[ns]", - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.000000001 - 1 0 days 00:00:00.000001132 - 2 0 days 00:00:00.023223231 - 3 0 days 00:00:00.000000233 - 4 0 days 00:00:00 - 5 0 days 00:00:00.000000332 - 6 0 days 00:00:00.000000323 - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - dtype="timedelta64[ms]", - ), - textwrap.dedent( - """ - 0 0 days 00:00:00.001000 - 1 0 days 00:00:01.132000 - 2 0 days 06:27:03.231000 - 3 0 days 00:00:00.233000 - 4 0 days 00:00:00 - 5 0 days 00:00:00.332000 - 6 0 days 00:00:00.323000 - dtype: timedelta64[ms] - """ - ), - ), - ( - lambda: cudf.Series( - [ - 13645765432432, - 134736784, - 245345345, - 223432411, - 999992343241, - 3634548734, - 23234, - ], - dtype="timedelta64[ms]", - ), - textwrap.dedent( - """ - 0 157937 days 02:23:52.432000 - 1 1 days 13:25:36.784000 - 2 2 days 20:09:05.345000 - 3 2 days 14:03:52.411000 - 4 11573 days 23:39:03.241000 - 5 42 days 01:35:48.734000 - 6 0 days 00:00:23.234000 - dtype: timedelta64[ms] - """ - ), - ), - ( - lambda: cudf.Series( - [ - 13645765432432, - 134736784, - 245345345, - 223432411, - 999992343241, - 3634548734, - 23234, - ], - dtype="timedelta64[ns]", - ), - textwrap.dedent( - """ - 0 0 days 03:47:25.765432432 - 1 0 days 00:00:00.134736784 - 2 0 days 00:00:00.245345345 - 3 0 days 00:00:00.223432411 - 4 0 days 00:16:39.992343241 - 5 0 days 00:00:03.634548734 - 6 0 days 00:00:00.000023234 - dtype: timedelta64[ns] - """ - ), - ), - ( - lambda: cudf.Series( - [ - 13645765432432, - 134736784, - 245345345, - 223432411, - 999992343241, - 3634548734, - 23234, - ], - dtype="timedelta64[ms]", - name="abc", - ), - textwrap.dedent( - """ - 0 157937 days 02:23:52.432000 - 1 1 days 13:25:36.784000 - 2 2 days 20:09:05.345000 - 3 2 days 14:03:52.411000 - 4 11573 days 23:39:03.241000 - 5 42 days 01:35:48.734000 - 6 0 days 00:00:23.234000 - Name: abc, dtype: timedelta64[ms] - """ - ), - ), - ( - 
lambda: cudf.Series( - [ - 13645765432432, - 134736784, - 245345345, - 223432411, - 999992343241, - 3634548734, - 23234, - ], - dtype="timedelta64[ns]", - index=["a", "b", "z", "x", "y", "l", "m"], - name="hello", - ), - textwrap.dedent( - """ - a 0 days 03:47:25.765432432 - b 0 days 00:00:00.134736784 - z 0 days 00:00:00.245345345 - x 0 days 00:00:00.223432411 - y 0 days 00:16:39.992343241 - l 0 days 00:00:03.634548734 - m 0 days 00:00:00.000023234 - Name: hello, dtype: timedelta64[ns] - """ - ), - ), - ], -) -def test_timedelta_series_ns_ms_repr(ser, expected_repr): - expected = expected_repr - actual = repr(ser()) - - assert expected.split() == actual.split() - - -@pytest.mark.parametrize( - "df,expected_repr", - [ - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [1000000, 200000, 3000000], dtype="timedelta64[s]" - ) - } - ), - textwrap.dedent( - """ - a - 0 11 days 13:46:40 - 1 2 days 07:33:20 - 2 34 days 17:20:00 - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[s]", - ), - "b": [10, 11, 22, 33, 44, 55, 66], - } - ), - textwrap.dedent( - """ - a b - 0 1579 days 08:54:14 10 - 1 NaT 11 - 2 2839 days 15:29:05 22 - 3 2586 days 00:33:31 33 - 4 NaT 44 - 5 42066 days 12:52:14 55 - 6 0 days 06:27:14 66 - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[s]", - index=["a", "b", "c", "d", "e", "f", "g"], - ) - } - ), - textwrap.dedent( - """ - a - a 1579 days 08:54:14 - b NaT - c 2839 days 15:29:05 - d 2586 days 00:33:31 - e NaT - f 42066 days 12:52:14 - g 0 days 06:27:14 - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [1, 2, 3, 4, 5, 6, 7], - index=cudf.Index( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[ms]", - ), - ) - } - ), - textwrap.dedent( - """ - a - 1 days 13:54:17.654 1 - NaT 2 - 2 days 20:09:05.345 3 - 2 days 14:03:52.411 4 - NaT 5 - 42 days 01:35:48.734 6 - 0 days 00:00:23.234 7 - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - ["a", "f", "q", "e", "w", "e", "t"], - index=cudf.Index( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[ns]", - ), - ) - } - ), - textwrap.dedent( - """ - a - 0 days 00:00:00.136457654 a - NaT f - 0 days 00:00:00.245345345 q - 0 days 00:00:00.223432411 e - NaT w - 0 days 00:00:03.634548734 e - 0 days 00:00:00.000023234 t - """ - ), - ), - ], -) -def test_timedelta_dataframe_repr(df, expected_repr): - actual_repr = repr(df()) - - assert actual_repr.split() == expected_repr.split() - - -@pytest.mark.parametrize( - "index, expected_repr", - [ - ( - lambda: cudf.Index( - [1000000, 200000, 3000000], dtype="timedelta64[ms]" - ), - "TimedeltaIndex(['0 days 00:16:40', " - "'0 days 00:03:20', '0 days 00:50:00'], " - "dtype='timedelta64[ms]')", - ), - ( - lambda: cudf.Index( - [None, None, None, None, None], dtype="timedelta64[us]" - ), - "TimedeltaIndex([NaT, NaT, NaT, NaT, NaT], " - "dtype='timedelta64[us]')", - ), - ( - lambda: cudf.Index( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[us]", - ), - "TimedeltaIndex([0 days 00:02:16.457654, NaT, " - "0 days 00:04:05.345345, " - "0 days 00:03:43.432411, NaT," - " 0 days 01:00:34.548734, 0 days 00:00:00.023234]," - " 
dtype='timedelta64[us]')", - ), - ( - lambda: cudf.Index( - [ - 136457654, - None, - 245345345, - 223432411, - None, - 3634548734, - 23234, - ], - dtype="timedelta64[s]", - ), - "TimedeltaIndex([1579 days 08:54:14, NaT, 2839 days 15:29:05," - " 2586 days 00:33:31, NaT, 42066 days 12:52:14, " - "0 days 06:27:14]," - " dtype='timedelta64[s]')", - ), - ], -) -def test_timedelta_index_repr(index, expected_repr): - actual_repr = repr(index()) - - assert actual_repr.split() == expected_repr.split() - - -@pytest.mark.parametrize( - "pmi", - [ - pd.MultiIndex.from_tuples( - [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] - ), - pd.MultiIndex.from_tuples( - [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] * 10 - ), - pd.MultiIndex.from_tuples([(1, "red", 102, "sdf")]), - pd.MultiIndex.from_tuples( - [ - ("abc", 0.234, 1), - ("a", -0.34, 0), - ("ai", 111, 4385798), - ("rapids", 0, 34534534), - ], - names=["alphabets", "floats", "ints"], - ), - ], -) -@pytest.mark.parametrize("max_seq_items", [None, 1, 2, 5, 10, 100]) -def test_multiindex_repr(pmi, max_seq_items): - with pd.option_context("display.max_seq_items", max_seq_items): - gmi = cudf.from_pandas(pmi) - - assert repr(gmi) == repr(pmi) - - -@pytest.mark.parametrize( - "gdi, expected_repr", - [ - ( - lambda: cudf.DataFrame( - { - "a": [None, 1, 2, 3], - "b": ["abc", None, "xyz", None], - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["a", "b"]) - .index, - textwrap.dedent( - """ - MultiIndex([(, 'abc'), - ( 1, ), - ( 2, 'xyz'), - ( 3, )], - names=['a', 'b']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series([None, np.nan, 2, 3], nan_as_null=False), - "b": ["abc", None, "xyz", None], - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["a", "b"]) - .index, - textwrap.dedent( - """ - MultiIndex([(, 'abc'), - ( nan, ), - ( 2.0, 'xyz'), - ( 3.0, )], - names=['a', 'b']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"), - "b": ["abc", None, "xyz", None], - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["a", "b"]) - .index, - textwrap.dedent( - """ - MultiIndex([( 'NaT', 'abc'), - ('1970-01-01 00:00:00.000000001', ), - ('1970-01-01 00:00:00.000000002', 'xyz'), - ('1970-01-01 00:00:00.000000003', )], - names=['a', 'b']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series([None, 1, 2, 3], dtype="datetime64[ns]"), - "b": ["abc", None, "xyz", None], - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["a", "b", "c"]) - .index, - textwrap.dedent( - """ - MultiIndex([( 'NaT', 'abc', 0.345), - ('1970-01-01 00:00:00.000000001', , ), - ('1970-01-01 00:00:00.000000002', 'xyz', 100.0), - ('1970-01-01 00:00:00.000000003', , 10.0)], - names=['a', 'b', 'c']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": ["abc", None, "xyz", None], - "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"), - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["a", "b", "c"]) - .index, - textwrap.dedent( - """ - MultiIndex([('abc', NaT, 0.345), - ( , '0 days 00:00:00.000000001', ), - ('xyz', '0 days 00:00:00.000000002', 100.0), - ( , '0 days 00:00:00.000000003', 10.0)], - names=['a', 'b', 'c']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": ["abc", None, "xyz", None], - "b": cudf.Series([None, 1, 2, 3], dtype="timedelta64[ns]"), - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["c", "a"]) - .index, - textwrap.dedent( - """ - MultiIndex([(0.345, 'abc'), - ( , ), - (100.0, 'xyz'), - ( 10.0, )], - names=['c', 'a']) - """ - ), - ), - ( - 
lambda: cudf.DataFrame( - { - "a": [None, None, None, None], - "b": cudf.Series( - [None, None, None, None], dtype="timedelta64[ns]" - ), - "c": [0.345, np.nan, 100, 10], - } - ) - .set_index(["b", "a"]) - .index, - textwrap.dedent( - """ - MultiIndex([(NaT, ), - (NaT, ), - (NaT, ), - (NaT, )], - names=['b', 'a']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": [1, 2, None, 3, 5], - "b": [ - "abc", - "def, hi, bye", - None, - ", one, two, three, four", - None, - ], - "c": cudf.Series( - [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False - ), - "d": [None, 100, 2000324, None, None], - } - ) - .set_index(["a", "b", "c", "d"]) - .index, - textwrap.dedent( - """ - MultiIndex([( 1, 'abc', 0.3232, ), - ( 2, 'def, hi, bye', nan, 100), - (, , 1.0, 2000324), - ( 3, ', one, two, three, four', , ), - ( 5, , -0.34534, )], - names=['a', 'b', 'c', 'd']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": [1, 2, None, 3, 5], - "b": [ - "abc", - "def, hi, bye", - None, - ", one, two, three, four", - None, - ], - "c": cudf.Series( - [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False - ), - "d": [None, 100, 2000324, None, None], - } - ) - .set_index(["b", "a", "c", "d"]) - .index, - textwrap.dedent( - """ - MultiIndex([( 'abc', 1, 0.3232, ), - ( 'def, hi, bye', 2, nan, 100), - ( , , 1.0, 2000324), - (', one, two, three, four', 3, , ), - ( , 5, -0.34534, )], - names=['b', 'a', 'c', 'd']) - """ - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": ["(abc", "2", None, "3", "5"], - "b": [ - "abc", - "def, hi, bye", - None, - ", one, two, three, four", - None, - ], - "c": cudf.Series( - [0.3232, np.nan, 1, None, -0.34534], nan_as_null=False - ), - "d": [None, 100, 2000324, None, None], - } - ) - .set_index(["a", "b", "c", "d"]) - .index, - textwrap.dedent( - """ - MultiIndex([('(abc', 'abc', 0.3232, ), - ( '2', 'def, hi, bye', nan, 100), - ( , , 1.0, 2000324), - ( '3', ', one, two, three, four', , ), - ( '5', , -0.34534, )], - names=['a', 'b', 'c', 'd']) - """ - ), - ), - ], -) -def test_multiindex_null_repr(gdi, expected_repr): - actual_repr = repr(gdi()) - - assert actual_repr.split() == expected_repr.split() - - -def test_categorical_series_with_nan_repr(): - series = cudf.Series( - [1, 2, np.nan, 10, np.nan, None], nan_as_null=False - ).astype("category") - - expected_repr = textwrap.dedent( - """ - 0 1.0 - 1 2.0 - 2 NaN - 3 10.0 - 4 NaN - 5 - dtype: category - Categories (4, float64): [1.0, 2.0, 10.0, NaN] - """ - ) - - assert repr(series).split() == expected_repr.split() - - sliced_expected_repr = textwrap.dedent( - """ - 2 NaN - 3 10.0 - 4 NaN - 5 - dtype: category - Categories (4, float64): [1.0, 2.0, 10.0, NaN] - """ - ) - - assert repr(series[2:]).split() == sliced_expected_repr.split() - - -def test_categorical_dataframe_with_nan_repr(): - series = cudf.Series( - [1, 2, np.nan, 10, np.nan, None], nan_as_null=False - ).astype("category") - df = cudf.DataFrame({"a": series}) - expected_repr = textwrap.dedent( - """ - a - 0 1.0 - 1 2.0 - 2 NaN - 3 10.0 - 4 NaN - 5 - """ - ) - - assert repr(df).split() == expected_repr.split() - - -def test_categorical_index_with_nan_repr(): - cat_index = cudf.Index( - cudf.Series( - [1, 2, np.nan, 10, np.nan, None], nan_as_null=False - ).astype("category") - ) - - expected_repr = ( - "CategoricalIndex([1.0, 2.0, NaN, 10.0, NaN, ], " - "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')" - ) - - assert repr(cat_index) == expected_repr - - sliced_expected_repr = ( - "CategoricalIndex([NaN, 10.0, NaN, ], " - "categories=[1.0, 2.0, 10.0, 
NaN], ordered=False, dtype='category')" - ) - - assert repr(cat_index[2:]) == sliced_expected_repr - - -def test_empty_series_name(): - ps = pd.Series([], name="abc", dtype="int") - gs = cudf.from_pandas(ps) - - assert repr(ps) == repr(gs) - - -def test_repr_struct_after_concat(): - df = cudf.DataFrame( - { - "a": cudf.Series( - [ - {"sa": 2056831253}, - {"sa": -1463792165}, - {"sa": 1735783038}, - {"sa": 103774433}, - {"sa": -1413247520}, - ] - * 13 - ), - "b": cudf.Series( - [ - {"sa": {"ssa": 1140062029}}, - None, - {"sa": {"ssa": 1998862860}}, - {"sa": None}, - {"sa": {"ssa": -395088502}}, - ] - * 13 - ), - } - ) - pdf = df.to_pandas() - - assert repr(df) == repr(pdf) - - -def test_interval_index_repr(): - pi = pd.Index( - [ - np.nan, - pd.Interval(2.0, 3.0, closed="right"), - pd.Interval(3.0, 4.0, closed="right"), - ] - ) - gi = cudf.from_pandas(pi) - - assert repr(pi) == repr(gi) - - -def test_unique_categories_repr(): - pi = pd.CategoricalIndex(range(10_000)) - gi = cudf.CategoricalIndex(range(10_000)) - expected_repr = repr(pi) - actual_repr = repr(gi) - assert expected_repr == actual_repr - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_categorical_index_ordered(ordered): - pi = pd.CategoricalIndex(range(10), ordered=ordered) - gi = cudf.CategoricalIndex(range(10), ordered=ordered) - - assert repr(pi) == repr(gi) From 1bf6ba289a4229a924589a2efba83c8bd157dcff Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 13 Aug 2025 15:55:37 -0700 Subject: [PATCH 125/366] Run pylibcudf tests without its optional dependencies (#19657) This PR changes cudf CI to run pylibcudf tests twice, once with optionals installed and once without. I'd love to be able to test without pyarrow for our interop bits, but I don't think it's realistic given how heavily we rely on pyarrow in the test suite. Resolves #18201 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19657 --- ci/test_wheel_cudf.sh | 36 ++++++++++++++++++++++++++------- dependencies.yaml | 27 ++++++++++++++++++++++--- python/pylibcudf/pyproject.toml | 7 ++++++- 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index e28ac0514a7..9cbd237511d 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -12,11 +12,38 @@ CUDF_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-do LIBCUDF_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBCUDF_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github python) -rapids-logger "Install cudf, pylibcudf, and test requirements" +rapids-logger "Install pylibcudf and its basic dependencies in a virtual environment" # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints py_test_cudf ./constraints.txt +RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +# To test pylibcudf without its optional dependencies, we create a virtual environment +python -m venv env +. 
env/bin/activate +rapids-pip-retry install \ + -v \ + --constraint ./constraints.txt \ + --constraint "${PIP_CONSTRAINT}" \ + "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ + "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" + +rapids-logger "pytest pylibcudf without optional dependencies" +pushd python/pylibcudf/tests +python -m pytest \ + --cache-clear \ + --numprocesses=8 \ + --dist=worksteal \ + . +popd + +deactivate + +rapids-logger "Install cudf, pylibcudf, and test requirements" + # notes: # # * echo to expand wildcard before adding `[test]` requires for pip @@ -29,12 +56,7 @@ rapids-pip-retry install \ --constraint "${PIP_CONSTRAINT}" \ "$(echo "${CUDF_WHEELHOUSE}"/cudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ - "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" - -RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} -RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ -mkdir -p "${RAPIDS_TESTS_DIR}" - + "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test, pyarrow, numpy]" rapids-logger "pytest pylibcudf" pushd python/pylibcudf/tests diff --git a/dependencies.yaml b/dependencies.yaml index 504a2b81f96..37883b03b4f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -23,6 +23,7 @@ files: - docs - iwyu - notebooks + - numpy_run - py_version - pyarrow_run - rapids_build_skbuild @@ -69,6 +70,7 @@ files: - test_python_common - test_python_cudf_common - test_python_cudf + - numpy_run - test_python_pylibcudf - depends_on_cudf - depends_on_pylibcudf @@ -77,6 +79,7 @@ files: output: none includes: - cuda_version + - numpy_run - py_version - test_python_common - test_python_pylibcudf @@ -86,6 +89,7 @@ files: output: none includes: - cuda_version + - numpy_run - py_version - test_python_common - test_python_cudf_common @@ -169,6 +173,7 @@ files: extras: table: project includes: + - numpy_run - run_common - run_cudf - pyarrow_run @@ -246,6 +251,14 @@ files: - depends_on_libcudf - depends_on_rmm - run_pylibcudf + py_run_pylibcudf_numpy: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: project.optional-dependencies + key: numpy + includes: + - numpy_run py_run_pylibcudf_pyarrow: output: pyproject pyproject_dir: python/pylibcudf @@ -261,6 +274,7 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_cupy - pyarrow_run - test_python_common - test_python_cudf_common @@ -311,6 +325,7 @@ files: table: project.optional-dependencies key: test includes: + - numpy_run - test_python_common - test_python_cudf_polars py_build_dask_cudf: @@ -326,6 +341,7 @@ files: extras: table: project includes: + - numpy_run - run_common - run_dask_cudf - depends_on_cudf @@ -499,6 +515,11 @@ dependencies: - matrix: # Fallback for no matrix packages: - *numba_cuda_cu12 + numpy_run: + common: + - output_types: [conda, requirements, pyproject] + packages: + - numpy>=1.23,<3.0a0 pyarrow_run: common: - output_types: [conda] @@ -642,7 +663,6 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - - &numpy numpy>=1.23,<3.0a0 - pandas>=2.0,<2.4.0dev0 run_pylibcudf: common: @@ -815,14 +835,16 @@ dependencies: - mmh3 - nanoarrow - hypothesis>=6.131.7 - - *numpy + - *numba - pandas - output_types: conda packages: - python-xxhash + - *numba_cuda - output_types: [pyproject, requirements] packages: - xxhash + - *numba_cuda_cu12 
test_python_cudf: common: - output_types: [conda, requirements, pyproject] @@ -888,7 +910,6 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - dask-cuda==25.10.*,>=0.0.0a0 - - *numpy - rich test_python_narwhals: common: diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 0c7f89111e3..081d4e54a4c 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -39,11 +39,13 @@ classifiers = [ [project.optional-dependencies] test = [ + "cupy-cuda12x>=12.0.0", "fastavro>=0.22.9", "hypothesis>=6.131.7", "mmh3", "nanoarrow", - "numpy>=1.23,<3.0a0", + "numba-cuda[cu12]>=0.18.0,<0.19.0a0", + "numba>=0.60.0,<0.62.0a0", "pandas", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", @@ -56,6 +58,9 @@ pyarrow = [ "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +numpy = [ + "numpy>=1.23,<3.0a0", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf" From f9fcc122dbcd0057043c879b19ec65be5f4e6856 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:23:06 -0400 Subject: [PATCH 126/366] Support output-type for MEDIAN/QUANTILE aggregation in cudf::reduce (#19267) Adds support to return the aggregation result for MEDIAN and QUANTILE as specified in the `output_type` parameter in the `cudf::reduce` API. The underlying code calls `cudf::detail::quantile` which has an `exact` parameter that returns a `double` type if set to true. Otherwise, it will return a value using the input type. The code change here is to set `exact` to true only if the `output_type` for `cudf::reduce` is set to `FLOAT64` This means the current behavior can be achieved by always setting the `output_type` to `FLOAT64`. 
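
For reference, a minimal sketch of how the new behavior is exercised
(assumes an existing `cudf::column_view` named `input` of type INT32; the
variable names are illustrative only):

```cpp
#include <cudf/aggregation.hpp>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>

// Only a single quantile value is accepted by cudf::reduce.
auto agg = cudf::make_quantile_aggregation<cudf::reduce_aggregation>(
  {0.5}, cudf::interpolation::LINEAR);

// output_type FLOAT64 -> exact (interpolated double) quantile, the prior behavior
auto exact = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::FLOAT64});

// any other output_type -> non-exact quantile, cast to the requested type
auto inexact = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT32});
```

With `exact == false`, `cudf::detail::quantile` selects a value of the input
type, which is then cast to `output_type` instead of being routed through a
double.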
Authors: - David Wendt (https://github.com/davidwendt) - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19267 --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/reduction.hpp | 80 +++++------ .../reduction/detail/reduction_functions.hpp | 22 +++ cpp/src/reductions/quantile.cu | 58 ++++++++ cpp/src/reductions/reductions.cpp | 45 ++---- cpp/tests/reductions/reduction_tests.cpp | 132 +++++++++++------- .../cudf_polars/tests/expressions/test_agg.py | 21 +-- 7 files changed, 218 insertions(+), 141 deletions(-) create mode 100644 cpp/src/reductions/quantile.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 50b96a9baf4..6b9601856c0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -638,6 +638,7 @@ add_library( src/reductions/nth_element.cu src/reductions/nunique.cu src/reductions/product.cu + src/reductions/quantile.cu src/reductions/reductions.cpp src/reductions/scan/rank_scan.cu src/reductions/scan/ewm.cu diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index a96bde9cefd..e4150f4153a 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -35,13 +35,14 @@ namespace CUDF_EXPORT cudf { */ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; +// clang-format off /** * @brief Computes the reduction of the values in all rows of a column. * - * This function does not detect overflows in reductions. When `output_dtype` + * This function does not detect overflows in reductions. When `output_type` * does not match the `col.type()`, their values may be promoted to * `int64_t` or `double` for computing aggregations and then cast to - * `output_dtype` before returning. + * `output_type` before returning. * * Only `min` and `max` ops are supported for reduction of non-arithmetic * types (e.g. timestamp or string). @@ -49,46 +50,44 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; * Any null values are skipped for the operation. * If the reduction fails, the output scalar returns with `%is_valid()==false`. * - * For empty or all-null input, the result is generally a null scalar except for specific + * For empty or all-null input, the result is generally an invalid scalar except for specific * aggregations where the aggregation has a well-defined output. * - * If the input column is an arithmetic type, the `output_dtype` can be any arithmetic + * If the input column is an arithmetic type, the `output_type` can be any arithmetic * type. If the input column is a non-arithmetic type (e.g. timestamp or string) - * the `output_dtype` must match the `col.type()`. If the reduction type is `any` or - * `all`, the `output_dtype` must be type BOOL8. + * the `output_type` must match the `col.type()`. If the reduction type is `any` or + * `all`, the `output_type` must be type BOOL8. 
* * | Aggregation | Output Type | Init Value | Empty Input | Comments | - * | :---------: | ----------- | ---------- | ----------- | -------- | - * | SUM/PRODUCT | output_dtype | yes | NA | Input accumulated into output_dtype variable | - * | SUM_OF_SQUARES | output_dtype | no | NA | Input accumulated into output_dtype variable | - * | MIN/MAX | col.type | yes | NA | Supports arithmetic, timestamp, duration, string | + * | :---------: | ----------- | :--------: | ----------- | -------- | + * | SUM/PRODUCT | output_type | yes | NA | Input accumulated into output_type variable | + * | SUM_OF_SQUARES | output_type | no | NA | Input accumulated into output_type variable | + * | MIN/MAX | col.type | yes | NA | Supports arithmetic, timestamp, duration, string types only | * | ANY/ALL | BOOL8 | yes | True for ALL only | Checks for non-zero elements | - * | MEAN/VARIANCE/STD | FLOAT32/FLOAT64 | no | NA | output_dtype must be a float type | - * | MEDIAN/QUANTILE | FLOAT64 | no | NA | | - * | NUNIQUE | output_dtype | no | 1 if all-nulls | May process null rows | + * | MEAN/VARIANCE/STD | FLOAT32/FLOAT64 | no | NA | output_type must be a float type | + * | MEDIAN/QUANTILE | output_type | no | NA | Exact value if output_type is FLOAT64. See @ref cudf::quantile | + * | NUNIQUE | output_type | no | 1 if all-nulls | May process null rows | * | NTH_ELEMENT | col.type | no | NA | | * | BITWISE_AGG | col.type | no | NA | Supports only integral types | - * | HISTOGRAM/MERGE_HISTOGRAM | LIST of col.type | no | empty list | | - * | COLLECT_LIST/COLLECT_SET | LIST of col.type | no | empty list | | - * | TDIGEST/MERGE_TDIGEST | STRUCT | no | empty struct | tdigest scalar is returned | - * | HOST_UDF | output_dtype | yes | NA | Custom UDF could ignore output_dtype | + * | HISTOGRAM/MERGE_HISTOGRAM | LIST of col.type | no | empty list returned | | + * | COLLECT_LIST/COLLECT_SET | LIST of col.type | no | empty list returned | | + * | TDIGEST/MERGE_TDIGEST | STRUCT | no | empty struct returned | tdigest scalar is returned | + * | HOST_UDF | output_type | yes | NA | Custom UDF could ignore output_type | * * The NA in the table indicates an output scalar with `%is_valid()==false` * - * @throw cudf::logic_error if reduction is called for non-arithmetic output + * @throw std::invalid_argument if reduction is called for non-arithmetic output * type and operator other than `min` and `max`. - * @throw cudf::logic_error if input column data type is not convertible to - * `output_dtype`. - * @throw cudf::logic_error if `min` or `max` reduction is called and the + * @throw std::invalid_argument if input column data type is not convertible to `output_type`. + * @throw std::invalid_argument if `min` or `max` reduction is called and the * output type does not match the input column data type. - * @throw cudf::logic_error if `any` or `all` reduction is called and the - * output type is not BOOL8. - * @throw cudf::logic_error if `mean`, `var`, or `std` reduction is called and - * the `output_dtype` is not floating point. + * @throw std::invalid_argument if `any` or `all` reduction is called and the output type is not BOOL8. + * @throw std::invalid_argument if `mean`, `var`, or `std` reduction is called and + * the `output_type` is not floating point. 
* * @param col Input column view * @param agg Aggregation operator applied by the reduction - * @param output_dtype The output scalar type + * @param output_type The output scalar type * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output scalar with reduce result @@ -96,9 +95,10 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; std::unique_ptr reduce( column_view const& col, reduce_aggregation const& agg, - data_type output_dtype, + data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +// clang-format on /** * @brief Computes the reduction of the values in all rows of a column with an initial value @@ -108,12 +108,12 @@ std::unique_ptr reduce( * @see cudf::reduce(column_view const&,reduce_aggregation * const&,data_type,rmm::cuda_stream_view,rmm::device_async_resource_ref) for more details * - * @throw cudf::logic_error if reduction is not `sum`, `product`, `min`, `max`, `any`, or `all` + * @throw std::invalid_argument if reduction is not `sum`, `product`, `min`, `max`, `any`, or `all` * and `init` is specified. * * @param col Input column view * @param agg Aggregation operator applied by the reduction - * @param output_dtype The output scalar type + * @param output_type The output scalar type * @param init The initial value of the reduction * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory @@ -122,7 +122,7 @@ std::unique_ptr reduce( std::unique_ptr reduce( column_view const& col, reduce_aggregation const& agg, - data_type output_dtype, + data_type output_type, std::optional> init, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -130,10 +130,10 @@ std::unique_ptr reduce( /** * @brief Compute reduction of each segment in the input column * - * This function does not detect overflows in reductions. When `output_dtype` + * This function does not detect overflows in reductions. When `output_type` * does not match the `segmented_values.type()`, their values may be promoted to * `int64_t` or `double` for computing aggregations and then cast to - * `output_dtype` before returning. + * `output_type` before returning. * * Null values are treated as identities during reduction. * @@ -143,7 +143,7 @@ std::unique_ptr reduce( * If any index in `offsets` is out of bound of `segmented_values`, the behavior * is undefined. * - * If the input column has arithmetic type, `output_dtype` can be any arithmetic + * If the input column has arithmetic type, `output_type` can be any arithmetic * type. If the input column has non-arithmetic type, e.g. timestamp, the same * output type must be specified. * @@ -152,17 +152,17 @@ std::unique_ptr reduce( * @throw cudf::logic_error if reduction is called for non-arithmetic output * type and operator other than `min` and `max`. * @throw cudf::logic_error if input column data type is not convertible to - * `output_dtype` type. + * `output_type` type. * @throw cudf::logic_error if `min` or `max` reduction is called and the - * `output_dtype` does not match the input column data type. + * `output_type` does not match the input column data type. 
 * @throw cudf::logic_error if `any` or `all` reduction is called and the
- * `output_dtype` is not BOOL8.
+ * `output_type` is not BOOL8.
 *
 * @param segmented_values Column view of segmented inputs
 * @param offsets Each segment's offset of `segmented_values`. A list of offsets with size
 * `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - offsets[i]`.
 * @param agg Aggregation operator applied by the reduction
- * @param output_dtype The output column type
+ * @param output_type The output column type
 * @param null_handling If `INCLUDE`, the reduction is valid if all elements in a segment are valid,
 * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid,
 * otherwise null.
@@ -174,7 +174,7 @@ std::unique_ptr<column> segmented_reduce(
   column_view const& segmented_values,
   device_span<size_type const> offsets,
   segmented_reduce_aggregation const& agg,
-  data_type output_dtype,
+  data_type output_type,
   null_policy null_handling,
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
@@ -187,7 +187,7 @@ std::unique_ptr<column> segmented_reduce(
 * @param offsets Each segment's offset of `segmented_values`. A list of offsets with size
 * `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - offsets[i]`.
 * @param agg Aggregation operator applied by the reduction
- * @param output_dtype The output column type
+ * @param output_type The output column type
 * @param null_handling If `INCLUDE`, the reduction is valid if all elements in a segment are valid,
 * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid,
 * otherwise null.
@@ -200,7 +200,7 @@ std::unique_ptr<column> segmented_reduce(
   column_view const& segmented_values,
   device_span<size_type const> offsets,
   segmented_reduce_aggregation const& agg,
-  data_type output_dtype,
+  data_type output_type,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
   rmm::cuda_stream_view stream = cudf::get_default_stream(),
diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
index a90f43a3b54..910e3b9c2e3 100644
--- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
@@ -367,6 +367,28 @@ std::unique_ptr<scalar> bitwise_reduction(bitwise_op bit_op,
                                           rmm::cuda_stream_view stream,
                                           rmm::device_async_resource_ref mr);

+/**
+ * @brief Computes the quantile value of the elements in the input column
+ *
+ * @see cudf::quantile for additional details
+ *
+ * @throw std::invalid_argument if the input column type is not an arithmetic type
+ *
+ * @param col Input column to compute quantile
+ * @param quantile_value Quantile value in range [0,1]
+ * @param interpolation Interpolation method
+ * @param output_type Data type of the returned scalar
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return Quantile as scalar of type `output_type`
+ */
+std::unique_ptr<scalar> quantile(column_view const& col,
+                                 double quantile_value,
+                                 cudf::interpolation interpolation,
+                                 cudf::data_type const output_type,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::device_async_resource_ref mr);
+
 /**
  * @brief Computes the number of unique elements in the input column
  *
diff --git a/cpp/src/reductions/quantile.cu b/cpp/src/reductions/quantile.cu
new file mode 100644
index 00000000000..946ed377fe7
--- /dev/null
+++ b/cpp/src/reductions/quantile.cu
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simple.cuh"
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace cudf {
+namespace reduction {
+namespace detail {
+
+std::unique_ptr<scalar> quantile(column_view const& col,
+                                 double quantile_value,
+                                 cudf::interpolation interpolation,
+                                 cudf::data_type const output_type,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::device_async_resource_ref mr)
+{
+  auto current_mr = cudf::get_current_device_resource_ref();
+  auto sorted_indices =
+    cudf::detail::sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr);
+  auto valid_sorted_indices =
+    cudf::detail::split(*sorted_indices, {col.size() - col.null_count()}, stream)[0];
+  // only perform an exact quantile calculation for output-type FLOAT64
+  // @see cudf::quantile for more details on this parameter
+  auto exact   = output_type.id() == cudf::type_id::FLOAT64;
+  auto col_ptr = cudf::detail::quantile(
+    col, {quantile_value}, interpolation, valid_sorted_indices, exact, stream, current_mr);
+  auto result = cudf::detail::get_element(*col_ptr, 0, stream, mr);
+  if (result->type().id() == output_type.id()) { return result; }
+  return cudf::type_dispatcher(output_type,
+                               cudf::reduction::simple::detail::cast_numeric_scalar_fn<double>{},
+                               static_cast<numeric_scalar<double>*>(result.get()),
+                               stream,
+                               mr);
+}
+}  // namespace detail
+}  // namespace reduction
+}  // namespace cudf
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 3dd0db63c0c..4c75cc312ba 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -67,33 +67,14 @@ std::unique_ptr<scalar> reduce_aggregate_impl(
       return standard_deviation(col, output_dtype, var_agg._ddof, stream, mr);
     }
     case aggregation::MEDIAN: {
-      auto current_mr = cudf::get_current_device_resource_ref();
-      auto sorted_indices =
-        cudf::detail::sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr);
-      auto valid_sorted_indices =
-        cudf::detail::split(*sorted_indices, {col.size() - col.null_count()}, stream)[0];
-      auto col_ptr = cudf::detail::quantile(
-        col, {0.5}, interpolation::LINEAR, valid_sorted_indices, true, stream, current_mr);
-      return cudf::detail::get_element(*col_ptr, 0, stream, mr);
+      return quantile(col, 0.5, interpolation::LINEAR, output_dtype, stream, mr);
     }
     case aggregation::QUANTILE: {
-      auto quantile_agg = static_cast<cudf::detail::quantile_aggregation const&>(agg);
-      CUDF_EXPECTS(quantile_agg._quantiles.size() == 1,
-                   "Reduction quantile accepts only one quantile value");
-      auto current_mr = cudf::get_current_device_resource_ref();
-      auto sorted_indices =
-        cudf::detail::sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr);
-      auto valid_sorted_indices =
-        cudf::detail::split(*sorted_indices, {col.size() - col.null_count()}, stream)[0];
-
-      auto col_ptr = cudf::detail::quantile(col,
-                                            quantile_agg._quantiles,
-                                            quantile_agg._interpolation,
-                                            valid_sorted_indices,
-                                            true,
-                                            stream,
-                                            current_mr);
-      return cudf::detail::get_element(*col_ptr, 0, stream, mr);
+      auto qagg = static_cast<cudf::detail::quantile_aggregation const&>(agg);
+      CUDF_EXPECTS(qagg._quantiles.size() == 1,
+                   "Reduction quantile accepts only one quantile value",
+                   std::invalid_argument);
+      return quantile(col, qagg._quantiles.front(), qagg._interpolation, output_dtype, stream, mr);
     }
     case aggregation::NUNIQUE: {
       auto nunique_agg = static_cast<cudf::detail::nunique_aggregation const&>(agg);
@@ -121,13 +102,15 @@
     }
     case aggregation::TDIGEST: {
       CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
-                   "Tdigest aggregations expect output type to be STRUCT");
+                   "Tdigest aggregations expect output type to be STRUCT",
+                   std::invalid_argument);
       auto td_agg = static_cast<cudf::detail::tdigest_aggregation const&>(agg);
       return tdigest::detail::reduce_tdigest(col, td_agg.max_centroids, stream, mr);
     }
     case aggregation::MERGE_TDIGEST: {
       CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
-                   "Tdigest aggregations expect output type to be STRUCT");
+                   "Tdigest aggregations expect output type to be STRUCT",
+                   std::invalid_argument);
       auto td_agg = static_cast<cudf::detail::merge_tdigest_aggregation const&>(agg);
       return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr);
     }
@@ -135,14 +118,15 @@
       auto const& udf_base_ptr = dynamic_cast<cudf::detail::host_udf_aggregation const&>(agg).udf_ptr;
       auto const udf_ptr       = dynamic_cast<cudf::reduce_host_udf const*>(udf_base_ptr.get());
-      CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HOST_UDF instance for reduction.");
+      CUDF_EXPECTS(
+        udf_ptr != nullptr, "Invalid HOST_UDF instance for reduction.", std::invalid_argument);
       return (*udf_ptr)(col, output_dtype, init, stream, mr);
     }
     case aggregation::BITWISE_AGG: {
       auto const bitwise_agg = static_cast<cudf::detail::bitwise_aggregation const&>(agg);
       return bitwise_reduction(bitwise_agg.bit_op, col, stream, mr);
     }
-    default: CUDF_FAIL("Unsupported reduction operator");
+    default: CUDF_FAIL("Unsupported reduction operator", std::invalid_argument);
   }
 }
@@ -214,7 +198,8 @@ std::unique_ptr<scalar> reduce(column_view const& col,
         agg.kind == aggregation::HOST_UDF)) {
     CUDF_FAIL(
       "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF "
-      "aggregation types");
+      "aggregation types",
+      std::invalid_argument);
   }

   // Returns default scalar if input column is empty or all null
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index 42ffcf7d45a..ff0806d6dfa 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -1117,6 +1117,20 @@ TEST_F(ReductionEmptyTest, empty_column)
   result = cudf::reduce(col_nulls, *nunique_agg, size_data_type);
   EXPECT_EQ(result->is_valid(), true);
   EXPECT_EQ(dynamic_cast<cudf::numeric_scalar<cudf::size_type>*>(result.get())->value(), 1);
+
+  auto double_type = cudf::data_type{cudf::type_id::FLOAT64};
+  auto median_agg  = cudf::make_median_aggregation<cudf::reduce_aggregation>();
+  result           = cudf::reduce(col0, *median_agg, double_type);
+  EXPECT_EQ(result->is_valid(), false);
+  result = cudf::reduce(col_nulls, *median_agg, double_type);
+  EXPECT_EQ(result->is_valid(), false);
+
+  auto quantile_agg =
+    cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0.0}, cudf::interpolation::LINEAR);
+  result = cudf::reduce(col0, *quantile_agg, double_type);
+  EXPECT_EQ(result->is_valid(), false);
+  result = cudf::reduce(col_nulls, *quantile_agg, double_type);
+  EXPECT_EQ(result->is_valid(), false);
 }

 TEST_F(ReductionEmptyTest, Errors)
@@ -1420,6 +1434,7 @@ TYPED_TEST(ReductionTest, Median)
   std::vector<int> int_values({6, -14, 13, 64, 0, -13, -20, 45});
   std::vector<bool> host_bools({true, true, true, false, true, true, true, true});
   std::vector<T> v = convert_values<T>(int_values);
+  auto output_type = cudf::data_type{cudf::type_to_id<T>()};
   // test without nulls
   cudf::test::fixed_width_column_wrapper<T> col(v.begin(), v.end());
   double expected_value = [] {
     if (std::is_signed_v<T>) return 3.0;
     return 13.5;
   }();
-  EXPECT_EQ(
-    this->template reduction_test<T, double>(col, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
-      .first,
-    expected_value);
+  EXPECT_EQ(this
+              ->template reduction_test<T, double>(
+                col, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
+              .first,
+            expected_value);

   auto col_odd              = cudf::split(col, {1})[1];
   double expected_value_odd = [] {
     if (std::is_signed_v<T>) return 0.0;
     return 13.0;
   }();
   EXPECT_EQ(this
               ->template reduction_test<T, double>(
-                col_odd, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
+                col_odd, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
               .first,
             expected_value_odd);

   // test with nulls
   cudf::test::fixed_width_column_wrapper<T> col_nulls = construct_null_column(v, host_bools);
   double expected_null_value = [] {
     if (std::is_signed_v<T>) return 0.0;
     return 13.0;
   }();

   EXPECT_EQ(this
               ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
+                col_nulls, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
               .first,
             expected_null_value);

   auto col_nulls_odd             = cudf::split(col_nulls, {1})[1];
   double expected_null_value_odd = [] {
     if (std::is_signed_v<T>) return -6.5;
     return 13.5;
   }();
   EXPECT_EQ(this
               ->template reduction_test<T, double>(
-                col_nulls_odd, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
+                col_nulls_odd, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
               .first,
             expected_null_value_odd);
 }
@@ -1480,37 +1496,42 @@ TYPED_TEST(ReductionTest, Quantile)
   std::vector<bool> host_bools({true, true, true, false, true, true, true, true});
   std::vector<T> v = convert_values<T>(int_values);
   cudf::interpolation interp{cudf::interpolation::LINEAR};
+  auto output_type = cudf::data_type{cudf::type_to_id<T>()};

   // test without nulls
   cudf::test::fixed_width_column_wrapper<T> col(v.begin(), v.end());

   double expected_value0 = std::is_same_v<T, bool> || std::is_unsigned_v<T> ? v[4] : v[6];
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0.0}, interp))
-              .first,
-            expected_value0);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0.0}, interp), output_type)
+      .first,
+    expected_value0);
   double expected_value1 = v[3];
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1.0}, interp))
-              .first,
-            expected_value1);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1.0}, interp), output_type)
      .first,
+    expected_value1);

   // test with nulls
   cudf::test::fixed_width_column_wrapper<T> col_nulls = construct_null_column(v, host_bools);
   double expected_null_value1                         = v[7];

-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0}, interp))
-              .first,
-            expected_value0);
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1}, interp))
-              .first,
-            expected_null_value1);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0}, interp), output_type)
+      .first,
+    expected_value0);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1}, interp), output_type)
+      .first,
+    expected_null_value1);
 }

 TYPED_TEST(ReductionTest, UniqueCount)
@@ -2576,20 +2597,22 @@ TYPED_TEST(DictionaryReductionTest, Median)
   using T = TypeParam;
   std::vector<int> int_values({6, -14, 13, 64, 0, -13, -20, 45});
   std::vector<T> v = convert_values<T>(int_values);
+  auto output_type = cudf::data_type{cudf::type_to_id<T>()};

   // test without nulls
   cudf::test::dictionary_column_wrapper<T> col(v.begin(), v.end());
-  EXPECT_EQ(
-    this->template reduction_test<T, double>(col, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
-      .first,
-    (std::is_signed_v<T>) ?
-    3.0 : 13.5);
+  EXPECT_EQ(this
+              ->template reduction_test<T, double>(
+                col, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
+              .first,
+            (std::is_signed_v<T>) ? 3.0 : 13.5);

   // test with nulls
   std::vector<bool> validity({true, true, true, false, true, true, true, true});
   cudf::test::dictionary_column_wrapper<T> col_nulls(v.begin(), v.end(), validity.begin());
   EXPECT_EQ(this
               ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_median_aggregation<cudf::reduce_aggregation>())
+                col_nulls, *cudf::make_median_aggregation<cudf::reduce_aggregation>(), output_type)
               .first,
             (std::is_signed_v<T>) ? 0.0 : 13.0);
 }
@@ -2600,35 +2623,40 @@ TYPED_TEST(DictionaryReductionTest, Quantile)
   std::vector<int> int_values({6, -14, 13, 64, 0, -13, -20, 45});
   std::vector<T> v = convert_values<T>(int_values);
   cudf::interpolation interp{cudf::interpolation::LINEAR};
+  auto output_type = cudf::data_type{cudf::type_to_id<T>()};

   // test without nulls
   cudf::test::dictionary_column_wrapper<T> col(v.begin(), v.end());

   double expected_value = std::is_same_v<T, bool> || std::is_unsigned_v<T> ? 0.0 : -20.0;
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0.0}, interp))
-              .first,
-            expected_value);
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1.0}, interp))
-              .first,
-            64.0);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0.0}, interp), output_type)
+      .first,
+    expected_value);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1.0}, interp), output_type)
+      .first,
+    64.0);

   // test with nulls
   std::vector<bool> validity({true, true, true, false, true, true, true, true});
   cudf::test::dictionary_column_wrapper<T> col_nulls(v.begin(), v.end(), validity.begin());

-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0}, interp))
-              .first,
-            expected_value);
-  EXPECT_EQ(this
-              ->template reduction_test<T, double>(
-                col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1}, interp))
-              .first,
-            45.0);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({0}, interp), output_type)
+      .first,
+    expected_value);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T, double>(
+        col_nulls, *cudf::make_quantile_aggregation<cudf::reduce_aggregation>({1}, interp), output_type)
+      .first,
+    45.0);
 }

 struct ListReductionTest : public cudf::test::BaseFixture {
diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py
index f3dacb57508..9bcd4bd5c4b 100644
--- a/python/cudf_polars/tests/expressions/test_agg.py
+++ b/python/cudf_polars/tests/expressions/test_agg.py
@@ -8,7 +8,6 @@
 from cudf_polars.dsl import expr
 from cudf_polars.testing.asserts import (
-    DEFAULT_BLOCKSIZE_MODE,
     assert_gpu_result_equal,
     assert_ir_translation_raises,
 )
@@ -72,17 +71,7 @@ def df(dtype, with_nulls, is_sorted):
 def test_agg(df, agg):
     expr = getattr(pl.col("a"), agg)()
     q = df.select(expr)
-
-    # https://github.com/rapidsai/cudf/issues/15852
-    check_dtypes = agg not in {"median"}
-    if (
-        not check_dtypes
-        and q.collect_schema()["a"] != pl.Float64
-        and DEFAULT_BLOCKSIZE_MODE == "default"
-    ):
-        with pytest.raises(AssertionError):
-            assert_gpu_result_equal(q)
-    assert_gpu_result_equal(q, check_dtypes=check_dtypes, check_exact=False)
+    assert_gpu_result_equal(q, check_exact=False)


 def test_bool_agg(agg, request):
@@ -109,13 +98,7 @@ def test_cum_agg_reverse_unsupported(cum_agg):
 def test_quantile(df, q, interp):
     expr = pl.col("a").quantile(q, interp)
     q = df.select(expr)
-
-    # https://github.com/rapidsai/cudf/issues/15852
q.collect_schema()["a"] == pl.Float64 - if not check_dtypes: - with pytest.raises(AssertionError): - assert_gpu_result_equal(q) - assert_gpu_result_equal(q, check_dtypes=check_dtypes, check_exact=False) + assert_gpu_result_equal(q, check_exact=False) def test_quantile_invalid_q(df): From 2063844cedfdf542d4e7b5e6ce083cefb4bd5ac6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Aug 2025 10:47:25 -0700 Subject: [PATCH 127/366] Avoid cudf.pandas fallback for `pandas.array.NumpyExtensionArray` of strings (#19558) This case was falling back when _sometimes_ this case can be faithfully represented in cuDF. The exception to _sometimes_ is when the data has missing values. pandas strings, pre- and post- pandas 3.0, can have a varied amount of missing value sentinels (`None`, `np.nan`, `pd.NA`) depending if the pandas string type is `object` or `pd.StringDtype`. I don't anticipate it being practical to preserve which missing value sentinel(s) were specified in pandas while round-tripping though cuDF. Currently cuDF always `to_pandas` string data as `object` type with `None` as the missing value sentinel which isn't always correct (per the additions to `conftest-patch.py`). Discussed offline, it probably isn't worth falling back if strings with missing value could never be accelerated. Additionally, defines `DataFrame.__iter__` on the proxy object to short circuit to returning `pandas.DataFrame.__iter__` to avoid the fallback logic Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19558 --- python/cudf/cudf/core/column/column.py | 6 +- python/cudf/cudf/pandas/_wrappers/pandas.py | 1 + .../cudf/pandas/scripts/conftest-patch.py | 93 +++++++++++++++++++ .../cudf/tests/series/test_constructors.py | 15 +++ 4 files changed, 113 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a2c21071bbe..c462278d2d4 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -3046,8 +3046,10 @@ def as_column( if ( cudf.get_option("mode.pandas_compatible") and inferred_dtype == "mixed" - and not isinstance( - pyarrow_array.type, (pa.ListType, pa.StructType) + and not ( + pa.types.is_list(pyarrow_array.type) + or pa.types.is_struct(pyarrow_array.type) + or pa.types.is_string(pyarrow_array.type) ) ): raise MixedTypeError("Cannot create column with mixed types") diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 492334c9416..95f559f4b6f 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -298,6 +298,7 @@ def _DataFrame__dtypes(self): "_accessors": set(), "_ipython_canary_method_should_not_exist_": ignore_ipython_canary_check, "dtypes": property(_DataFrame__dtypes), + "__iter__": custom_iter, }, ) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index c9795a643a9..3869116496e 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -104,6 +104,9 @@ def pytest_unconfigure(config): "tests/apply/test_frame_apply.py::test_apply_empty_infer_type[python-1-True-mean-index]", "tests/apply/test_frame_apply.py::test_apply_function_runs_once", 
"tests/apply/test_frame_apply.py::test_apply_raw_function_runs_once[python]", + "tests/apply/test_frame_apply.py::test_mixed_column_raises[max-df0]", + "tests/apply/test_frame_apply.py::test_mixed_column_raises[min-df0]", + "tests/apply/test_frame_apply.py::test_mixed_column_raises[sum-df0]", "tests/apply/test_frame_apply.py::test_nuiscance_columns", "tests/apply/test_frame_apply.py::test_nunique_empty", "tests/apply/test_frame_transform.py::test_transform_listlike[axis='columns'-ops0-names0]", @@ -829,6 +832,22 @@ def pytest_unconfigure(config): "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__rtruediv__-other1]", "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__truediv__-1.0]", "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__truediv__-other1]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int16Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int16Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int32Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int32Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int64Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int64Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int8Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int8Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt16Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt16Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt32Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt32Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt64Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt64Dtype-__rmul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt8Dtype-__mul__]", + "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt8Dtype-__rmul__]", "tests/arrays/integer/test_arithmetic.py::test_values_multiplying_large_series_by_NA", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Int16Dtype]", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Int32Dtype]", @@ -5382,6 +5401,12 @@ def pytest_unconfigure(config): "tests/frame/methods/test_tz_localize.py::TestTZLocalize::test_tz_localize_copy_inplace_mutate[Series-True]", "tests/frame/methods/test_update.py::TestDataFrameUpdate::test_update_modify_view", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NoneType]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[Decimal]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NaTType]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NAType]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[Decimal-columns1]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[NaTType-columns1]", + "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[NAType-columns1]", 
"tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_empty", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_empty_normalize", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[NoneType-columns1]", @@ -6021,6 +6046,8 @@ def pytest_unconfigure(config): "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_multiple_out_of_bounds[True]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nan_in_multiindex_columns[False]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nan_in_multiindex_columns[True]", + "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nan_level[False]", + "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nan_level[True]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nullable_dtype[False]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_nullable_dtype[True]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_order_with_unsorted_levels_multi_row_2[False]", @@ -6956,6 +6983,10 @@ def pytest_unconfigure(config): "tests/groupby/methods/test_nth.py::test_nth_after_selection[any-selection2]", "tests/groupby/methods/test_nth.py::test_nth_column_order", "tests/groupby/methods/test_nth.py::test_nth_indexed", + "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[Decimal-nth]", + "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NaTType-nth]", + "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NAType-nth]", + "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NoneType-nth]", "tests/groupby/methods/test_nth.py::test_nth_multi_grouper", "tests/groupby/methods/test_nth.py::test_nth_multi_index_as_expected", "tests/groupby/methods/test_nth.py::test_nth_with_na_object[NoneType--1]", @@ -7126,6 +7157,12 @@ def pytest_unconfigure(config): "tests/groupby/methods/test_value_counts.py::test_compound[string=string[python]-True-True-expected_rows2-expected_count2-expected_group_size2-True]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NoneType-False-count-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NoneType-True-proportion-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[Decimal-False-count-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[Decimal-True-proportion-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NaTType-False-count-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NaTType-True-proportion-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NAType-False-count-False-expected_data1-expected_index1]", + "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NAType-True-proportion-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_dropna_combinations[True-False-expected_rows2-expected_values2]", "tests/groupby/methods/test_value_counts.py::test_value_counts_sort[False-False-False]", 
"tests/groupby/methods/test_value_counts.py::test_value_counts_sort[False-True-False]", @@ -7424,6 +7461,24 @@ def pytest_unconfigure(config): "tests/groupby/test_groupby_dropna.py::test_groupby_apply_with_dropna_for_multi_index[dropna_true_no_nan]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_agg[False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NoneType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[Decimal-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NaTType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NAType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-Decimal-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NaTType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NAType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NoneType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-Decimal-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NaTType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NAType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NoneType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-Decimal-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NaTType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NAType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NoneType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-Decimal-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NaTType-False-tuples1-outputs1]", + "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NAType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NoneType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_grouper_dropna_propagation[False]", "tests/groupby/test_groupby_dropna.py::test_grouper_dropna_propagation[True]", @@ -8196,6 +8251,11 @@ def pytest_unconfigure(config): "tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int16]", 
"tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int32]", "tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int64]", + "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[Decimal]", + "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[NaTType]", + "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[NAType]", + "tests/indexes/object/test_indexing.py::TestGetIndexer::test_get_indexer_with_NA_values[None-unique_nulls_fixture22]", + "tests/indexes/object/test_indexing.py::TestGetIndexer::test_get_indexer_with_NA_values[unique_nulls_fixture2-None]", "tests/indexes/object/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[in_slice13--object]", "tests/indexes/object/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[in_slice13--string[pyarrow_numpy]]", "tests/indexes/period/test_constructors.py::TestPeriodIndex::test_constructor_fromarraylike", @@ -8268,6 +8328,10 @@ def pytest_unconfigure(config): "tests/indexes/string/test_indexing.py::TestGetIndexer::test_get_indexer_strings_raises[string=str[python]]", "tests/indexes/string/test_indexing.py::TestGetIndexer::test_get_indexer_strings_raises[string=string[pyarrow]]", "tests/indexes/string/test_indexing.py::TestGetIndexer::test_get_indexer_strings_raises[string=string[python]]", + "tests/indexes/string/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[string=object-null3]", + "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-Decimal]", + "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-NaTType]", + "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-NAType]", "tests/indexes/string/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[string=object-in_slice13-]", "tests/indexes/string/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[string=str[pyarrow]-in_slice13-]", "tests/indexes/string/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[string=str[python]-in_slice13-]", @@ -8319,6 +8383,9 @@ def pytest_unconfigure(config): "tests/indexes/test_base.py::TestIndex::test_empty_fancy_raises[uint32]", "tests/indexes/test_base.py::TestIndex::test_empty_fancy_raises[uint64]", "tests/indexes/test_base.py::TestIndex::test_equals_op_mismatched_multiindex_raises[index0]", + "tests/indexes/test_base.py::TestIndex::test_format_missing[Decimal-vals1]", + "tests/indexes/test_base.py::TestIndex::test_format_missing[NaTType-vals1]", + "tests/indexes/test_base.py::TestIndex::test_format_missing[NAType-vals1]", "tests/indexes/test_base.py::TestIndex::test_is_", "tests/indexes/test_base.py::TestIndex::test_isin_level_kwarg_bad_label_raises[bool-dtype-nan]", "tests/indexes/test_base.py::TestIndex::test_isin_level_kwarg_bad_label_raises[categorical-nan]", @@ -8375,6 +8442,12 @@ def pytest_unconfigure(config): "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float32]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float64]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float]", + "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[Decimal-Decimal]", + "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[Decimal-NoneType]", + 
"tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NaTType-NaTType]", + "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NaTType-NoneType]", + "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NAType-NAType]", + "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NAType-NoneType]", "tests/indexes/test_base.py::TestIndex::test_map_defaultdict", "tests/indexes/test_base.py::TestIndex::test_str_attribute_raises[index2]", "tests/indexes/test_base.py::TestIndex::test_str_bool_return", @@ -10456,6 +10529,8 @@ def pytest_unconfigure(config): "tests/io/test_fsspec.py::test_json_options[zip]", "tests/io/test_fsspec.py::test_json_options[zstd]", "tests/io/test_fsspec.py::test_non_fsspec_options", + "tests/io/test_html.py::TestReadHtml::test_extract_links[bs4-header]", + "tests/io/test_html.py::TestReadHtml::test_extract_links[lxml-header]", "tests/io/test_orc.py::test_orc_reader_basic", "tests/io/test_orc.py::test_orc_reader_date_high", "tests/io/test_orc.py::test_orc_reader_date_low", @@ -11231,6 +11306,18 @@ def pytest_unconfigure(config): "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_join_multi_dtypes[float32-int320]", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_join_multi_dtypes[float32-int321]", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_bool_dtype[right-expected_data3]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[inner-str0]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[inner-str1]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[inner-U]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[left-str0]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[left-str1]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[left-U]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[outer-str0]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[outer-str1]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[outer-U]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[right-str0]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[right-str1]", + "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_ea_with_string[right-U]", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_empty[False-right-empty]", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_empty[True-right-right]", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_incompat_dtypes_are_ok[df1_vals3-df2_vals3]", @@ -12408,6 +12495,9 @@ def pytest_unconfigure(config): "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_mask_key[int-loc]", "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_mask_key[int-setitem]", "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_series_where[int]", + "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[Decimal]", + "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[NaTType]", + "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[NAType]", 
"tests/series/indexing/test_setitem.py::TestSmallIntegerSetitemUpcast::test_int_key[iloc-4611686018427387904]", "tests/series/indexing/test_setitem.py::TestSmallIntegerSetitemUpcast::test_int_key[iloc-8589934593.0]", "tests/series/indexing/test_setitem.py::TestSmallIntegerSetitemUpcast::test_int_key[iloc-8589934593.1]", @@ -12466,6 +12556,8 @@ def pytest_unconfigure(config): "tests/series/methods/test_astype.py::TestAstype::test_astype_str_cast_td64", "tests/series/methods/test_astype.py::TestAstype::test_dt64_series_astype_object", "tests/series/methods/test_astype.py::TestAstype::test_td64_series_astype_object", + "tests/series/methods/test_astype.py::TestAstype::test_astype_to_str_preserves_na[None-None]", + "tests/series/methods/test_astype.py::TestAstype::test_astype_to_str_preserves_na[value2-]", "tests/series/methods/test_astype.py::TestAstypeCategorical::test_astype_categorical_to_categorical[False-True-None]", "tests/series/methods/test_astype.py::TestAstypeCategorical::test_astype_categorical_to_categorical[False-True-foo]", "tests/series/methods/test_astype.py::TestAstypeCategorical::test_astype_categorical_to_categorical[True-False-None]", @@ -13017,6 +13109,7 @@ def pytest_unconfigure(config): "tests/series/methods/test_reindex.py::test_reindexing_with_float64_NA_log", "tests/series/methods/test_rename.py::TestRename::test_rename_copy_false", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_categorical_single", + "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_change_dtype_series", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_datetime64", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Float64-input_data4-to_replace4-expected_data4]", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Int64-input_data2-to_replace2-expected_data2]", diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index e7bdf6d415f..df83ab51043 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -11,6 +11,7 @@ import cudf from cudf.core._compat import ( PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_210, PANDAS_VERSION, ) from cudf.core.column.column import as_column @@ -112,6 +113,20 @@ def test_series_unitness_np_datetimelike_units(): pd.Series(data) +def test_from_numpyextensionarray_string_object_pandas_compat_mode(): + NumpyExtensionArray = ( + pd.arrays.NumpyExtensionArray + if PANDAS_GE_210 + else pd.arrays.PandasArray + ) + + data = NumpyExtensionArray(np.array(["a", None], dtype=object)) + with cudf.option_context("mode.pandas_compatible", True): + result = cudf.Series(data) + expected = pd.Series(data) + assert_eq(result, expected) + + def test_list_category_like_maintains_dtype(): dtype = cudf.CategoricalDtype(categories=[1, 2, 3, 4], ordered=True) data = [1, 2, 3] From 641e0910851c4799c537b505fc56fa766264aca6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:01:54 -0700 Subject: [PATCH 128/366] Move test_reshape.py to new cudf classic directory strucutre, remove reshape._merge_sorted (#19614) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Also removed `reshape._merge_sorted` and its associated tests which were very slow. 
It appears this was once used by `dask_cudf`, xref https://github.com/rapidsai/cudf/pull/10713, but I cannot find usage of this anymore (possibly removed with migration to dask expressions?) Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19614 --- python/cudf/cudf/core/reshape.py | 111 --- .../methods/test_interleave_columns.py | 48 + .../cudf/cudf/tests/reshape/test_crosstab.py | 61 ++ python/cudf/cudf/tests/reshape/test_melt.py | 124 +++ python/cudf/cudf/tests/reshape/test_pivot.py | 113 +++ .../cudf/tests/reshape/test_pivot_table.py | 95 ++ python/cudf/cudf/tests/reshape/test_stack.py | 182 ++++ python/cudf/cudf/tests/reshape/test_tile.py | 42 + .../cudf/cudf/tests/reshape/test_unstack.py | 121 +++ python/cudf/cudf/tests/test_reshape.py | 888 ------------------ 10 files changed, 786 insertions(+), 999 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_interleave_columns.py create mode 100644 python/cudf/cudf/tests/reshape/test_crosstab.py create mode 100644 python/cudf/cudf/tests/reshape/test_melt.py create mode 100644 python/cudf/cudf/tests/reshape/test_pivot.py create mode 100644 python/cudf/cudf/tests/reshape/test_pivot_table.py create mode 100644 python/cudf/cudf/tests/reshape/test_stack.py create mode 100644 python/cudf/cudf/tests/reshape/test_tile.py create mode 100644 python/cudf/cudf/tests/reshape/test_unstack.py delete mode 100644 python/cudf/cudf/tests/test_reshape.py diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 74337fd5284..e9ee79f35aa 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -8,8 +8,6 @@ import numpy as np import pandas as pd -import pylibcudf as plc - import cudf from cudf.api.extensions import no_default from cudf.api.types import is_list_like, is_scalar @@ -878,115 +876,6 @@ def get_dummies( return cudf.DataFrame._from_data(data, index=ser.index) -def _merge_sorted( - objs, - keys=None, - by_index=False, - ignore_index=False, - ascending=True, - na_position="last", -): - """Merge a list of sorted DataFrame or Series objects. - - Dataframes/Series in objs list MUST be pre-sorted by columns - listed in `keys`, or by the index (if `by_index=True`). - - Parameters - ---------- - objs : list of DataFrame or Series - keys : list, default None - List of Column names to sort by. If None, all columns used - (Ignored if `by_index=True`) - by_index : bool, default False - Use index for sorting. `keys` input will be ignored if True - ignore_index : bool, default False - Drop and ignore index during merge. Default range index will - be used in the output dataframe. - ascending : bool, default True - Sorting is in ascending order, otherwise it is descending - na_position : {'first', 'last'}, default 'last' - 'first' nulls at the beginning, 'last' nulls at the end - - Returns - ------- - A new, lexicographically sorted, DataFrame/Series. 
- """ - if is_scalar(objs): - raise TypeError("objs must be a list-like of Frame-like objects") - - if len(objs) < 1: - raise ValueError("objs must be non-empty") - - if not all( - isinstance(table, (cudf.DataFrame, cudf.Series)) for table in objs - ): - raise TypeError("Elements of objs must be Frame-like") - - if len(objs) == 1: - return objs[0] - - if by_index and ignore_index: - raise ValueError("`by_index` and `ignore_index` cannot both be True") - - if by_index: - key_columns_indices = list(range(0, objs[0].index.nlevels)) - else: - if keys is None: - key_columns_indices = list(range(0, objs[0]._num_columns)) - else: - key_columns_indices = [ - objs[0]._column_names.index(key) for key in keys - ] - if not ignore_index: - key_columns_indices = [ - idx + objs[0].index.nlevels for idx in key_columns_indices - ] - - columns = ( - itertools.chain(obj.index._columns, obj._columns) - if not ignore_index - else obj._columns - for obj in objs - ) - - input_tables = [ - plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]) - for source_columns in columns - ] - - num_keys = len(key_columns_indices) - - column_order = ( - plc.types.Order.ASCENDING if ascending else plc.types.Order.DESCENDING - ) - - if not ascending: - na_position = "last" if na_position == "first" else "first" - - null_precedence = ( - plc.types.NullOrder.BEFORE - if na_position == "first" - else plc.types.NullOrder.AFTER - ) - - plc_table = plc.merge.merge( - input_tables, - key_columns_indices, - [column_order] * num_keys, - [null_precedence] * num_keys, - ) - - result_columns = [ - ColumnBase.from_pylibcudf(col) for col in plc_table.columns() - ] - - return objs[0]._from_columns_like_self( - result_columns, - column_names=objs[0]._column_names, - index_names=None if ignore_index else objs[0]._index_names, - ) - - def _pivot( col_accessor: ColumnAccessor, index: Index | MultiIndex, diff --git a/python/cudf/cudf/tests/dataframe/methods/test_interleave_columns.py b/python/cudf/cudf/tests/dataframe/methods/test_interleave_columns.py new file mode 100644 index 00000000000..855619f31f4 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_interleave_columns.py @@ -0,0 +1,48 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("nulls", ["none", "some"]) +def test_interleave_columns(nulls, all_supported_types_as_str): + if ( + all_supported_types_as_str not in ["float32", "float64"] + and nulls == "some" + ): + pytest.skip( + reason=f"nulls not supported in {all_supported_types_as_str}" + ) + + num_rows = 10 + num_cols = 2 + pdf = pd.DataFrame(dtype=all_supported_types_as_str) + rng = np.random.default_rng(seed=0) + for i in range(num_cols): + colname = str(i) + data = pd.Series(rng.integers(0, 26, num_rows)).astype( + all_supported_types_as_str + ) + + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + data[idx] = np.nan + pdf[colname] = data + + gdf = cudf.from_pandas(pdf) + + if all_supported_types_as_str == "category": + with pytest.raises(ValueError): + assert gdf.interleave_columns() + else: + got = gdf.interleave_columns() + + expect = pd.Series(np.vstack(pdf.to_numpy()).reshape((-1,))).astype( + all_supported_types_as_str + ) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/reshape/test_crosstab.py b/python/cudf/cudf/tests/reshape/test_crosstab.py new file mode 100644 index 00000000000..fac485d9028 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_crosstab.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_crosstab_simple(): + a = np.array( + [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + dtype=object, + ) + b = np.array( + [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + dtype=object, + ) + c = np.array( + [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + dtype=object, + ) + expected = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) + actual = cudf.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) + assert_eq(expected, actual, check_dtype=False) diff --git a/python/cudf/cudf/tests/reshape/test_melt.py b/python/cudf/cudf/tests/reshape/test_melt.py new file mode 100644 index 00000000000..3583c12b544 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_melt.py @@ -0,0 +1,124 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("num_id_vars", [0, 2]) +@pytest.mark.parametrize("num_value_vars", [0, 2]) +@pytest.mark.parametrize("nulls", ["none", "some", "all"]) +def test_melt( + nulls, + num_id_vars, + num_value_vars, + numeric_and_temporal_types_as_str, + ignore_index, +): + if numeric_and_temporal_types_as_str not in [ + "float32", + "float64", + ] and nulls in ["some", "all"]: + pytest.skip( + reason=f"nulls not supported in {numeric_and_temporal_types_as_str}" + ) + + num_rows = 10 + pdf = pd.DataFrame() + id_vars = [] + rng = np.random.default_rng(seed=0) + for i in range(num_id_vars): + colname = "id" + str(i) + data = rng.integers(0, 26, num_rows).astype( + numeric_and_temporal_types_as_str + ) + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + data[idx] = np.nan + elif nulls == "all": + data[:] = np.nan + pdf[colname] = data + id_vars.append(colname) + + value_vars = [] + for i in range(num_value_vars): + colname = "val" + str(i) + data = rng.integers(0, 26, num_rows).astype( + numeric_and_temporal_types_as_str + ) + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + data[idx] = np.nan + elif nulls == "all": + data[:] = np.nan + pdf[colname] = data + value_vars.append(colname) + + gdf = cudf.from_pandas(pdf) + + got = cudf.melt( + frame=gdf, + id_vars=id_vars, + value_vars=value_vars, + ignore_index=ignore_index, + ) + got_from_melt_method = gdf.melt( + id_vars=id_vars, value_vars=value_vars, ignore_index=ignore_index + ) + + expect = pd.melt( + frame=pdf, + id_vars=id_vars, + value_vars=value_vars, + ignore_index=ignore_index, + ) + + assert_eq(expect, got) + + assert_eq(expect, got_from_melt_method) + + +def test_melt_more_than_255_columns(): + mydict = {"id": ["foobar"]} + for i in range(1, 260): + mydict[f"d_{i}"] = i + + df = pd.DataFrame(mydict) + grid_df = pd.melt(df, id_vars=["id"], var_name="d", value_name="sales") + + df_d = cudf.DataFrame(mydict) + grid_df_d = cudf.melt( + df_d, id_vars=["id"], var_name="d", value_name="sales" + ) + grid_df_d["d"] = grid_df_d["d"] + + assert_eq(grid_df, grid_df_d) + + +def test_melt_str_scalar_id_var(): + data = {"index": [1, 2], "id": [1, 2], "d0": [10, 20], "d1": [30, 40]} + result = cudf.melt( + cudf.DataFrame(data), + id_vars="index", + var_name="column", + value_name="value", + ) + expected = pd.melt( + pd.DataFrame(data), + id_vars="index", + var_name="column", + value_name="value", + ) + assert_eq(result, expected) + + +def test_melt_falsy_var_name(): + df = cudf.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) + result = cudf.melt(df, id_vars=["A"], value_vars=["B"], var_name="") + expected = pd.melt( + df.to_pandas(), id_vars=["A"], value_vars=["B"], var_name="" + ) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/reshape/test_pivot.py b/python/cudf/cudf/tests/reshape/test_pivot.py new file mode 100644 index 00000000000..73398d52bad --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_pivot.py @@ -0,0 +1,113 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "index, column, data", + [ + ([], [], []), + ([0], [0], [0]), + ([0, 0], [0, 1], [1, 2.0]), + ([0, 1], [0, 0], [1, 2.0]), + ([0, 1], [0, 1], [1, 2.0]), + (["a", "a", "b", "b"], ["c", "d", "c", "d"], [1, 2, 3, 4]), + ( + ["a", "a", "b", "b", "a"], + ["c", "d", "c", "d", "e"], + [1, 2, 3, 4, 5], + ), + ], +) +def test_pivot_simple(index, column, data): + pdf = pd.DataFrame({"index": index, "column": column, "data": data}) + gdf = cudf.from_pandas(pdf) + + expect = pdf.pivot(columns="column", index="index") + got = gdf.pivot(columns="column", index="index") + + check_index_and_columns = expect.shape != (0, 0) + assert_eq( + expect, + got, + check_dtype=False, + check_index_type=check_index_and_columns, + check_column_type=check_index_and_columns, + ) + + +def test_pivot_multi_values(): + # from Pandas docs: + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html + pdf = pd.DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + gdf = cudf.from_pandas(pdf) + assert_eq( + pdf.pivot(index="foo", columns="bar", values=["baz", "zoo"]), + gdf.pivot(index="foo", columns="bar", values=["baz", "zoo"]), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "values", ["z", "z123", ["z123"], ["z", "z123", "123z"]] +) +def test_pivot_values(values): + data = [ + ["A", "a", 0, 0, 0], + ["A", "b", 1, 1, 1], + ["A", "c", 2, 2, 2], + ["B", "a", 0, 0, 0], + ["B", "b", 1, 1, 1], + ["B", "c", 2, 2, 2], + ["C", "a", 0, 0, 0], + ["C", "b", 1, 1, 1], + ["C", "c", 2, 2, 2], + ] + columns = ["x", "y", "z", "z123", "123z"] + pdf = pd.DataFrame(data, columns=columns) + cdf = cudf.DataFrame(data, columns=columns) + expected = pd.pivot(pdf, index="x", columns="y", values=values) + actual = cudf.pivot(cdf, index="x", columns="y", values=values) + assert_eq( + expected, + actual, + check_dtype=False, + ) + + +def test_pivot_duplicate_error(): + gdf = cudf.DataFrame( + {"a": [0, 1, 2, 2], "b": [1, 2, 3, 3], "d": [1, 2, 3, 4]} + ) + with pytest.raises(ValueError): + gdf.pivot(index="a", columns="b") + with pytest.raises(ValueError): + gdf.pivot(index="b", columns="a") + + +@pytest.mark.parametrize("index", [["ix"], ["ix", "foo"]]) +@pytest.mark.parametrize("columns", [["col"], ["col", "baz"]]) +def test_pivot_list_like_index_columns(index, columns): + data = { + "bar": ["x", "y", "z", "w"], + "col": ["a", "b", "a", "b"], + "foo": [1, 2, 3, 4], + "ix": [1, 1, 2, 2], + "baz": [0, 0, 0, 0], + } + pd_df = pd.DataFrame(data) + cudf_df = cudf.DataFrame(data) + result = cudf_df.pivot(columns=columns, index=index) + expected = pd_df.pivot(columns=columns, index=index) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/reshape/test_pivot_table.py b/python/cudf/cudf/tests/reshape/test_pivot_table.py new file mode 100644 index 00000000000..c7d0eb47411 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_pivot_table.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] +) +def test_pivot_table_simple(aggfunc): + rng = np.random.default_rng(seed=0) + fill_value = 0 + pdf = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": rng.standard_normal(size=24), + "E": rng.standard_normal(size=24), + } + ) + expected = pd.pivot_table( + pdf, + values=["D", "E"], + index=["A", "B"], + columns=["C"], + aggfunc=aggfunc, + fill_value=fill_value, + ) + cdf = cudf.DataFrame.from_pandas(pdf) + actual = cudf.pivot_table( + cdf, + values=["D", "E"], + index=["A", "B"], + columns=["C"], + aggfunc=aggfunc, + fill_value=fill_value, + ) + assert_eq(expected, actual, check_dtype=False) + + +@pytest.mark.parametrize( + "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] +) +def test_dataframe_pivot_table_simple(aggfunc): + rng = np.random.default_rng(seed=0) + fill_value = 0 + pdf = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": rng.standard_normal(size=24), + "E": rng.standard_normal(size=24), + } + ) + expected = pdf.pivot_table( + values=["D", "E"], + index=["A", "B"], + columns=["C"], + aggfunc=aggfunc, + fill_value=fill_value, + ) + cdf = cudf.DataFrame.from_pandas(pdf) + actual = cdf.pivot_table( + values=["D", "E"], + index=["A", "B"], + columns=["C"], + aggfunc=aggfunc, + fill_value=fill_value, + ) + assert_eq(expected, actual, check_dtype=False) + + +@pytest.mark.parametrize("index", ["A", ["A"]]) +@pytest.mark.parametrize("columns", ["C", ["C"]]) +def test_pivot_table_scalar_index_columns(index, columns): + data = { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": range(24), + "E": range(24), + } + result = cudf.DataFrame(data).pivot_table( + values="D", index=index, columns=columns, aggfunc="sum" + ) + expected = pd.DataFrame(data).pivot_table( + values="D", index=index, columns=columns, aggfunc="sum" + ) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/reshape/test_stack.py b/python/cudf/cudf/tests/reshape/test_stack.py new file mode 100644 index 00000000000..64b976f6be0 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_stack.py @@ -0,0 +1,182 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +@pytest.mark.parametrize("nulls", ["none", "some"]) +def test_df_stack(nulls, all_supported_types_as_str): + if ( + all_supported_types_as_str not in ["float32", "float64"] + and nulls == "some" + ): + pytest.skip( + reason=f"nulls not supported in {all_supported_types_as_str}" + ) + elif all_supported_types_as_str == "category": + pytest.skip(reason="category not applicable for test") + + num_cols = 2 + num_rows = 10 + pdf = pd.DataFrame() + rng = np.random.default_rng(seed=0) + for i in range(num_cols): + colname = str(i) + data = rng.integers(0, 26, num_rows).astype(all_supported_types_as_str) + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + data[idx] = np.nan + pdf[colname] = data + + gdf = cudf.from_pandas(pdf) + + got = gdf.stack() + expect = pdf.stack() + + assert_eq(expect, got) + + +def test_df_stack_reset_index(): + df = cudf.DataFrame( + { + "a": [1, 2, 3, 4], + "b": [10, 11, 12, 13], + "c": ["ab", "cd", None, "gh"], + } + ) + df = df.set_index(["a", "b"]) + pdf = df.to_pandas() + + expected = pdf.stack() + actual = df.stack() + + assert_eq(expected, actual) + + expected = expected.reset_index() + actual = actual.reset_index() + + assert_eq(expected, actual) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Need pandas-2.1.0+ to match `stack` api", +) +@pytest.mark.parametrize( + "tuples", + [ + [("A", "cat"), ("A", "dog"), ("B", "cat"), ("B", "dog")], + [("A", "cat"), ("B", "bird"), ("A", "dog"), ("B", "dog")], + ], +) +@pytest.mark.parametrize( + "level", + [ + -1, + 0, + 1, + "letter", + "animal", + [0, 1], + [1, 0], + ["letter", "animal"], + ["animal", "letter"], + ], +) +@pytest.mark.parametrize( + "index", + [ + pd.RangeIndex(2, name="range"), + pd.Index([9, 8], name="myindex"), + pd.MultiIndex.from_arrays( + [ + ["A", "B"], + [101, 102], + ], + names=["first", "second"], + ), + ], +) +def test_df_stack_multiindex_column_axis(tuples, index, level, dropna): + if isinstance(level, list) and len(level) > 1 and not dropna: + pytest.skip( + "Stacking multiple levels with dropna==False is unsupported." 
+ ) + columns = pd.MultiIndex.from_tuples(tuples, names=["letter", "animal"]) + + pdf = pd.DataFrame( + data=[[1, 2, 3, 4], [2, 4, 6, 8]], columns=columns, index=index + ) + gdf = cudf.from_pandas(pdf) + + with pytest.warns(FutureWarning): + got = gdf.stack(level=level, dropna=dropna, future_stack=False) + with expect_warning_if(PANDAS_GE_220, FutureWarning): + expect = pdf.stack(level=level, dropna=dropna, future_stack=False) + + assert_eq(expect, got, check_dtype=False) + + got = gdf.stack(level=level, future_stack=True) + expect = pdf.stack(level=level, future_stack=True) + + assert_eq(expect, got, check_dtype=False) + + +def test_df_stack_mixed_dtypes(): + pdf = pd.DataFrame( + { + "A": pd.Series([1, 2, 3], dtype="f4"), + "B": pd.Series([4, 5, 6], dtype="f8"), + } + ) + + gdf = cudf.from_pandas(pdf) + + got = gdf.stack() + expect = pdf.stack() + + assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Need pandas-2.1.0+ to match `stack` api", +) +@pytest.mark.parametrize("level", [["animal", "hair_length"], [1, 2]]) +def test_df_stack_multiindex_column_axis_pd_example(level): + columns = pd.MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + rng = np.random.default_rng(seed=0) + df = pd.DataFrame(rng.standard_normal(size=(4, 4)), columns=columns) + + with expect_warning_if(PANDAS_GE_220, FutureWarning): + expect = df.stack(level=level, future_stack=False) + gdf = cudf.from_pandas(df) + with pytest.warns(FutureWarning): + got = gdf.stack(level=level, future_stack=False) + + assert_eq(expect, got) + + expect = df.stack(level=level, future_stack=True) + got = gdf.stack(level=level, future_stack=True) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/reshape/test_tile.py b/python/cudf/cudf/tests/reshape/test_tile.py new file mode 100644 index 00000000000..2f071829d13 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_tile.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("count", [1, 10]) +@pytest.mark.parametrize("nulls", ["none", "some"]) +def test_tile(nulls, all_supported_types_as_str, count): + if ( + all_supported_types_as_str not in ["float32", "float64"] + and nulls == "some" + ): + pytest.skip( + reason=f"nulls not supported in {all_supported_types_as_str}" + ) + + num_cols = 2 + num_rows = 10 + pdf = pd.DataFrame(dtype=all_supported_types_as_str) + rng = np.random.default_rng(seed=0) + for i in range(num_cols): + colname = str(i) + data = pd.Series(rng.integers(num_cols, 26, num_rows)).astype( + all_supported_types_as_str + ) + + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + data[idx] = np.nan + pdf[colname] = data + + gdf = cudf.from_pandas(pdf) + + got = gdf.tile(count) + expect = pd.DataFrame(pd.concat([pdf] * count)) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/reshape/test_unstack.py b/python/cudf/cudf/tests/reshape/test_unstack.py new file mode 100644 index 00000000000..6cb38537809 --- /dev/null +++ b/python/cudf/cudf/tests/reshape/test_unstack.py @@ -0,0 +1,121 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
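[Reviewer annotation, not part of the patch: unstack() is the inverse of
stack(); it moves an index level (the innermost by default) into the
columns, which is what the relocated tests below exercise for plain,
categorical, and MultiIndex inputs. A minimal sketch, with names invented
for illustration:

    import cudf

    df = cudf.DataFrame(
        {"k1": ["a", "a", "b"], "k2": ["x", "y", "x"], "v": [1, 2, 3]}
    ).set_index(["k1", "k2"])
    wide = df.unstack()
    # columns become the MultiIndex ("v", "x"), ("v", "y"); the missing
    # ("b", "y") combination is filled with <NA>
]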
+ +import re + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "level", + [ + 0, + pytest.param( + 1, + marks=pytest.mark.xfail( + reason="Categorical column indexes not supported" + ), + ), + 2, + "foo", + pytest.param( + "bar", + marks=pytest.mark.xfail( + reason="Categorical column indexes not supported" + ), + ), + "baz", + [], + pytest.param( + [0, 1], + marks=pytest.mark.xfail( + reason="Categorical column indexes not supported" + ), + ), + ["foo"], + pytest.param( + ["foo", "bar"], + marks=pytest.mark.xfail( + reason="Categorical column indexes not supported" + ), + ), + pytest.param( + [0, 1, 2], + marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + ), + pytest.param( + ["foo", "bar", "baz"], + marks=pytest.mark.xfail(reason="Pandas behaviour unclear"), + ), + ], +) +def test_unstack_multiindex(level): + pdf = pd.DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": pd.Categorical(["A", "B", "C", "A", "B", "C"]), + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ).set_index(["foo", "bar", "baz"]) + gdf = cudf.from_pandas(pdf) + assert_eq( + pdf.unstack(level=level), + gdf.unstack(level=level), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "index", + [ + pd.Index(range(0, 5), name=None), + pd.Index(range(0, 5), name="row_index"), + pytest.param( + pd.CategoricalIndex(["d", "e", "f", "g", "h"]), + marks=pytest.mark.xfail( + reason="Categorical column indexes not supported" + ), + ), + ], +) +@pytest.mark.parametrize( + "col_idx", + [ + pd.Index(["a", "b"], name=None), + pd.Index(["a", "b"], name="col_index"), + pd.MultiIndex.from_tuples( + [("c", 1), ("c", 2)], names=["col_index1", None] + ), + ], +) +def test_unstack_index(index, col_idx): + data = { + "A": [1.0, 2.0, 3.0, 4.0, 5.0], + "B": [11.0, 12.0, 13.0, 14.0, 15.0], + } + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + pdf.index = index + pdf.columns = col_idx + + gdf.index = cudf.from_pandas(index) + gdf.columns = cudf.from_pandas(col_idx) + + assert_eq(pdf.unstack(), gdf.unstack()) + + +def test_unstack_index_invalid(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + with pytest.raises( + ValueError, + match=re.escape( + "Calling unstack() on single index dataframe with " + "different column datatype is not supported." + ), + ): + gdf.unstack() diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py deleted file mode 100644 index 84cf6136255..00000000000 --- a/python/cudf/cudf/tests/test_reshape.py +++ /dev/null @@ -1,888 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. - -import re -from itertools import chain - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_VERSION, -) -from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing import assert_eq -from cudf.testing._utils import ( - ALL_TYPES, - DATETIME_TYPES, - NUMERIC_TYPES, - expect_warning_if, -) - -pytest_xfail = pytest.mark.xfail -pytestmark = pytest.mark.spilling - -# If spilling is enabled globally, we skip many test permutations -# to reduce running time. 
-if get_global_manager() is not None: - ALL_TYPES = ["float32"] - DATETIME_TYPES = ["datetime64[ms]"] - NUMERIC_TYPES = ["float32"] - # To save time, we skip tests marked "pytest.mark.xfail" - pytest_xfail = pytest.mark.skipif - - -@pytest.mark.parametrize("num_id_vars", [0, 1, 2]) -@pytest.mark.parametrize("num_value_vars", [0, 1, 2]) -@pytest.mark.parametrize("num_rows", [1, 2, 100]) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) -@pytest.mark.parametrize("nulls", ["none", "some", "all"]) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_melt( - nulls, num_id_vars, num_value_vars, num_rows, dtype, ignore_index -): - if dtype not in ["float32", "float64"] and nulls in ["some", "all"]: - pytest.skip(reason="nulls not supported in dtype: " + dtype) - - pdf = pd.DataFrame() - id_vars = [] - rng = np.random.default_rng(seed=0) - for i in range(num_id_vars): - colname = "id" + str(i) - data = rng.integers(0, 26, num_rows).astype(dtype) - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - data[idx] = np.nan - elif nulls == "all": - data[:] = np.nan - pdf[colname] = data - id_vars.append(colname) - - value_vars = [] - for i in range(num_value_vars): - colname = "val" + str(i) - data = rng.integers(0, 26, num_rows).astype(dtype) - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - data[idx] = np.nan - elif nulls == "all": - data[:] = np.nan - pdf[colname] = data - value_vars.append(colname) - - gdf = cudf.from_pandas(pdf) - - got = cudf.melt( - frame=gdf, - id_vars=id_vars, - value_vars=value_vars, - ignore_index=ignore_index, - ) - got_from_melt_method = gdf.melt( - id_vars=id_vars, value_vars=value_vars, ignore_index=ignore_index - ) - - expect = pd.melt( - frame=pdf, - id_vars=id_vars, - value_vars=value_vars, - ignore_index=ignore_index, - ) - - assert_eq(expect, got) - - assert_eq(expect, got_from_melt_method) - - -def test_melt_more_than_255_columns(): - mydict = {"id": ["foobar"]} - for i in range(1, 260): - mydict[f"d_{i}"] = i - - df = pd.DataFrame(mydict) - grid_df = pd.melt(df, id_vars=["id"], var_name="d", value_name="sales") - - df_d = cudf.DataFrame(mydict) - grid_df_d = cudf.melt( - df_d, id_vars=["id"], var_name="d", value_name="sales" - ) - grid_df_d["d"] = grid_df_d["d"] - - assert_eq(grid_df, grid_df_d) - - -def test_melt_str_scalar_id_var(): - data = {"index": [1, 2], "id": [1, 2], "d0": [10, 20], "d1": [30, 40]} - result = cudf.melt( - cudf.DataFrame(data), - id_vars="index", - var_name="column", - value_name="value", - ) - expected = pd.melt( - pd.DataFrame(data), - id_vars="index", - var_name="column", - value_name="value", - ) - assert_eq(result, expected) - - -def test_melt_falsy_var_name(): - df = cudf.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) - result = cudf.melt(df, id_vars=["A"], value_vars=["B"], var_name="") - expected = pd.melt( - df.to_pandas(), id_vars=["A"], value_vars=["B"], var_name="" - ) - assert_eq(result, expected) - - -@pytest.mark.parametrize("num_cols", [1, 2, 10]) -@pytest.mark.parametrize("num_rows", [1, 2, 1000]) -@pytest.mark.parametrize( - "dtype", list(chain(NUMERIC_TYPES, DATETIME_TYPES, ["str"])) -) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_df_stack(nulls, num_cols, num_rows, dtype): - if dtype not in ["float32", "float64"] and nulls in ["some"]: - pytest.skip(reason="nulls not supported in dtype: " + dtype) - - pdf = pd.DataFrame() - rng = np.random.default_rng(seed=0) - for i in 
range(num_cols): - colname = str(i) - data = rng.integers(0, 26, num_rows).astype(dtype) - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - data[idx] = np.nan - pdf[colname] = data - - gdf = cudf.from_pandas(pdf) - - got = gdf.stack() - expect = pdf.stack() - - assert_eq(expect, got) - - -def test_df_stack_reset_index(): - df = cudf.DataFrame( - { - "a": [1, 2, 3, 4], - "b": [10, 11, 12, 13], - "c": ["ab", "cd", None, "gh"], - } - ) - df = df.set_index(["a", "b"]) - pdf = df.to_pandas() - - expected = pdf.stack() - actual = df.stack() - - assert_eq(expected, actual) - - expected = expected.reset_index() - actual = actual.reset_index() - - assert_eq(expected, actual) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Need pandas-2.1.0+ to match `stack` api", -) -@pytest.mark.parametrize( - "columns", - [ - pd.MultiIndex.from_tuples( - [("A", "cat"), ("A", "dog"), ("B", "cat"), ("B", "dog")], - names=["letter", "animal"], - ), - pd.MultiIndex.from_tuples( - [("A", "cat"), ("B", "bird"), ("A", "dog"), ("B", "dog")], - names=["letter", "animal"], - ), - ], -) -@pytest.mark.parametrize( - "level", - [ - -1, - 0, - 1, - "letter", - "animal", - [0, 1], - [1, 0], - ["letter", "animal"], - ["animal", "letter"], - ], -) -@pytest.mark.parametrize( - "index", - [ - pd.RangeIndex(2, name="range"), - pd.Index([9, 8], name="myindex"), - pd.MultiIndex.from_arrays( - [ - ["A", "B"], - [101, 102], - ], - names=["first", "second"], - ), - ], -) -@pytest.mark.parametrize("dropna", [True, False]) -def test_df_stack_multiindex_column_axis(columns, index, level, dropna): - if isinstance(level, list) and len(level) > 1 and not dropna: - pytest.skip( - "Stacking multiple levels with dropna==False is unsupported." 
- ) - - pdf = pd.DataFrame( - data=[[1, 2, 3, 4], [2, 4, 6, 8]], columns=columns, index=index - ) - gdf = cudf.from_pandas(pdf) - - with pytest.warns(FutureWarning): - got = gdf.stack(level=level, dropna=dropna, future_stack=False) - with expect_warning_if(PANDAS_GE_220, FutureWarning): - expect = pdf.stack(level=level, dropna=dropna, future_stack=False) - - assert_eq(expect, got, check_dtype=False) - - got = gdf.stack(level=level, future_stack=True) - expect = pdf.stack(level=level, future_stack=True) - - assert_eq(expect, got, check_dtype=False) - - -def test_df_stack_mixed_dtypes(): - pdf = pd.DataFrame( - { - "A": pd.Series([1, 2, 3], dtype="f4"), - "B": pd.Series([4, 5, 6], dtype="f8"), - } - ) - - gdf = cudf.from_pandas(pdf) - - got = gdf.stack() - expect = pdf.stack() - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Need pandas-2.1.0+ to match `stack` api", -) -@pytest.mark.parametrize("level", [["animal", "hair_length"], [1, 2]]) -def test_df_stack_multiindex_column_axis_pd_example(level): - columns = pd.MultiIndex.from_tuples( - [ - ("A", "cat", "long"), - ("B", "cat", "long"), - ("A", "dog", "short"), - ("B", "dog", "short"), - ], - names=["exp", "animal", "hair_length"], - ) - rng = np.random.default_rng(seed=0) - df = pd.DataFrame(rng.standard_normal(size=(4, 4)), columns=columns) - - with expect_warning_if(PANDAS_GE_220, FutureWarning): - expect = df.stack(level=level, future_stack=False) - gdf = cudf.from_pandas(df) - with pytest.warns(FutureWarning): - got = gdf.stack(level=level, future_stack=False) - - assert_eq(expect, got) - - expect = df.stack(level=level, future_stack=True) - got = gdf.stack(level=level, future_stack=True) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("num_rows", [1, 2, 10, 1000]) -@pytest.mark.parametrize("num_cols", [1, 2, 10]) -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + ["category"] -) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_interleave_columns(nulls, num_cols, num_rows, dtype): - if dtype not in ["float32", "float64"] and nulls in ["some"]: - pytest.skip(reason="nulls not supported in dtype: " + dtype) - - pdf = pd.DataFrame(dtype=dtype) - rng = np.random.default_rng(seed=0) - for i in range(num_cols): - colname = str(i) - data = pd.Series(rng.integers(0, 26, num_rows)).astype(dtype) - - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - data[idx] = np.nan - pdf[colname] = data - - gdf = cudf.from_pandas(pdf) - - if dtype == "category": - with pytest.raises(ValueError): - assert gdf.interleave_columns() - else: - got = gdf.interleave_columns() - - expect = pd.Series(np.vstack(pdf.to_numpy()).reshape((-1,))).astype( - dtype - ) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("num_cols", [1, 2, 10]) -@pytest.mark.parametrize("num_rows", [1, 2, 1000]) -@pytest.mark.parametrize("count", [1, 2, 10]) -@pytest.mark.parametrize("dtype", ALL_TYPES) -@pytest.mark.parametrize("nulls", ["none", "some"]) -def test_tile(nulls, num_cols, num_rows, dtype, count): - if dtype not in ["float32", "float64"] and nulls in ["some"]: - pytest.skip(reason="nulls not supported in dtype: " + dtype) - - pdf = pd.DataFrame(dtype=dtype) - rng = np.random.default_rng(seed=0) - for i in range(num_cols): - colname = str(i) - data = pd.Series(rng.integers(num_cols, 26, num_rows)).astype(dtype) - - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - 
data[idx] = np.nan - pdf[colname] = data - - gdf = cudf.from_pandas(pdf) - - got = gdf.tile(count) - expect = pd.DataFrame(pd.concat([pdf] * count)) - - assert_eq(expect, got) - - -def _prepare_merge_sorted_test( - size, - nparts, - keys, - add_null=False, - na_position="last", - ascending=True, - series=False, - index=False, -): - if index: - df = ( - cudf.datasets.timeseries()[:size] - .reset_index(drop=False) - .set_index(keys, drop=True) - ) - else: - df = cudf.datasets.timeseries()[:size].reset_index(drop=False) - if add_null: - df.iloc[1, df.columns.get_loc(keys[0])] = None - chunk = int(size / nparts) - indices = [i * chunk for i in range(0, nparts)] + [size] - if index: - dfs = [ - df.iloc[indices[i] : indices[i + 1]] - .copy() - .sort_index(ascending=ascending) - for i in range(nparts) - ] - elif series: - df = df[keys[0]] - dfs = [ - df.iloc[indices[i] : indices[i + 1]] - .copy() - .sort_values(na_position=na_position, ascending=ascending) - for i in range(nparts) - ] - else: - dfs = [ - df.iloc[indices[i] : indices[i + 1]] - .copy() - .sort_values(keys, na_position=na_position, ascending=ascending) - for i in range(nparts) - ] - return df, dfs - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -@pytest.mark.parametrize("keys", [None, ["id"], ["name", "timestamp"]]) -@pytest.mark.parametrize("nparts", [2, 10]) -def test_df_merge_sorted(nparts, keys, na_position, ascending): - size = 100 - keys_1 = keys or ["timestamp"] - # Null values NOT currently supported with Categorical data - # or when `ascending=False` - add_null = keys_1[0] not in ("name") - df, dfs = _prepare_merge_sorted_test( - size, - nparts, - keys_1, - add_null=add_null, - na_position=na_position, - ascending=ascending, - ) - - expect = df.sort_values( - keys_1, na_position=na_position, ascending=ascending - ) - result = cudf.core.reshape._merge_sorted( - dfs, keys=keys, na_position=na_position, ascending=ascending - ) - if keys: - expect = expect[keys] - result = result[keys] - - assert expect.index.dtype == result.index.dtype - assert_eq(expect.reset_index(drop=True), result.reset_index(drop=True)) - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("index", ["id", "x"]) -@pytest.mark.parametrize("nparts", [2, 10]) -def test_df_merge_sorted_index(nparts, index, ascending): - size = 100 - df, dfs = _prepare_merge_sorted_test( - size, nparts, index, ascending=ascending, index=True - ) - - expect = df.sort_index(ascending=ascending) - result = cudf.core.reshape._merge_sorted( - dfs, by_index=True, ascending=ascending - ) - - assert_eq(expect.index, result.index) - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -@pytest.mark.parametrize("keys", [None, ["name", "timestamp"]]) -def test_df_merge_sorted_ignore_index(keys, na_position, ascending): - size = 100 - nparts = 3 - keys_1 = keys or ["timestamp"] - # Null values NOT currently supported with Categorical data - # or when `ascending=False` - add_null = keys_1[0] not in ("name") - df, dfs = _prepare_merge_sorted_test( - size, - nparts, - keys_1, - add_null=add_null, - na_position=na_position, - ascending=ascending, - ) - - expect = df.sort_values( - keys_1, na_position=na_position, ascending=ascending - ) - result = cudf.core.reshape._merge_sorted( - dfs, - keys=keys, - na_position=na_position, - ascending=ascending, - ignore_index=True, - ) - if keys: - expect = expect[keys] - result = result[keys] - 
- assert_eq(expect.reset_index(drop=True), result) - - -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -@pytest.mark.parametrize("key", ["id", "name", "timestamp"]) -@pytest.mark.parametrize("nparts", [2, 10]) -def test_series_merge_sorted(nparts, key, na_position, ascending): - size = 100 - df, dfs = _prepare_merge_sorted_test( - size, - nparts, - [key], - na_position=na_position, - ascending=ascending, - series=True, - ) - - expect = df.sort_values(na_position=na_position, ascending=ascending) - result = cudf.core.reshape._merge_sorted( - dfs, na_position=na_position, ascending=ascending - ) - - assert_eq(expect.reset_index(drop=True), result.reset_index(drop=True)) - - -@pytest.mark.parametrize( - "index, column, data", - [ - ([], [], []), - ([0], [0], [0]), - ([0, 0], [0, 1], [1, 2.0]), - ([0, 1], [0, 0], [1, 2.0]), - ([0, 1], [0, 1], [1, 2.0]), - (["a", "a", "b", "b"], ["c", "d", "c", "d"], [1, 2, 3, 4]), - ( - ["a", "a", "b", "b", "a"], - ["c", "d", "c", "d", "e"], - [1, 2, 3, 4, 5], - ), - ], -) -def test_pivot_simple(index, column, data): - pdf = pd.DataFrame({"index": index, "column": column, "data": data}) - gdf = cudf.from_pandas(pdf) - - expect = pdf.pivot(columns="column", index="index") - got = gdf.pivot(columns="column", index="index") - - check_index_and_columns = expect.shape != (0, 0) - assert_eq( - expect, - got, - check_dtype=False, - check_index_type=check_index_and_columns, - check_column_type=check_index_and_columns, - ) - - -def test_pivot_multi_values(): - # from Pandas docs: - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html - pdf = pd.DataFrame( - { - "foo": ["one", "one", "one", "two", "two", "two"], - "bar": ["A", "B", "C", "A", "B", "C"], - "baz": [1, 2, 3, 4, 5, 6], - "zoo": ["x", "y", "z", "q", "w", "t"], - } - ) - gdf = cudf.from_pandas(pdf) - assert_eq( - pdf.pivot(index="foo", columns="bar", values=["baz", "zoo"]), - gdf.pivot(index="foo", columns="bar", values=["baz", "zoo"]), - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "values", ["z", "z123", ["z123"], ["z", "z123", "123z"]] -) -def test_pivot_values(values): - data = [ - ["A", "a", 0, 0, 0], - ["A", "b", 1, 1, 1], - ["A", "c", 2, 2, 2], - ["B", "a", 0, 0, 0], - ["B", "b", 1, 1, 1], - ["B", "c", 2, 2, 2], - ["C", "a", 0, 0, 0], - ["C", "b", 1, 1, 1], - ["C", "c", 2, 2, 2], - ] - columns = ["x", "y", "z", "z123", "123z"] - pdf = pd.DataFrame(data, columns=columns) - cdf = cudf.DataFrame(data, columns=columns) - expected = pd.pivot(pdf, index="x", columns="y", values=values) - actual = cudf.pivot(cdf, index="x", columns="y", values=values) - assert_eq( - expected, - actual, - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "level", - [ - 0, - pytest.param( - 1, - marks=pytest_xfail( - reason="Categorical column indexes not supported" - ), - ), - 2, - "foo", - pytest.param( - "bar", - marks=pytest_xfail( - reason="Categorical column indexes not supported" - ), - ), - "baz", - [], - pytest.param( - [0, 1], - marks=pytest_xfail( - reason="Categorical column indexes not supported" - ), - ), - ["foo"], - pytest.param( - ["foo", "bar"], - marks=pytest_xfail( - reason="Categorical column indexes not supported" - ), - ), - pytest.param( - [0, 1, 2], - marks=pytest_xfail(reason="Pandas behaviour unclear"), - ), - pytest.param( - ["foo", "bar", "baz"], - marks=pytest_xfail(reason="Pandas behaviour unclear"), - ), - ], -) -def test_unstack_multiindex(level): - pdf = pd.DataFrame( - { - 
"foo": ["one", "one", "one", "two", "two", "two"], - "bar": pd.Categorical(["A", "B", "C", "A", "B", "C"]), - "baz": [1, 2, 3, 4, 5, 6], - "zoo": ["x", "y", "z", "q", "w", "t"], - } - ).set_index(["foo", "bar", "baz"]) - gdf = cudf.from_pandas(pdf) - assert_eq( - pdf.unstack(level=level), - gdf.unstack(level=level), - check_dtype=False, - ) - - -@pytest.mark.parametrize( - "index", - [ - pd.Index(range(0, 5), name=None), - pd.Index(range(0, 5), name="row_index"), - pytest.param( - pd.CategoricalIndex(["d", "e", "f", "g", "h"]), - marks=pytest_xfail( - reason="Categorical column indexes not supported" - ), - ), - ], -) -@pytest.mark.parametrize( - "col_idx", - [ - pd.Index(["a", "b"], name=None), - pd.Index(["a", "b"], name="col_index"), - pd.MultiIndex.from_tuples([("c", 1), ("c", 2)], names=[None, None]), - pd.MultiIndex.from_tuples( - [("c", 1), ("c", 2)], names=["col_index1", "col_index2"] - ), - ], -) -def test_unstack_index(index, col_idx): - data = { - "A": [1.0, 2.0, 3.0, 4.0, 5.0], - "B": [11.0, 12.0, 13.0, 14.0, 15.0], - } - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - pdf.index = index - pdf.columns = col_idx - - gdf.index = cudf.from_pandas(index) - gdf.columns = cudf.from_pandas(col_idx) - - assert_eq(pdf.unstack(), gdf.unstack()) - - -def test_unstack_index_invalid(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - with pytest.raises( - ValueError, - match=re.escape( - "Calling unstack() on single index dataframe with " - "different column datatype is not supported." - ), - ): - gdf.unstack() - - -def test_pivot_duplicate_error(): - gdf = cudf.DataFrame( - {"a": [0, 1, 2, 2], "b": [1, 2, 3, 3], "d": [1, 2, 3, 4]} - ) - with pytest.raises(ValueError): - gdf.pivot(index="a", columns="b") - with pytest.raises(ValueError): - gdf.pivot(index="b", columns="a") - - -@pytest.mark.parametrize( - "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] -) -def test_pivot_table_simple(aggfunc): - rng = np.random.default_rng(seed=0) - fill_value = 0 - pdf = pd.DataFrame( - { - "A": ["one", "one", "two", "three"] * 6, - "B": ["A", "B", "C"] * 8, - "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": rng.standard_normal(size=24), - "E": rng.standard_normal(size=24), - } - ) - expected = pd.pivot_table( - pdf, - values=["D", "E"], - index=["A", "B"], - columns=["C"], - aggfunc=aggfunc, - fill_value=fill_value, - ) - cdf = cudf.DataFrame.from_pandas(pdf) - actual = cudf.pivot_table( - cdf, - values=["D", "E"], - index=["A", "B"], - columns=["C"], - aggfunc=aggfunc, - fill_value=fill_value, - ) - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}] -) -def test_dataframe_pivot_table_simple(aggfunc): - rng = np.random.default_rng(seed=0) - fill_value = 0 - pdf = pd.DataFrame( - { - "A": ["one", "one", "two", "three"] * 6, - "B": ["A", "B", "C"] * 8, - "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": rng.standard_normal(size=24), - "E": rng.standard_normal(size=24), - } - ) - expected = pdf.pivot_table( - values=["D", "E"], - index=["A", "B"], - columns=["C"], - aggfunc=aggfunc, - fill_value=fill_value, - ) - cdf = cudf.DataFrame.from_pandas(pdf) - actual = cdf.pivot_table( - values=["D", "E"], - index=["A", "B"], - columns=["C"], - aggfunc=aggfunc, - fill_value=fill_value, - ) - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize("index", ["A", ["A"]]) -@pytest.mark.parametrize("columns", ["C", ["C"]]) -def 
test_pivot_table_scalar_index_columns(index, columns): - data = { - "A": ["one", "one", "two", "three"] * 6, - "B": ["A", "B", "C"] * 8, - "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": range(24), - "E": range(24), - } - result = cudf.DataFrame(data).pivot_table( - values="D", index=index, columns=columns, aggfunc="sum" - ) - expected = pd.DataFrame(data).pivot_table( - values="D", index=index, columns=columns, aggfunc="sum" - ) - assert_eq(result, expected) - - -def test_crosstab_simple(): - a = np.array( - [ - "foo", - "foo", - "foo", - "foo", - "bar", - "bar", - "bar", - "bar", - "foo", - "foo", - "foo", - ], - dtype=object, - ) - b = np.array( - [ - "one", - "one", - "one", - "two", - "one", - "one", - "one", - "two", - "two", - "two", - "one", - ], - dtype=object, - ) - c = np.array( - [ - "dull", - "dull", - "shiny", - "dull", - "dull", - "shiny", - "shiny", - "dull", - "shiny", - "shiny", - "shiny", - ], - dtype=object, - ) - expected = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) - actual = cudf.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize("index", [["ix"], ["ix", "foo"]]) -@pytest.mark.parametrize("columns", [["col"], ["col", "baz"]]) -def test_pivot_list_like_index_columns(index, columns): - data = { - "bar": ["x", "y", "z", "w"], - "col": ["a", "b", "a", "b"], - "foo": [1, 2, 3, 4], - "ix": [1, 1, 2, 2], - "baz": [0, 0, 0, 0], - } - pd_df = pd.DataFrame(data) - cudf_df = cudf.DataFrame(data) - result = cudf_df.pivot(columns=columns, index=index) - expected = pd_df.pivot(columns=columns, index=index) - assert_eq(result, expected) From f19b7ed4647df6cc6814b444cfe2f34f2d2be04d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:28:17 -0700 Subject: [PATCH 129/366] Move test_csv/feather/json.py to new cudf classic test directory structure (#19639) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19639 --- .../cudf/cudf/tests/input_output/test_csv.py | 2254 ++++++++++++++++- .../cudf/tests/input_output/test_feather.py | 66 +- .../cudf/cudf/tests/input_output/test_json.py | 1447 ++++++++++- python/cudf/cudf/tests/test_csv.py | 2249 ---------------- python/cudf/cudf/tests/test_feather.py | 78 - python/cudf/cudf/tests/test_json.py | 1467 ----------- 6 files changed, 3764 insertions(+), 3797 deletions(-) delete mode 100644 python/cudf/cudf/tests/test_csv.py delete mode 100644 python/cudf/cudf/tests/test_feather.py delete mode 100644 python/cudf/cudf/tests/test_json.py diff --git a/python/cudf/cudf/tests/input_output/test_csv.py b/python/cudf/cudf/tests/input_output/test_csv.py index 06777c8e6af..9ab62b11c7b 100644 --- a/python/cudf/cudf/tests/input_output/test_csv.py +++ b/python/cudf/cudf/tests/input_output/test_csv.py @@ -1 +1,2253 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
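[Reviewer annotation, not part of the patch: most of the relocated CSV tests
below follow one pattern: run cudf.read_csv and pandas.read_csv over the same
buffer while varying a single reader option (dtype, names, parse_dates,
compression, byte_range, and so on), then compare the resulting frames.
Sketched with invented data:

    from io import StringIO

    import pandas as pd

    import cudf
    from cudf.testing import assert_eq

    buf = "a,b\n1,2.5\n3,4.0"
    expect = pd.read_csv(StringIO(buf), dtype={"a": "int32"})
    got = cudf.read_csv(StringIO(buf), dtype={"a": "int32"})
    assert_eq(expect, got)
]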
+ +import codecs +import gzip +import os +import re +import shutil +from io import BytesIO, StringIO + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf import read_csv +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if + + +@pytest.fixture +def pd_mixed_dataframe(): + return pd.DataFrame( + { + "Integer": [2345, 11987, 9027, 9027], + "Date": ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"], + "Float": [9.001, 8.343, 6, 2.781], + "Integer2": [2345, 106, 2088, 789277], + "Category": ["M", "F", "F", "F"], + "String": ["Alpha", "Beta", "Gamma", "Delta"], + "Boolean": [True, False, True, False], + } + ) + + +@pytest.fixture +def cudf_mixed_dataframe(pd_mixed_dataframe): + return cudf.from_pandas(pd_mixed_dataframe) + + +@pytest.fixture +def gdf_np_dtypes(): + gdf_dtypes = [ + "float", + "float32", + "double", + "float64", + "int8", + "short", + "int16", + "int", + "int32", + "long", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + ] + + np_dtypes = [ + np.float32, + np.float32, + np.float64, + np.float64, + np.int8, + np.int16, + np.int16, + np.int32, + np.int32, + np.int64, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + ] + return dict(zip(gdf_dtypes, np_dtypes, strict=True)) + + +@pytest.fixture +def numeric_extremes_dataframe(gdf_np_dtypes): + data = {} + for typ, np_type in gdf_np_dtypes.items(): + if np.dtype(np_type).kind in "iu": + itype = np.iinfo(np_type) + extremes = [0, +1, -1, itype.min, itype.max] + data[typ] = np.array(extremes * 4).astype(np_type)[:20] + else: + ftype = np.finfo(np_type) + extremes = [ + 0.0, + -0.0, + +1, + -1, + np.nan, + -np.nan, + # ftype.min, # TODO enable after fixing truncation issue #6235 + # ftype.max, # TODO enable after fixing truncation issue #6235 + np_type(np.inf), + -np_type(np.inf), + ftype.eps, + ftype.epsneg, + ftype.tiny, + -ftype.eps, + -ftype.epsneg, + -ftype.tiny, + ] + data[typ] = np.array(extremes * 4, dtype=np_type)[:20] + return pd.DataFrame(data) + + +def test_csv_reader_numeric_data(numeric_types_as_str, tmp_path): + fname = tmp_path / "tmp_csvreader_file1.csv" + + df = pd.DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + ).astype(numeric_types_as_str) + df.to_csv(fname, index=False, header=False) + + dtypes = [df[k].dtype for k in df.columns] + out = read_csv(str(fname), names=list(df.columns.values), dtype=dtypes) + + assert len(out.columns) == len(df.columns) + assert_eq(df, out) + + +@pytest.mark.parametrize("parse_dates", [["date2"], [0], ["date1", 1, "bad"]]) +def test_csv_reader_datetime(parse_dates): + df = pd.DataFrame( + { + "col1": [ + "31/10/2010", + "05/03/2001", + "20/10/1994", + "18/10/1990", + "1/1/1970", + "2016-04-30T01:02:03.000", + "2038-01-19 03:14:07", + ], + "col2": [ + "18/04/1995", + "14 / 07 / 1994", + "07/06/2006", + "16/09/2005", + "2/2/1970", + "2007-4-30 1:6:40.000PM", + "2038-01-19 03:14:08", + ], + "col3": [ + "1 Jan", + "2 January 1994", + "Feb 2002", + "31-01-2000", + "1-1-1996", + "15-May-2009", + "21-Dec-3262", + ], + } + ) + buffer = df.to_csv(index=False, header=False) + + gdf = read_csv( + StringIO(buffer), + names=["date1", "date2", "bad"], + parse_dates=parse_dates, + dayfirst=True, + ) + # Need to used `date_format='mixed'`, + # https://github.com/pandas-dev/pandas/issues/53355 + pdf = pd.read_csv( + StringIO(buffer), + names=["date1", 
"date2", "bad"], + parse_dates=parse_dates, + dayfirst=True, + date_format="mixed", + ) + + assert_eq(gdf, pdf) + + +@pytest.mark.parametrize("p_arg", ["delimiter", "sep"]) +@pytest.mark.parametrize("c_arg", ["sep", "delimiter"]) +def test_csv_reader_mixed_data_delimiter_sep( + tmp_path, p_arg, c_arg, pd_mixed_dataframe +): + pandas_arg = {p_arg: "|"} + cudf_arg = {c_arg: "|"} + fname = tmp_path / "tmp_csvreader_file3.csv" + + pd_mixed_dataframe.to_csv(fname, sep="|", index=False, header=False) + + gdf1 = read_csv( + str(fname), + names=["1", "2", "3", "4", "5", "6", "7"], + dtype=[ + "int64", + "datetime64[ns]", + "float64", + "int64", + "category", + "str", + "bool", + ], + dayfirst=True, + **cudf_arg, + ) + gdf2 = read_csv( + str(fname), + names=["1", "2", "3", "4", "5", "6", "7"], + dtype=[ + "int64", + "datetime64[ns]", + "float64", + "int64", + "category", + "str", + "bool", + ], + dayfirst=True, + **pandas_arg, + ) + + pdf = pd.read_csv( + fname, + names=["1", "2", "3", "4", "5", "6", "7"], + parse_dates=[1], + dayfirst=True, + **pandas_arg, + ) + + assert len(gdf1.columns) == len(pdf.columns) + assert len(gdf2.columns) == len(pdf.columns) + assert_eq(gdf1, gdf2) + + +@pytest.mark.parametrize("use_list", [False, True]) +def test_csv_reader_dtype_list(use_list): + df = pd.DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + ).astype(np.float32) + buffer = df.to_csv(index=False, header=False) + + # PANDAS doesn't list but cudf does (treated as implied ordered dict) + # Select first column's dtype if non-list; expect the same dtype for all + if use_list: + dtypes = [df[k].dtype for k in df.columns] + else: + dtypes = df[df.columns[0]].dtype + + gdf = read_csv(StringIO(buffer), dtype=dtypes, names=df.columns) + + assert_eq(gdf, df) + + +@pytest.mark.parametrize("use_names", [False, True]) +def test_csv_reader_dtype_dict(use_names, gdf_np_dtypes): + # Save with the column header if not explicitly specifying a list of names + df = pd.DataFrame( + { + typ: np.zeros(3, dtype=np_type) + for typ, np_type in gdf_np_dtypes.items() + } + ) + buffer = df.to_csv(index=False, header=not use_names) + dtypes = df.dtypes.to_dict() + names = list(gdf_np_dtypes.keys()) if use_names else None + gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) + pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) + + assert_eq(gdf, pdf) + + +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") +@pytest.mark.parametrize("use_names", [True, False]) +def test_csv_reader_dtype_extremes(use_names, numeric_extremes_dataframe): + # Save with the column header if not explicitly specifying a list of names + df = numeric_extremes_dataframe + buffer = df.to_csv(index=False, header=not use_names) + dtypes = df.dtypes.to_dict() + names = df.columns.to_list() if use_names else None + + gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) + pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) + + assert_eq(gdf, pdf) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/52449", +) +def test_csv_reader_skiprows_skipfooter(tmp_path, pd_mixed_dataframe): + fname = tmp_path / "tmp_csvreader_file5.csv" + + pd_mixed_dataframe.to_csv( + fname, columns=["Integer", "Date", "Float"], index=False, header=False + ) + + # Using engine='python' to eliminate pandas warning of using python engine. 
+ df_out = pd.read_csv( + fname, + names=["1", "2", "3"], + parse_dates=[1], + dayfirst=True, + skiprows=1, + skipfooter=1, + engine="python", + ) + out = read_csv( + str(fname), + names=["1", "2", "3"], + dtype=["int64", "datetime64[ns]", "float64"], + skiprows=1, + skipfooter=1, + dayfirst=True, + ) + + assert len(out.columns) == len(df_out.columns) + assert len(out) == len(df_out) + + assert_eq(df_out, out, check_dtype=False) + + +def test_csv_reader_negative_vals(tmp_path): + fname = tmp_path / "tmp_csvreader_file6.csv" + + names = ["0", "1", "2"] + dtypes = ["float32", "float32", "float32"] + lines = [ + ",".join(names), + "-181.5060,-185.37000,-3", + "-127.6300,-230.54600,-9", + ] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + zero = [-181.5060, -127.6300] + one = [-185.370, -230.54600] + two = [-3, -9] + + df = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) + + np.testing.assert_allclose(zero, df["0"].to_numpy()) + np.testing.assert_allclose(one, df["1"].to_numpy()) + np.testing.assert_allclose(two, df["2"].to_numpy()) + + +def test_csv_reader_strings(tmp_path): + fname = tmp_path / "tmp_csvreader_file7.csv" + + names = ["text", "int"] + dtypes = ["str", "int"] + lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + df = read_csv( + str(fname), + names=names, + dtype=dtypes, + skiprows=1, + decimal=".", + thousands="'", + ) + + assert len(df.columns) == 2 + assert df["text"].dtype == np.dtype("object") + assert df["int"].dtype == np.dtype("int64") + assert df["text"][0] == "a" + assert df["text"][1] == "b" + assert df["text"][2] == "c" + assert df["text"][3] == "d" + + +def test_csv_reader_strings_quotechars(tmp_path): + fname = tmp_path / "tmp_csvreader_file8.csv" + + names = ["text", "int"] + dtypes = ["str", "int"] + lines = [",".join(names), '"a,\n",0', '"b ""c"" d",0', "e,0", '"f,,!.,",0'] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + df = read_csv( + str(fname), + names=names, + dtype=dtypes, + skiprows=1, + quotechar='"', + quoting=1, + ) + + assert len(df.columns) == 2 + assert df["text"].dtype == np.dtype("object") + assert df["int"].dtype == np.dtype("int64") + assert df["text"][0] == "a,\n" + assert df["text"][1] == 'b "c" d' + assert df["text"][2] == "e" + assert df["text"][3] == "f,,!.," + + +def test_csv_reader_usecols_int_char(tmp_path, pd_mixed_dataframe): + fname = tmp_path / "tmp_csvreader_file10.csv" + pd_mixed_dataframe.to_csv( + fname, + columns=["Integer", "Date", "Float", "Integer2"], + index=False, + header=False, + ) + + df_out = pd.read_csv(fname, usecols=[0, 1, 3]) + out = read_csv(fname, usecols=[0, 1, 3]) + + assert len(out.columns) == len(df_out.columns) + assert len(out) == len(df_out) + assert_eq(df_out, out, check_names=False) + + +@pytest.mark.parametrize( + "buffer", + [ + "abc,ABC,abc,abcd,abc\n1,2,3,4,5\n", + "A,A,A.1,A,A.2,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a", + "A,A,A.1,,Unnamed: 4,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a", + ], +) +@pytest.mark.parametrize("mangle_dupe_cols", [True, False]) +def test_csv_reader_mangle_dupe_cols(buffer, mangle_dupe_cols): + # Default: mangle_dupe_cols=True + cu_df = read_csv(StringIO(buffer), mangle_dupe_cols=mangle_dupe_cols) + if mangle_dupe_cols: + pd_df = pd.read_csv(StringIO(buffer)) + else: + # Pandas does not support mangle_dupe_cols=False + head = buffer.split("\n")[0].split(",") + first_cols = np.unique(head, return_index=True)[1] + pd_df = pd.read_csv(StringIO(buffer), 
usecols=first_cols) + assert_eq(cu_df, pd_df) + + +def test_csv_reader_float_decimal(tmp_path): + fname = tmp_path / "tmp_csvreader_file12.csv" + + names = ["basic_32", "basic_64", "round", "decimal_only", "precision"] + dtypes = ["float32", "float64", "float64", "float32", "float64"] + lines = [ + ";".join(names), + "1,2;1234,5678;12345;0,123;-73,98007199999998", + "3,4;3456,7890;67890;,456;1,7976931348623157e+307", + "5,6e0;0,5679e2;1,2e10;0,07e-001;0,0", + ] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + basic_32_ref = [1.2, 3.4, 5.6] + basic_64_ref = [1234.5678, 3456.7890, 56.79] + round_ref = [12345, 67890, 12000000000] + decimal_only_ref = [0.123, 0.456, 0.007] + precision_ref = [-73.98007199999998, 1.7976931348623157e307, 0.0] + + df = read_csv( + str(fname), + names=names, + dtype=dtypes, + skiprows=1, + delimiter=";", + decimal=",", + ) + + np.testing.assert_allclose(basic_32_ref, df["basic_32"].to_numpy()) + np.testing.assert_allclose(basic_64_ref, df["basic_64"].to_numpy()) + np.testing.assert_allclose(round_ref, df["round"].to_numpy()) + np.testing.assert_allclose(decimal_only_ref, df["decimal_only"].to_numpy()) + np.testing.assert_allclose(precision_ref, df["precision"].to_numpy()) + + +def test_csv_reader_NaN_values(): + names = dtypes = ["float32"] + empty_cells = '\n""\n' + default_na_cells = ( + "#N/A\n#N/A N/A\n#NA\n-1.#IND\n" + "-1.#QNAN\n-NaN\n-nan\n1.#IND\n" + "1.#QNAN\nN/A\n\nNA\nNULL\n" + "NaN\nn/a\nnan\nnull\n" + ) + custom_na_cells = "NV_NAN\nNotANumber\n" + all_cells = empty_cells + default_na_cells + custom_na_cells + custom_na_values = ["NV_NAN", "NotANumber"] + + # test default NA values. empty cells should also yield NaNs + gdf = read_csv( + StringIO(default_na_cells + empty_cells), names=names, dtype=dtypes + ) + pdf = pd.read_csv( + StringIO(default_na_cells + empty_cells), names=names, dtype=np.float32 + ) + assert_eq(pdf, gdf) + + # custom NA values + gdf = read_csv( + StringIO(all_cells), + names=names, + dtype=dtypes, + na_values=custom_na_values, + ) + pdf = pd.read_csv( + StringIO(all_cells), + names=names, + dtype=np.float32, + na_values=custom_na_values, + ) + assert_eq(pdf, gdf) + + # custom NA values + gdf = read_csv( + StringIO(empty_cells + default_na_cells + "_NAA_\n"), + names=names, + dtype=dtypes, + na_values="_NAA_", + ) + pdf = pd.read_csv( + StringIO(empty_cells + default_na_cells + "_NAA_\n"), + names=names, + dtype=np.float32, + na_values="_NAA_", + ) + assert_eq(pdf, gdf) + + # data type detection should evaluate the column to int8 (all nulls) + gdf = read_csv( + StringIO(all_cells), + header=None, + na_values=custom_na_values, + ) + assert gdf.dtypes.iloc[0] == "int8" + assert all(gdf["0"][idx] is cudf.NA for idx in range(len(gdf["0"]))) + + # data type detection should evaluate the column to object if some nulls + gdf = read_csv(StringIO(all_cells), header=None) + assert gdf.dtypes.iloc[0] == np.dtype("object") + + +def test_csv_reader_thousands(tmp_path): + fname = tmp_path / "tmp_csvreader_file13.csv" + + names = dtypes = [ + "float32", + "float64", + "int32", + "int64", + "uint32", + "uint64", + ] + lines = [ + ",".join(names), + "1'234.5, 1'234.567, 1'234'567, 1'234'567'890,\ + 1'234'567, 1'234'567'890", + "12'345.6, 123'456.7, 12'345, 123'456'789, 12'345, 123'456'789", + ] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + f32_ref = [1234.5, 12345.6] + f64_ref = [1234.567, 123456.7] + int32_ref = [1234567, 12345] + int64_ref = [1234567890, 123456789] + uint32_ref = [1234567, 12345] + 
uint64_ref = [1234567890, 123456789] + + df = read_csv( + str(fname), names=names, dtype=dtypes, skiprows=1, thousands="'" + ) + + np.testing.assert_allclose(f32_ref, df["float32"].to_numpy()) + np.testing.assert_allclose(f64_ref, df["float64"].to_numpy()) + np.testing.assert_allclose(int32_ref, df["int32"].to_numpy()) + np.testing.assert_allclose(int64_ref, df["int64"].to_numpy()) + np.testing.assert_allclose(uint32_ref, df["uint32"].to_numpy()) + np.testing.assert_allclose(uint64_ref, df["uint64"].to_numpy()) + + +def test_csv_reader_buffer_strings(): + names = ["text", "int"] + dtypes = ["str", "int"] + lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] + + buffer = "\n".join(lines) + + df = read_csv(StringIO(buffer), names=names, dtype=dtypes, skiprows=1) + assert len(df.columns) == 2 + assert df["text"].dtype == np.dtype("object") + assert df["int"].dtype == np.dtype("int64") + assert df["text"][0] == "a" + assert df["text"][1] == "b" + assert df["text"][2] == "c" + assert df["text"][3] == "d" + + df2 = read_csv( + BytesIO(str.encode(buffer)), names=names, dtype=dtypes, skiprows=1 + ) + assert len(df2.columns) == 2 + assert df2["text"].dtype == np.dtype("object") + assert df2["int"].dtype == np.dtype("int64") + assert df2["text"][0] == "a" + assert df2["text"][1] == "b" + assert df2["text"][2] == "c" + assert df2["text"][3] == "d" + + +@pytest.mark.parametrize( + "ext, out_comp, in_comp", + [ + (".geez", "gzip", "gzip"), + (".beez", "bz2", "bz2"), + (".gz", "gzip", "infer"), + (".bz2", "bz2", "infer"), + (".beez", "bz2", np.str_("bz2")), + (".data", None, "infer"), + (".txt", None, None), + ("", None, None), + ], +) +def test_csv_reader_compression( + tmp_path, ext, out_comp, in_comp, pd_mixed_dataframe +): + fname = tmp_path / f"tmp_csvreader_compression.{ext}" + + df = pd_mixed_dataframe + df.to_csv(fname, index=False, header=False, compression=out_comp) + + gdf = read_csv(fname, names=list(df.columns.values), compression=in_comp) + pdf = pd.read_csv( + fname, names=list(df.columns.values), compression=in_comp + ) + + assert_eq(gdf, pdf) + + +@pytest.mark.parametrize( + "names, dtypes, data, trues, falses", + [ + ( + ["A", "B"], + ["bool", "bool"], + "True,True\nFalse,False\nTrue,False", + None, + None, + ), + ( + ["A", "B"], + ["int32", "int32"], + "True,1\nFalse,2\nTrue,3", + None, + None, + ), + ( + ["A", "B"], + ["int32", "int32"], + "YES,1\nno,2\nyes,3\nNo,4\nYes,5", + ["yes", "Yes", "YES"], + ["no", "NO", "No"], + ), + (["A", "B"], ["int32", "int32"], "foo,bar\nbar,foo", ["foo"], ["bar"]), + (["x", "y"], None, "True,1\nFalse,0", None, None), + ], +) +def test_csv_reader_bools(tmp_path, names, dtypes, data, trues, falses): + fname = tmp_path / "tmp_csvreader_file11.csv" + + lines = [",".join(names), data] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + # Usage of true_values and false_values makes that column into bool type + df_out = pd.read_csv( + fname, + names=names, + skiprows=1, + dtype=(dtypes[0] if dtypes else None), + true_values=trues, + false_values=falses, + ) + + out = read_csv( + fname, + names=names, + dtype=dtypes, + skiprows=1, + true_values=trues, + false_values=falses, + ) + + assert_eq(df_out, out) + + +def test_csv_reader_bools_custom(): + names = ["text", "bool"] + dtypes = {"text": "str", "bool": "bool"} + trues = ["foo", "1"] + falses = ["bar", "0"] + lines = [ + ",".join(names), + "true,true", + "false,false", + "foo,foo", + "bar,bar", + "0,0", + "1,1", + ] + buffer = "\n".join(lines) + + df = read_csv( + StringIO(buffer), + 
names=names, + dtype=dtypes, + skiprows=1, + true_values=trues, + false_values=falses, + ) + + # Note: bool literals give parsing errors as int + # "0" and "1" give parsing errors as bool in pandas + expected = pd.read_csv( + StringIO(buffer), + names=names, + dtype=dtypes, + skiprows=1, + true_values=trues, + false_values=falses, + ) + assert_eq(df, expected, check_dtype=True) + + +def test_csv_reader_bools_NA(): + names = ["text", "int"] + dtypes = ["str", "int"] + trues = ["foo"] + falses = ["bar"] + lines = [ + ",".join(names), + "true,true", + "false,false", + "foo,foo", + "bar,bar", + "qux,qux", + ] + + buffer = "\n".join(lines) + + df = read_csv( + StringIO(buffer), + names=names, + dtype=dtypes, + skiprows=1, + true_values=trues, + false_values=falses, + ) + assert len(df.columns) == 2 + assert df["text"].dtype == np.dtype("object") + assert df["int"].dtype == np.dtype("int64") + expected = pd.DataFrame( + { + "text": ["true", "false", "foo", "bar", "qux"], + "int": [1.0, 0.0, 1.0, 0.0, np.nan], + } + ) + assert_eq(df, expected) + + +def test_csv_quotednumbers(tmp_path): + fname = tmp_path / "tmp_csvreader_file12.csv" + + names = ["integer", "decimal"] + dtypes = ["int32", "float32"] + lines = [ + ",".join(names), + '1,"3.14"', + '"2","300"', + '"3",10101.0101', + '4,"6.28318"', + ] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + integer_ref = [1, 2, 3, 4] + decimal_ref = [3.14, 300, 10101.0101, 6.28318] + + df1 = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) + df2 = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) + + assert len(df2.columns) == 2 + np.testing.assert_allclose(integer_ref, df1["integer"].to_numpy()) + np.testing.assert_allclose(decimal_ref, df1["decimal"].to_numpy()) + np.testing.assert_allclose(integer_ref, df2["integer"].to_numpy()) + np.testing.assert_allclose(decimal_ref, df2["decimal"].to_numpy()) + + +def test_csv_reader_nrows(tmp_path): + fname = tmp_path / "tmp_csvreader_file14.csv" + + names = ["int1", "int2"] + dtypes = ["int32", "int32"] + + rows = 4000 + read_rows = (rows * 3) // 4 + skip_rows = (rows - read_rows) // 2 + sample_skip = 100 + + with open(str(fname), "w") as fp: + fp.write(",".join(names) + "\n") + for i in range(rows): + fp.write(str(i) + ", " + str(2 * i) + " \n") + + # with specified names + df = read_csv( + str(fname), + names=names, + dtype=dtypes, + skiprows=skip_rows + 1, + nrows=read_rows, + ) + assert df.shape == (read_rows, 2) + for row in range(0, read_rows // sample_skip, sample_skip): + assert df["int1"][row] == row + skip_rows + assert df["int2"][row] == 2 * (row + skip_rows) + assert df["int2"][read_rows - 1] == 2 * (read_rows - 1 + skip_rows) + + # with column name inference + df = read_csv( + str(fname), dtype=dtypes, skiprows=skip_rows + 1, nrows=read_rows + ) + assert df.shape == (read_rows, 2) + assert str(skip_rows) in next(iter(df)) + assert str(2 * skip_rows) in list(df)[1] + for row in range(0, read_rows // sample_skip, sample_skip): + assert df[next(iter(df))][row] == row + skip_rows + 1 + assert df[list(df)[1]][row] == 2 * (row + skip_rows + 1) + assert df[list(df)[1]][read_rows - 1] == 2 * (read_rows + skip_rows) + + # nrows larger than the file + df = read_csv(str(fname), dtype=dtypes, nrows=rows * 2) + assert df.shape == (rows, 2) + for row in range(0, rows // sample_skip, sample_skip): + assert df["int1"][row] == row + assert df["int2"][row] == 2 * row + assert df["int2"][rows - 1] == 2 * (rows - 1) + + # nrows + skiprows larger than the file + df = read_csv( + 
str(fname), dtype=dtypes, nrows=read_rows, skiprows=read_rows + ) + assert df.shape == (rows - read_rows, 2) + + # nrows equal to zero + df = read_csv(str(fname), dtype=dtypes, nrows=0) + assert df.shape == (0, 2) + + # with both skipfooter and nrows - should throw + with pytest.raises(ValueError): + read_csv(str(fname), nrows=read_rows, skipfooter=1) + + +def test_csv_reader_gzip_compression_strings(tmp_path): + fname = tmp_path / "tmp_csvreader_file15.csv" + fnamez = tmp_path / "tmp_csvreader_file15.csv.gz" + + names = ["text", "int"] + dtypes = ["str", "int"] + lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] + + with open(str(fname), "w") as fp: + fp.write("\n".join(lines)) + + with open(str(fname), "rb") as f_in, gzip.open(str(fnamez), "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + + df = read_csv( + str(fnamez), + names=names, + dtype=dtypes, + skiprows=1, + decimal=".", + thousands="'", + compression="gzip", + ) + + assert len(df.columns) == 2 + assert df["text"].dtype == np.dtype("object") + assert df["int"].dtype == np.dtype("int64") + assert df["text"][0] == "a" + assert df["text"][1] == "b" + assert df["text"][2] == "c" + assert df["text"][3] == "d" + + +@pytest.mark.parametrize("skip_rows", [0, 4]) +@pytest.mark.parametrize("header_row", [0, 2]) +def test_csv_reader_skiprows_header(skip_rows, header_row): + names = ["float_point", "integer"] + dtypes = ["float64", "int64"] + lines = [ + ",".join(names), + "1.2, 1", + "2.3, 2", + "3.4, 3", + "4.5, 4", + "5.6, 5", + "6.7, 6", + ] + buffer = "\n".join(lines) + + cu_df = read_csv( + StringIO(buffer), dtype=dtypes, skiprows=skip_rows, header=header_row + ) + pd_df = pd.read_csv( + StringIO(buffer), skiprows=skip_rows, header=header_row + ) + + assert cu_df.shape == pd_df.shape + assert list(cu_df.columns.values) == list(pd_df.columns.values) + + +def test_csv_reader_dtype_inference(): + names = ["float_point", "integer"] + lines = [ + ",".join(names), + "1.2,1", + "2.3,2", + "3.4,3", + "4.5,4", + "5.6,5", + "6.7,6", + ] + buffer = "\n".join(lines) + cu_df = read_csv(StringIO(buffer)) + pd_df = pd.read_csv(StringIO(buffer)) + + assert cu_df.shape == pd_df.shape + assert list(cu_df.columns.values) == list(pd_df.columns.values) + + +def test_csv_reader_dtype_inference_whitespace(): + names = ["float_point", "integer"] + lines = [ + ",".join(names), + " 1.2, 1", + "2.3,2 ", + " 3.4, 3", + " 4.5,4", + "5.6, 5", + " 6.7,6 ", + ] + buffer = "\n".join(lines) + cu_df = read_csv(StringIO(buffer)) + pd_df = pd.read_csv(StringIO(buffer)) + + assert cu_df.shape == pd_df.shape + assert list(cu_df.columns.values) == list(pd_df.columns.values) + + +def test_csv_reader_empty_dataframe(): + dtypes = ["float64", "int64"] + buffer = "float_point, integer" + + # should work fine with dtypes + df = read_csv(StringIO(buffer), dtype=dtypes) + assert df.shape == (0, 2) + assert all(df.dtypes == ["float64", "int64"]) + + # should default to string columns without dtypes + df = read_csv(StringIO(buffer)) + assert df.shape == (0, 2) + assert all(df.dtypes == ["object", "object"]) + + +def test_csv_reader_filenotfound(tmp_path): + fname = str(tmp_path / "non-existing-filename.csv") + with pytest.raises(FileNotFoundError): + read_csv(fname) + + dir_name = tmp_path / "gdf_csv" + dir_name.mkdir() + with pytest.raises(FileNotFoundError): + read_csv(str(dir_name)) + + +@pytest.mark.parametrize( + "src", + [ + lambda path: str(path), + lambda path: path, + lambda path: BytesIO(path.read_bytes()), + lambda path: StringIO(path.read_text()), + lambda path: 
path.as_uri(), + ], + ids=["filepath", "pathlib.Path", "ByteIO", "StringIO", "url"], +) +def test_csv_reader_filepath_or_buffer(tmp_path, src): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=np.int32) + csv_path = tmp_path / "tmp.csv" + df.to_csv(csv_path, index=False, header=False) + expect = pd.read_csv(csv_path) + got = cudf.read_csv(src(csv_path)) + + assert_eq(expect, got) + + +def test_small_zip(tmp_path): + df = pd.DataFrame( + { + "a": [1997] * 2, + "b": ["Ford"] * 2, + "c": ["Super, luxurious truck"] * 2, + } + ) + + fname = tmp_path / "small_zip_file.zip" + df.to_csv(fname, index=False) + + got = cudf.read_csv(fname) + assert_eq(df, got) + + +def test_csv_reader_carriage_return(): + rows = 100 + names = ["int_row", "int_double_row"] + buffer = ",".join(names) + "\r\n" + for row in range(rows): + buffer += str(row) + ", " + str(2 * row) + "\r\n" + + df = read_csv(StringIO(buffer)) + expect = cudf.DataFrame( + {"int_row": cp.arange(rows), "int_double_row": cp.arange(rows) * 2} + ) + + assert len(df) == rows + assert_eq(expect, df) + + +def test_csv_reader_tabs(): + names = ["float_point", "integer", "date"] + lines = [ + ",".join(names), + "1.2,\t12, \t11/22/1995", + "3.4\t,\t34\t,\t 01/01/2001", + "\t 5.6,56 \t, 12/12/1970", + "\t7.8 , 78\t,06/15/2018 \t", + ] + buffer = "\n".join(lines) + + df = read_csv(StringIO(buffer), parse_dates=["date"]) + + assert df.shape == (4, 3) + + floats = [1.2, 3.4, 5.6, 7.8] + ints = [12, 34, 56, 78] + dates = [ + "1995-11-22T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "1970-12-12T00:00:00.000000000", + "2018-06-15T00:00:00.000000000", + ] + np.testing.assert_allclose(floats, df["float_point"].to_numpy()) + np.testing.assert_allclose(ints, df["integer"].to_numpy()) + for row in range(4): + assert str(df["date"][row]) == dates[row] + + +@pytest.mark.parametrize("segment_bytes", [10000, 19999, 30001, 36000]) +def test_csv_reader_byte_range(tmp_path, segment_bytes): + fname = tmp_path / "tmp_csvreader_file16.csv" + + names = ["int1", "int2"] + + rows = 10000 + with open(str(fname), "w") as fp: + for i in range(rows): + fp.write(str(i) + ", " + str(2 * i) + " \n") + file_size = os.stat(str(fname)).st_size + + ref_df = read_csv(str(fname), names=names).to_pandas() + + dfs = [] + for segment in range((file_size + segment_bytes - 1) // segment_bytes): + dfs.append( + read_csv( + str(fname), + names=names, + byte_range=(segment * segment_bytes, segment_bytes), + ) + ) + df = cudf.concat(dfs).to_pandas() + + assert list(df["int1"]) == list(ref_df["int1"]) + assert list(df["int2"]) == list(ref_df["int2"]) + + +def test_csv_reader_byte_range_type_corner_case(tmp_path): + fname = tmp_path / "tmp_csvreader_file17.csv" + + cudf.datasets.timeseries( + start="2000-01-01", + end="2000-01-02", + dtypes={"name": str, "id": int, "x": float, "y": float}, + ).to_csv(fname, chunksize=100000) + + byte_range = (2_147_483_648, 0) + with pytest.raises(ValueError, match="Invalid byte range offset"): + cudf.read_csv(fname, byte_range=byte_range, header=None) + + +@pytest.mark.parametrize("segment_bytes", [10, 19, 31, 36]) +def test_csv_reader_byte_range_strings(segment_bytes): + names = ["strings"] + buffer = "\n".join('"' + str(x) + '"' for x in range(1, 100)) + file_size = len(buffer) + + ref_df = read_csv(StringIO(buffer), names=names).to_pandas() + + dfs = [] + for segment in range((file_size + segment_bytes - 1) // segment_bytes): + dfs.append( + read_csv( + StringIO(buffer), + names=names, + byte_range=(segment * segment_bytes, segment_bytes), 
+ ) + ) + df = cudf.concat(dfs).to_pandas() + + assert list(df["strings"]) == list(ref_df["strings"]) + + +@pytest.mark.parametrize( + "header_row, skip_rows, skip_blanks", + [ + (1, 0, True), + ("infer", 2, True), + (1, 4, True), + (3, 0, False), + ("infer", 5, False), + ], +) +@pytest.mark.parametrize("lineterminator", ["\n", "\r\n"]) +def test_csv_reader_blanks_and_comments( + skip_rows, header_row, skip_blanks, lineterminator +): + lines = [ + "# first comment line", + lineterminator, + "# third comment line", + "1,2,3", + "4,5,6", + "7,8,9", + lineterminator, + "# last comment line", + lineterminator, + "1,1,1", + ] + buffer = lineterminator.join(lines) + + cu_df = read_csv( + StringIO(buffer), + comment="#", + header=header_row, + skiprows=skip_rows, + skip_blank_lines=skip_blanks, + ) + pd_df = pd.read_csv( + StringIO(buffer), + comment="#", + header=header_row, + skiprows=skip_rows, + skip_blank_lines=skip_blanks, + ) + + assert cu_df.shape == pd_df.shape + assert list(cu_df.columns.values) == list(pd_df.columns.values) + + +def test_csv_reader_prefix(): + lines = ["1, 1, 1, 1"] + buffer = "\n".join(lines) + + prefix_str = "a_prefix" + df = read_csv(StringIO(buffer), header=None, prefix=prefix_str) + + column_names = list(df.columns.values) + for col in range(len(column_names)): + assert column_names[col] == prefix_str + str(col) + + +def test_csv_reader_delim_whitespace(): + buffer = "1 2 3\n4 5 6" + + # with header row + with pytest.warns(FutureWarning): + cu_df = read_csv(StringIO(buffer), delim_whitespace=True) + with expect_warning_if(PANDAS_GE_220): + pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True) + assert_eq(pd_df, cu_df) + + # without header row + with pytest.warns(FutureWarning): + cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None) + with expect_warning_if(PANDAS_GE_220): + pd_df = pd.read_csv( + StringIO(buffer), delim_whitespace=True, header=None + ) + assert pd_df.shape == cu_df.shape + + # should raise an error if used with delimiter or sep + with pytest.raises(ValueError): + with pytest.warns(FutureWarning): + read_csv(StringIO(buffer), delim_whitespace=True, delimiter=" ") + with pytest.raises(ValueError): + with pytest.warns(FutureWarning): + read_csv(StringIO(buffer), delim_whitespace=True, sep=" ") + + +def test_csv_reader_unnamed_cols(): + # first and last columns are unnamed + buffer = ",1,2,3,\n4,5,6,7,8" + + cu_df = read_csv(StringIO(buffer)) + pd_df = pd.read_csv(StringIO(buffer)) + + assert all(pd_df.columns == cu_df.columns) + assert pd_df.shape == cu_df.shape + + +def test_csv_reader_header_quotation(): + buffer = '"1,,1","2,\n,2",3\n+4,+5,+6' + + cu_df = read_csv(StringIO(buffer)) + pd_df = pd.read_csv(StringIO(buffer)) + assert cu_df.shape == (1, 3) + assert_eq(pd_df, cu_df) + + # test cases that fail with pandas + buffer_pd_fail = '"1,one," , ",2,two" ,3\n4,5,6' + cu_df = read_csv(StringIO(buffer_pd_fail)) + assert cu_df.shape == (1, 3) + + +def test_csv_reader_oversized_byte_range(): + buffer = "a,b,c,d,e\n4,5,6,7,8" + + cu_df = read_csv(StringIO(buffer), byte_range=(0, 1024)) + pd_df = pd.read_csv(StringIO(buffer)) + + assert all(pd_df.columns == cu_df.columns) + assert pd_df.shape == cu_df.shape + + +def test_csv_reader_index_col(): + buffer = "0,1,2\n3,4,5\n6,7,8" + names = ["int1", "int2", "int3"] + + # using a column name + cu_df = read_csv(StringIO(buffer), names=names, index_col="int1") + pd_df = pd.read_csv(StringIO(buffer), names=names, index_col="int1") + assert_eq(pd_df, cu_df) + + # using a column index + 
cu_df = read_csv(StringIO(buffer), header=None, index_col=0) + pd_df = pd.read_csv(StringIO(buffer), header=None, index_col=0) + assert_eq(cu_df.index, pd_df.index) + + # using a column index with names + cu_df = read_csv(StringIO(buffer), header=None, index_col=0, names=names) + pd_df = pd.read_csv( + StringIO(buffer), header=None, index_col=0, names=names + ) + assert_eq(cu_df.index, pd_df.index) + + # passing False to avoid using a column as index (no-op in cuDF) + cu_df = read_csv(StringIO(buffer), header=None, index_col=False) + pd_df = pd.read_csv(StringIO(buffer), header=None, index_col=False) + assert_eq(cu_df.index, pd_df.index) + + +@pytest.mark.parametrize("index_name", [None, "custom name", 124]) +@pytest.mark.parametrize("index_col", [None, 0, "a"]) +def test_csv_reader_index_names(index_name, index_col): + pdf = pd.DataFrame( + {"a": [1, 2, 3], "b": [10, 11, 12]}, index=["AB", "CD", "EF"] + ) + pdf.index.name = index_name + + buffer = pdf.to_csv() + actual = cudf.read_csv(StringIO(buffer), index_col=index_col) + expected = pd.read_csv(StringIO(buffer), index_col=index_col) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "names", [["a", "b", "c"], [416, 905, 647], range(3), None] +) +def test_csv_reader_column_names(names): + buffer = "0,1,2\n3,4,5\n6,7,8" + + df = read_csv(StringIO(buffer), names=names) + if names is None: + assert list(df) == ["0", "1", "2"] + else: + assert list(df) == list(names) + + +def test_csv_reader_repeated_column_name(): + buffer = """A,A,A.1,A,A.2,A,A.4,A,A + 1,2,3.1,4,a.2,a,a.4,a,a + 2,4,6.1,8,b.2,b,b.4,b,b""" + + # pandas and cudf to have same repeated column names + pdf = pd.read_csv(StringIO(buffer)) + gdf = cudf.read_csv(StringIO(buffer)) + assert_eq(pdf.columns, gdf.columns) + + +def test_csv_reader_bools_false_positives(): + # values that are equal to ["True", "TRUE", "False", "FALSE"] + # when using ints to detect bool values + items = [3977, 4329, 24015, 27567] + + buffer = "\n".join(str(i) for i in items) + + df = read_csv(StringIO(buffer), header=None, dtype=["int32"]) + + np.testing.assert_array_equal(items, df["0"].to_numpy()) + + +def test_csv_reader_aligned_byte_range(tmp_path): + fname = tmp_path / "tmp_csvreader_file19.csv" + nelem = 1000 + + input_df = pd.DataFrame( + {"key": np.arange(0, nelem), "zeros": np.zeros(nelem)} + ) + input_df.to_csv(fname) + + df = cudf.read_csv(str(fname), byte_range=(0, 4096)) + # read_csv call above used to crash; the assert below is not crucial + assert np.count_nonzero(df["zeros"].to_pandas().values) == 0 + + +@pytest.mark.parametrize( + "pdf_dtype, gdf_dtype", + [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], +) +def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): + lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"] + values = [int(hex_int, 16) for hex_int in lines] + + buffer = "\n".join(lines) + + if gdf_dtype is not None: + # require explicit `hex` dtype to parse hexadecimals + pdf = pd.DataFrame(data=values, dtype=pdf_dtype, columns=["hex_int"]) + gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) + np.testing.assert_array_equal( + pdf["hex_int"], gdf["hex_int"].to_numpy() + ) + else: + # otherwise, dtype inference returns as object (string) + pdf = pd.read_csv(StringIO(buffer), names=["hex_int"]) + gdf = read_csv(StringIO(buffer), names=["hex_int"]) + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "np_dtype, gdf_dtype", + [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], +) +def 
test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype): + # This tests values which cause an overflow warning that will become an + # error in pandas. NumPy wraps the overflow silently up to the bounds of a + # signed int64. + lines = [ + "0x0", + "-0x1000", + "0xfedcba", + "0xABCDEF", + "0xaBcDeF", + "0x9512c20b", + "0x7fffffff", + "0x7fffffffffffffff", + "-0x8000000000000000", + ] + values = [int(hex_int, 16) for hex_int in lines] + buffer = "\n".join(lines) + + gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) + + expected = np.array(values).astype(np_dtype) + actual = gdf["hex_int"].to_numpy() + np.testing.assert_array_equal(expected, actual) + + +@pytest.mark.parametrize("quoting", [0, 1, 2, 3]) +def test_csv_reader_pd_consistent_quotes(quoting): + names = ["text"] + dtypes = ["str"] + lines = ['"a"', '"b ""c"" d"', '"f!\n."'] + + buffer = "\n".join(lines) + + gd_df = read_csv( + StringIO(buffer), names=names, dtype=dtypes, quoting=quoting + ) + pd_df = pd.read_csv(StringIO(buffer), names=names, quoting=quoting) + + assert_eq(pd_df, gd_df) + + +def test_read_csv_names_header_combination(): + pdf = pd.DataFrame( + { + "firstname": ["Emma", "Ava", "Sophia"], + "lastname": ["Olivia", "Isabella", "Charlotte"], + "gender": ["F", "F", "F"], + } + ) + buffer = pdf.to_csv(header=True, index=False) + names = pdf.columns + + gdf = read_csv(StringIO(buffer), names=names, header=0) + assert_eq(pdf, gdf) + + gdf = read_csv(StringIO(buffer), header=0) + assert_eq(pdf, gdf) + + gdf = read_csv(StringIO(buffer)) + assert_eq(pdf, gdf) + + +def test_csv_reader_scientific_type_detection(): + buffer = """1.,1.1,-1.1,1E1,1e1,-1e1,-1e-1,1e-1,1.1e1,1.1e-1,-1.1e-1,-1.1e1 + +1.1,1E+1,1e+1,+1e1,+1e-1,1e-1,+1.1e1,1.1e+1,+1.1e+1,+1.1e1""" + expected = [ + 1.0, + 1.1, + -1.1, + 10.0, + 10.0, + -10, + -0.1, + 0.1, + 11, + 0.11, + -0.11, + -11, + 1.1, + 10.0, + 10.0, + 10, + 0.1, + 0.1, + 11, + 11, + 11, + 11, + ] + + df = read_csv(StringIO(buffer), header=None) + + for dt in df.dtypes: + assert dt == "float64" + for col in df: + assert np.isclose(df[col][0], expected[int(col)]) + + +@pytest.mark.parametrize("lineterminator", ["\n", "\r\n"]) +def test_csv_blank_first_row(lineterminator): + lines = ["colA,colB", "", "1, 1.1", "2, 2.2"] + buffer = lineterminator.join(lines) + + cu_df = read_csv(StringIO(buffer)) + + assert cu_df.shape == (2, 2) + assert all(cu_df.columns == ["colA", "colB"]) + + +@pytest.mark.parametrize("contents", ["", "\n"]) +def test_csv_empty_file(tmp_path, contents): + fname = tmp_path / "test_csv_empty_file.csv" + with open(fname, "w") as f: + f.write(contents) + + col_names = ["col1", "col2", "col3", "col4"] + in_dtypes = ["int", "str", "float", "short"] + out_dtypes = ["int64", "object", "float64", "int16"] + + # Empty dataframe if no columns names specified or inferred + df = read_csv(str(fname)) + assert len(df.columns) == 0 + + # No row dataframe if columns names are specified or inferred + df = read_csv(str(fname), dtype=in_dtypes, names=col_names) + assert all(df.columns == col_names) + assert list(df.dtypes) == out_dtypes + + +@pytest.mark.parametrize("contents", ["", "\n"]) +def test_csv_empty_buffer(contents): + col_names = ["col1", "col2", "col3", "col4"] + in_dtypes = ["int", "str", "float", "short"] + out_dtypes = ["int64", "object", "float64", "int16"] + + # Empty dataframe if no columns names specified or inferred + df = read_csv(StringIO(contents)) + assert len(df.columns) == 0 + + # No row dataframe if columns names are specified or inferred + df = 
read_csv(StringIO(contents), dtype=in_dtypes, names=col_names) + assert all(df.columns == col_names) + assert list(df.dtypes) == out_dtypes + + +@pytest.mark.parametrize( + "dtype", [["short", "float", "int"], {"A": "short", "C": "int"}] +) +def test_csv_reader_partial_dtype(dtype): + names_df = read_csv( + StringIO("0,1,2"), + names=["A", "B", "C"], + dtype=dtype, + usecols=["A", "C"], + ) + header_df = read_csv( + StringIO('"A","B","C"\n0,1,2'), dtype=dtype, usecols=["A", "C"] + ) + + assert_eq(names_df, header_df) + assert all(names_df.dtypes == ["int16", "int64"]) + + +def test_csv_writer_file_handle(tmp_path): + df = pd.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) + gdf = cudf.from_pandas(df) + + gdf_df_fname = tmp_path / "gdf_df_1.csv" + with open(gdf_df_fname, "w") as f: + gdf.to_csv(path_or_buf=f, index=False) + assert os.path.exists(gdf_df_fname) + + gdf2 = pd.read_csv(gdf_df_fname) + assert_eq(gdf, gdf2) + + +def test_csv_writer_file_append(tmp_path): + gdf1 = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) + gdf2 = cudf.DataFrame({"a": [4, 5, 6], "b": ["foo", "bar", "baz"]}) + + gdf_df_fname = tmp_path / "gdf_df_append.csv" + with open(gdf_df_fname, "w") as f: + gdf1.to_csv(f, index=False) + with open(gdf_df_fname, "a") as f: + gdf2.to_csv(f, header=False, index=False) + + result = cudf.read_csv(gdf_df_fname) + expected = cudf.concat([gdf1, gdf2], ignore_index=True) + assert_eq(result, expected, check_index_type=True) + + +def test_csv_writer_buffer(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) + + buffer = BytesIO() + gdf.to_csv(buffer, index=False) + + result = cudf.read_csv(buffer) + assert_eq(result, gdf) + + +def test_csv_writer_numeric_data(numeric_types_as_str, tmp_path): + pdf_df_fname = tmp_path / "pdf_df_1.csv" + gdf_df_fname = tmp_path / "gdf_df_1.csv" + + df = pd.DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + ).astype(numeric_types_as_str) + gdf = cudf.from_pandas(df) + df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") + gdf.to_csv(path_or_buf=gdf_df_fname, index=False) + + assert os.path.exists(pdf_df_fname) + assert os.path.exists(gdf_df_fname) + + expect = pd.read_csv(pdf_df_fname) + got = pd.read_csv(gdf_df_fname) + assert_eq(expect, got) + + +def test_csv_writer_datetime_data(tmp_path): + pdf_df_fname = tmp_path / "pdf_df_2.csv" + gdf_df_fname = tmp_path / "gdf_df_2.csv" + + df = pd.DataFrame( + { + "col1": [ + "31/10/2010", + "05/03/2001", + "20/10/1994", + "18/10/1990", + "1/1/1970", + "2016-04-30T01:02:03.000", + "2038-01-19 03:14:07", + ], + "col2": [ + "18/04/1995", + "14 / 07 / 1994", + "07/06/2006", + "16/09/2005", + "2/2/1970", + "2007-4-30 1:6:40.000PM", + "2038-01-19 03:14:08", + ], + "col3": [ + "1 Jan", + "2 January 1994", + "Feb 2002", + "31-01-2000", + "1-1-1996", + "15-May-2009", + "21-Dec-3262", + ], + } + ) + gdf = cudf.from_pandas(df) + df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") + gdf.to_csv(path_or_buf=gdf_df_fname, index=False) + + assert os.path.exists(pdf_df_fname) + assert os.path.exists(gdf_df_fname) + + expect = pd.read_csv(pdf_df_fname) + got = pd.read_csv(gdf_df_fname) + assert_eq(expect, got) + + +@pytest.mark.parametrize("lineterminator", ["\r", "\n", "\t", np.str_("\n")]) +@pytest.mark.parametrize("sep", [",", "/", np.str_(",")]) +def test_csv_writer_terminator_sep(lineterminator, sep, cudf_mixed_dataframe): + df = cudf_mixed_dataframe + + buffer = BytesIO() + df.to_csv(buffer, lineterminator=lineterminator, sep=sep, 
index=False) + + got = read_csv(buffer, lineterminator=lineterminator, sep=sep) + assert_eq(df, got) + + +@pytest.mark.parametrize( + "lineterminator", ["\r\n", "ABC", "\t\t", np.str_("\r\n")] +) +def test_csv_writer_multichar_terminator(lineterminator, cudf_mixed_dataframe): + df = cudf_mixed_dataframe + + default_terminator_csv = StringIO() + df.to_csv(default_terminator_csv) + + # Need to check manually since readers don't support + # multicharacter line terminators + expected = default_terminator_csv.getvalue().replace("\n", lineterminator) + + buffer = StringIO() + df.to_csv(buffer, lineterminator=lineterminator) + got = buffer.getvalue() + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "columns", + [ + ["Date", "Float"], + ["Integer2", "Float", "Date", "Integer", "String", "Boolean"], + None, + ], +) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("bool_box", [bool, np.bool_]) +def test_csv_writer_column_and_header_options( + columns, header, index, bool_box, pd_mixed_dataframe +): + header = bool_box(header) + index = bool_box(index) + pdf = pd_mixed_dataframe + df = cudf.from_pandas(pdf) + + cudf_buffer = BytesIO() + df.to_csv(cudf_buffer, columns=columns, header=header, index=index) + pd_buffer = BytesIO() + pdf.to_csv(pd_buffer, columns=columns, header=header, index=index) + + expected = cudf.read_csv(pd_buffer, header=0 if header else None) + got = cudf.read_csv(cudf_buffer, header=0 if header else None) + + expected_column_cnt = (1 if index else 0) + ( + len(columns) if columns else pdf.shape[1] + ) + assert_eq(expected_column_cnt, got.shape[1]) + assert_eq(expected, got) + + +def test_csv_writer_empty_columns_parameter(cudf_mixed_dataframe): + write_str = cudf_mixed_dataframe.to_csv(columns=[], index=False) + assert_eq(write_str, "\n") + + +def test_csv_writer_multiindex(tmp_path): + pdf_df_fname = tmp_path / "pdf_df_3.csv" + gdf_df_fname = tmp_path / "gdf_df_3.csv" + + rng = np.random.default_rng(seed=0) + gdf = cudf.DataFrame( + { + "a": rng.integers(0, 5, 20), + "b": rng.integers(0, 5, 20), + "c": range(20), + "d": rng.random(20), + } + ) + gdg = gdf.groupby(["a", "b"]).mean() + pdg = gdg.to_pandas() + pdg.to_csv(pdf_df_fname) + gdg.to_csv(gdf_df_fname) + + assert os.path.exists(pdf_df_fname) + assert os.path.exists(gdf_df_fname) + + expect = pd.read_csv(pdf_df_fname) + got = pd.read_csv(gdf_df_fname) + assert_eq(expect, got) + + +@pytest.mark.parametrize("chunksize", [None, 2, 1000]) +def test_csv_writer_chunksize(chunksize, numeric_types_as_str): + cu_df = cudf.from_pandas( + pd.DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + ).astype(numeric_types_as_str) + ) + + buffer = BytesIO() + cu_df.to_csv(buffer, chunksize=chunksize, index=False) + + got = cudf.read_csv(buffer, dtype=numeric_types_as_str) + assert_eq(cu_df, got) + + +@pytest.mark.parametrize( + "data", + [ + {"vals": [1, 2, 3]}, + {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]}, + {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, + ], +) +def test_to_csv_empty_filename(data): + df = cudf.DataFrame(data) + pdf = df.to_pandas() + + actual = df.to_csv() + expected = pdf.to_csv() + + assert actual == expected + + +@pytest.mark.parametrize( + "data", + [ + {"vals": [1, 2, 3]}, + {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]}, + {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, + ], +) +def test_to_csv_StringIO(data): + df = cudf.DataFrame(data) + cudf_io = StringIO() + 
pandas_io = StringIO() + + pdf = df.to_pandas() + + df.to_csv(cudf_io) + pdf.to_csv(pandas_io) + + cudf_io.seek(0) + pandas_io.seek(0) + + assert cudf_io.read() == pandas_io.read() + + +def test_csv_writer_empty_dataframe(tmp_path): + df_fname = tmp_path / "gdf_df_5.csv" + gdf = cudf.DataFrame({"float_point": [], "integer": []}) + gdf["float_point"] = gdf["float_point"].astype("float") + gdf["integer"] = gdf["integer"].astype("int") + + gdf.to_csv(df_fname, index=False) + + df = cudf.read_csv(df_fname) + + assert df.shape == (0, 2) + assert all(df.dtypes == ["object", "object"]) + + +def test_csv_write_chunksize_corner_case(tmp_path): + # With this num of rows and chunksize + # libcudf splits table such a way that it + # will end up creating an empty table slice + # which caused the issue 5588. + df_fname = tmp_path / "gdf_df_17.csv" + df = cudf.DataFrame({"a": np.arange(10_000)}) + df.to_csv(df_fname, chunksize=1000, index=False) + got = cudf.read_csv(df_fname) + + assert_eq(df, got) + + +def test_csv_write_no_caller_manipulation(): + df = cudf.DataFrame({"a": [1, 2, 3]}) + df_copy = df.copy(deep=True) + _ = df.to_csv(index=True) + assert_eq(df, df_copy) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame({"a": [1, 2, 3], "": [10, 20, 40]}), + pd.DataFrame({"": [10, 20, 40], "a": [1, 2, 3]}), + pd.DataFrame( + {"a": [1, 2, 3], "": [10, 20, 40]}, + index=pd.Index(["a", "z", "v"], name="custom name"), + ), + ], +) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [["a"], [""], None]) +def test_csv_write_empty_column_name(pdf, index, columns): + df = cudf.DataFrame.from_pandas(pdf) + expected = pdf.to_csv(index=index, columns=columns) + actual = df.to_csv(index=index, columns=columns) + + assert expected == actual + + +@pytest.mark.parametrize("idx", [None, pd.Index([], name="index name")]) +@pytest.mark.parametrize("index", [True, False]) +def test_csv_write_empty_dataframe(idx, index): + df = cudf.DataFrame(index=idx) + pdf = df.to_pandas() + + expected = pdf.to_csv(index=index) + actual = df.to_csv(index=index) + + assert expected == actual + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame( + { + "a": [1, 2, 3, None], + "": ["a", "v", None, None], + None: [12, 12, 32, 44], + } + ), + pd.DataFrame( + { + np.nan: [1, 2, 3, None], + "": ["a", "v", None, None], + None: [12, 12, 32, 44], + } + ), + pd.DataFrame({"": [1, None, 3, 4]}), + pd.DataFrame({None: [1, None, 3, 4]}), + pd.DataFrame(columns=[None, "", "a", "b"]), + pd.DataFrame(columns=[None]), + pd.DataFrame(columns=[""]), + ], +) +@pytest.mark.parametrize( + "na_rep", ["", "_NA_", "---", "_____CUSTOM_NA_REP______"] +) +def test_csv_write_dataframe_na_rep(df, na_rep): + gdf = cudf.from_pandas(df) + + expected = df.to_csv(na_rep=na_rep) + actual = gdf.to_csv(na_rep=na_rep) + + assert expected == actual + + +@pytest.mark.parametrize( + "dtype", + [ + "int", + "str", + "float", + np.int32, + np.dtype("float32"), + {"a": "int32", "b": "float64", "c": "uint8"}, + int, + str, + object, + ], +) +def test_csv_reader_dtypes(dtype): + buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n4,13,114\n" + + expected = pd.read_csv(StringIO(buf), dtype=dtype) + actual = cudf.read_csv(StringIO(buf), dtype=dtype) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "dtype", ["Int64", "UInt32", {"a": "UInt64", "b": "Float64", "c": "Int32"}] +) +def test_csv_reader_nullable_dtypes(dtype): + buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n4,13,114\n" + + expected = pd.read_csv(StringIO(buf), dtype=dtype) + 
actual = cudf.read_csv(StringIO(buf), dtype=dtype) + + assert_eq(expected, actual.to_pandas(nullable=True)) + + +def test_csv_reader_temporal_dtypes(temporal_types_as_str): + buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n43432423,13342,13243214\n" + + expected = pd.read_csv(StringIO(buf)).astype(temporal_types_as_str) + actual = cudf.read_csv(StringIO(buf), dtype=temporal_types_as_str) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "df", + [ + lambda: cudf.DataFrame( + { + "a": cudf.Series([1, 2, 3, 1, 2], dtype="category"), + "b": cudf.Series(["a", "c", "a", "b", "a"], dtype="category"), + } + ), + lambda: cudf.DataFrame( + { + "a": cudf.Series([1.1, 2, 3, 1.1, 2], dtype="category"), + "b": cudf.Series( + [None, "c", None, "b", "a"], dtype="category" + ), + } + ), + lambda: cudf.DataFrame( + { + "b": cudf.Series( + [1.1, 2, 3, 1.1, 2], + dtype="category", + index=cudf.CategoricalIndex( + ["abc", "def", "ghi", "jkl", "xyz"] + ), + ) + } + ), + ], +) +def test_csv_writer_category(df): + df = df() + pdf = df.to_pandas() + + expected = pdf.to_csv() + actual = df.to_csv() + + assert expected == actual + + +@pytest.mark.parametrize( + "dtype", + [ + "category", + {"a": "category", "b": "str"}, + {"b": "category"}, + {"a": "category"}, + {"a": pd.CategoricalDtype([1, 2])}, + {"b": pd.CategoricalDtype([1, 2, 3])}, + {"b": pd.CategoricalDtype(["b", "a"]), "a": "str"}, + pd.CategoricalDtype(["a", "b"]), + ], +) +def test_csv_reader_category(dtype): + df = cudf.DataFrame({"a": [1, 2, 3, None], "b": ["a", "b", None, "c"]}) + csv_buf = df.to_csv() + + actual = cudf.read_csv(StringIO(csv_buf), dtype=dtype) + expected = pd.read_csv(StringIO(csv_buf), dtype=dtype) + + assert_eq(expected, actual, check_dtype=True) + + +def test_csv_writer_datetime_sep(): + df = cudf.DataFrame( + {"a": cudf.Series([22343, 2323423, 234324234], dtype="datetime64[ns]")} + ) + df["a"] = df["a"].astype("datetime64[s]") + expected = df.to_pandas().to_csv(date_format="%Y-%m-%dT%H:%M:%SZ", sep="-") + actual = df.to_csv(sep="-") + assert expected == actual + + +def test_na_filter_empty_fields(): + test_na = "TEST_NAN" + df = pd.DataFrame({"col0": ["valid", None, "also_valid", "", test_na]}) + buffer = df.to_csv(index=False) + + pdf = pd.read_csv(StringIO(buffer), na_filter=False) + gdf = cudf.read_csv(StringIO(buffer), na_filter=False) + assert_eq(pdf, gdf) + + pdf = pd.read_csv(StringIO(buffer), keep_default_na=False) + gdf = cudf.read_csv(StringIO(buffer), keep_default_na=False) + assert_eq(pdf, gdf) + + pdf = pd.read_csv( + StringIO(buffer), keep_default_na=False, na_values=test_na + ) + gdf = cudf.read_csv( + StringIO(buffer), keep_default_na=False, na_values=test_na + ) + assert_eq(pdf, gdf) + + +def test_csv_sep_error(): + pdf = pd.DataFrame({"a": [1, 2, 3]}) + gdf = cudf.DataFrame({"a": [1, 2, 3]}) + assert_exceptions_equal( + lfunc=pdf.to_csv, + rfunc=gdf.to_csv, + lfunc_args_and_kwargs=([], {"sep": "abc"}), + rfunc_args_and_kwargs=([], {"sep": "abc"}), + ) + + assert_exceptions_equal( + lfunc=pdf.to_csv, + rfunc=gdf.to_csv, + lfunc_args_and_kwargs=([], {"sep": 1}), + rfunc_args_and_kwargs=([], {"sep": 1}), + ) + + +def test_to_csv_encoding_error(): + # TODO: Remove this test once following + # issue is fixed: https://github.com/rapidsai/cudf/issues/2957 + df = cudf.DataFrame({"a": ["你好", "test"]}) + encoding = "utf-8-sig" + error_message = ( + f"Encoding {encoding} is not supported. " + + "Currently, only utf-8 encoding is supported." 
+ ) + with pytest.raises(NotImplementedError, match=re.escape(error_message)): + df.to_csv("test.csv", encoding=encoding) + + +def test_to_csv_compression_error(): + df = cudf.DataFrame({"a": ["test"]}) + compression = "snappy" + error_message = "Writing compressed csv is not currently supported in cudf" + with pytest.raises(NotImplementedError, match=re.escape(error_message)): + df.to_csv("test.csv", compression=compression) + + +def test_empty_df_no_index(): + actual = cudf.DataFrame({}) + buffer = BytesIO() + actual.to_csv(buffer, index=False) + + result = cudf.read_csv(buffer) + + assert_eq(actual, result) + + +def test_default_integer_bitwidth( + cudf_mixed_dataframe, default_integer_bitwidth +): + # Test that integer columns in csv are _inferred_ as user specified + # bitwidth + buf = BytesIO() + cudf_mixed_dataframe.to_csv(buf) + buf.seek(0) + read = cudf.read_csv(buf) + assert read["Integer"].dtype == np.dtype( + f"i{default_integer_bitwidth // 8}" + ) + assert read["Integer2"].dtype == np.dtype( + f"i{default_integer_bitwidth // 8}" + ) + + +def test_default_integer_bitwidth_partial( + cudf_mixed_dataframe, default_integer_bitwidth +): + # Test that integer columns in csv are _inferred_ as user specified + # bitwidth + buf = BytesIO() + cudf_mixed_dataframe.to_csv(buf) + buf.seek(0) + read = cudf.read_csv(buf, dtype={"Integer": "int64"}) + assert read["Integer"].dtype == np.dtype("i8") + assert read["Integer2"].dtype == np.dtype( + f"i{default_integer_bitwidth // 8}" + ) + + +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") +def test_default_integer_bitwidth_extremes( + numeric_extremes_dataframe, default_integer_bitwidth +): + # Test that integer columns in csv are _inferred_ as user specified + # bitwidth + buf = BytesIO() + cudf.DataFrame.from_pandas(numeric_extremes_dataframe).to_csv(buf) + buf.seek(0) + read = cudf.read_csv(buf) + + assert read["int64"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") + assert read["long"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") + assert read["uint64"].dtype == np.dtype( + f"u{default_integer_bitwidth // 8}" + ) + + +def test_default_float_bitwidth(cudf_mixed_dataframe, default_float_bitwidth): + # Test that float columns in csv are _inferred_ as user specified + # bitwidth + buf = BytesIO() + cudf_mixed_dataframe.to_csv(buf) + buf.seek(0) + read = cudf.read_csv(buf) + assert read["Float"].dtype == np.dtype(f"f{default_float_bitwidth // 8}") + + +def test_default_float_bitwidth_partial(default_float_bitwidth): + # Test that float columns in csv are _inferred_ as user specified + # bitwidth + read = cudf.read_csv( + StringIO("float1,float2\n1.0,2.0\n3.0,4.0"), + dtype={"float2": "float64"}, + ) + assert read["float1"].dtype == np.dtype(f"f{default_float_bitwidth // 8}") + assert read["float2"].dtype == np.dtype("f8") + + +@pytest.mark.parametrize( + "usecols,names", + [ + # selection using indices; only names of selected columns are specified + ([1, 2], ["b", "c"]), + # selection using indices; names of all columns are specified + ([1, 2], ["a", "b", "c"]), + # selection using indices; duplicates + ([2, 2], ["a", "b", "c"]), + # selection using indices; out of order + ([2, 1], ["a", "b", "c"]), + # selection using names + (["b"], ["a", "b", "c"]), + # selection using names; multiple columns + (["b", "c"], ["a", "b", "c"]), + # selection using names; duplicates + (["c", "c"], ["a", "b", "c"]), + # selection using names; out of order + (["c", "b"], ["a", "b", "c"]), + ], +) +def 
test_column_selection_plus_column_names(usecols, names): + lines = [ + "num,datetime,text", + "123,2018-11-13T12:00:00,abc", + "456,2018-11-14T12:35:01,def", + "789,2018-11-15T18:02:59,ghi", + ] + + buffer = "\n".join(lines) + "\n" + + assert_eq( + pd.read_csv(StringIO(buffer), usecols=usecols, names=names), + cudf.read_csv(StringIO(buffer), usecols=usecols, names=names), + ) + + +def test_read_compressed_BOM(tmp_path): + buffer = 'int, string\n1, "a"\n2, "b"\n3, "c"\n' + + fname = tmp_path / "tmp_csvreader_file20.gz" + with gzip.open(fname, "wt", encoding="utf-8") as f: + f.write(codecs.BOM_UTF8.decode("utf-8")) + f.write(buffer) + + assert_eq(pd.read_csv(fname), cudf.read_csv(fname)) + + +def test_read_header_none_pandas_compat_column_type(): + data = "1\n2\n" + with cudf.option_context("mode.pandas_compatible", True): + result = cudf.read_csv(StringIO(data), header=None).columns + expected = pd.read_csv(StringIO(data), header=None).columns + pd.testing.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize("buffer", ["1", '"one"']) +def test_read_single_unterminated_row(buffer): + gdf = cudf.read_csv(StringIO(buffer), header=None) + assert_eq(gdf.shape, (1, 1)) + + +@pytest.mark.parametrize("buffer", ["\n", "\r\n"]) +def test_read_empty_only_row(buffer): + gdf = cudf.read_csv(StringIO(buffer), header=None) + assert_eq(gdf.shape, (0, 0)) + + +def test_read_empty_only_row_custom_terminator(): + gdf = cudf.read_csv(StringIO("*"), header=None, lineterminator="*") + assert_eq(gdf.shape, (0, 0)) + + +def test_empty_file_pandas_compat_raises(tmp_path): + empty_file = tmp_path / "empty.csv" + empty_file.touch() + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(pd.errors.EmptyDataError): + cudf.read_csv(StringIO()) + with pytest.raises(pd.errors.EmptyDataError): + cudf.read_csv(empty_file) + with pytest.raises(pd.errors.EmptyDataError): + cudf.read_csv(str(empty_file)) diff --git a/python/cudf/cudf/tests/input_output/test_feather.py b/python/cudf/cudf/tests/input_output/test_feather.py index 06777c8e6af..e32edcd4bc5 100644 --- a/python/cudf/cudf/tests/input_output/test_feather.py +++ b/python/cudf/cudf/tests/input_output/test_feather.py @@ -1 +1,65 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
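+# Round-trip tests for cudf.read_feather / DataFrame.to_feather, validated
+# against pyarrow's feather reader.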
+
+from string import ascii_letters
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES
+
+
+@pytest.fixture(params=[0, 10])
+def pdf(request):
+    rng = np.random.default_rng(seed=0)
+    types = [*NUMERIC_TYPES, "bool"]
+    nrows = request.param
+
+    # Create a pandas dataframe with random data of mixed types
+    test_pdf = pd.DataFrame(
+        {
+            f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ)
+            for typ in types
+        }
+    )
+    # Create non-numeric categorical data; otherwise it may get typecast
+    data = [ascii_letters[rng.integers(0, 52)] for _ in range(nrows)]
+    test_pdf["col_category"] = pd.Series(data, dtype="category")
+    return test_pdf
+
+
+@pytest.mark.filterwarnings("ignore:Using CPU")
+@pytest.mark.filterwarnings("ignore:Strings are not yet supported")
+@pytest.mark.parametrize(
+    "columns",
+    [["col_int8"], ["col_category"], ["col_int32", "col_float32"], None],
+)
+def test_feather_reader(pdf, columns, tmp_path):
+    feather_file = tmp_path / "test.feather"
+    pdf.to_feather(feather_file)
+    expect = pa.feather.read_table(feather_file, columns=columns).to_pandas()
+    got = (
+        cudf.read_feather(feather_file, columns=columns)
+        .to_arrow(preserve_index=False)
+        .to_pandas()
+    )
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+@pytest.mark.filterwarnings("ignore:Using CPU")
+def test_feather_writer(tmp_path, pdf):
+    gdf = cudf.DataFrame.from_pandas(pdf)
+    pdf_fname = tmp_path / "pdf.feather"
+    gdf_fname = tmp_path / "gdf.feather"
+
+    pdf.to_feather(pdf_fname)
+    gdf.to_feather(gdf_fname)
+
+    expect = pa.feather.read_table(pdf_fname)
+    got = pa.feather.read_table(gdf_fname)
+
+    assert pa.Table.equals(expect, got)
diff --git a/python/cudf/cudf/tests/input_output/test_json.py b/python/cudf/cudf/tests/input_output/test_json.py
index 06777c8e6af..14f6d6d43b1 100644
--- a/python/cudf/cudf/tests/input_output/test_json.py
+++ b/python/cudf/cudf/tests/input_output/test_json.py
@@ -1 +1,1446 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
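+# Tests for cudf.read_json / DataFrame.to_json, validated against pandas
+# and pyarrow.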
+
+import copy
+import gzip
+import os
+from io import BytesIO, StringIO
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+import cudf
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    DATETIME_TYPES,
+    NUMERIC_TYPES,
+    TIMEDELTA_TYPES,
+    expect_warning_if,
+)
+
+
+@pytest.fixture(params=["auto", "cudf", "pandas"])
+def engine(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def lines(request):
+    return request.param
+
+
+@pytest.fixture(params=[0, 10])
+def pdf(request):
+    rng = np.random.default_rng(seed=0)
+    types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"]
+    nrows = request.param
+
+    # Create a pandas dataframe with random data of mixed types
+    test_pdf = pd.DataFrame(
+        {
+            f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ)
+            for typ in types
+        }
+    )
+    test_pdf.index.name = "test_index"
+    return test_pdf
+
+
+@pytest.fixture
+def gdf(pdf):
+    return cudf.DataFrame.from_pandas(pdf)
+
+
+@pytest.fixture(params=[0, 10])
+def gdf_writer_types(request):
+    # datetime64[us], datetime64[ns] are unsupported due to a bug in the parser
+    types = [
+        *NUMERIC_TYPES,
+        "datetime64[s]",
+        "datetime64[ms]",
+        *TIMEDELTA_TYPES,
+        "bool",
+        "str",
+    ]
+    typer = {"col_" + val: val for val in types}
+    ncols = len(types)
+    nrows = request.param
+
+    # Create a cudf dataframe with random data of mixed types
+    test_pdf = cudf.DataFrame(
+        np.ones((nrows, ncols)),
+        columns=pd.Index(typer.keys()),
+    )
+
+    # Cast the columns to their intended dtypes
+    test_pdf = test_pdf.astype(typer)
+
+    return test_pdf
+
+
+@pytest.mark.filterwarnings("ignore:Strings are not yet supported")
+@pytest.mark.filterwarnings("ignore:Using CPU")
+@pytest.mark.parametrize("index", [True, False])
+# tests limited to compression formats supported by both pandas and cudf: bz2, gzip, zip, zstd
+@pytest.mark.parametrize("compression", ["bz2", "gzip", "zip", "zstd", None])
+@pytest.mark.parametrize("orient", ["columns", "records", "table", "split"])
+def test_json_reader(index, compression, orient, pdf, tmp_path):
+    skip_reason = f"{index=} is not valid with {orient=}"
+    if index is False and orient != "split":
+        pytest.skip(skip_reason)
+    if index is True and orient not in ("split", "table", "index", "columns"):
+        pytest.skip(skip_reason)
+    path_df = tmp_path / "test_df.json"
+    path_series = tmp_path / "test_series.json"
+    pdf.to_json(path_df, index=index, compression=compression, orient=orient)
+    pdf["col_int32"].to_json(
+        path_series, index=index, compression=compression, orient=orient
+    )
+    expect_df = pd.read_json(path_df, orient=orient, compression=compression)
+    got_df = cudf.read_json(path_df, orient=orient, compression=compression)
+    if len(expect_df) == 0:
+        expect_df = expect_df.reset_index(drop=True)
+        expect_df.columns = expect_df.columns.astype("object")
+    if len(got_df) == 0:
+        got_df = got_df.reset_index(drop=True)
+
+    assert_eq(expect_df, got_df, check_categorical=False)
+
+    # Only these orients are allowed for Series, but this isn't enforced by pandas
+    if orient in ("split", "records", "index"):
+        expect_series = pd.read_json(
+            path_series, orient=orient, compression=compression, typ="series"
+        )
+        got_series = cudf.read_json(
+            path_series, orient=orient, compression=compression, typ="series"
+        )
+        if len(expect_series) == 0:
+            expect_series = expect_series.reset_index(drop=True)
+        if 
len(got_series) == 0:
+            got_series = got_series.reset_index(drop=True)
+
+        assert_eq(expect_series, got_series)
+
+
+@pytest.mark.filterwarnings("ignore:Can't infer compression")
+@pytest.mark.filterwarnings("ignore:Using CPU")
+def test_json_writer(tmp_path, pdf, gdf):
+    pdf_df_fname = tmp_path / "pdf_df.json"
+    gdf_df_fname = tmp_path / "gdf_df.json"
+
+    pdf.to_json(pdf_df_fname)
+    gdf.to_json(gdf_df_fname)
+
+    assert os.path.exists(pdf_df_fname)
+    assert os.path.exists(gdf_df_fname)
+
+    expect_df = pd.read_json(pdf_df_fname)
+    got_df = pd.read_json(gdf_df_fname)
+
+    assert_eq(expect_df, got_df)
+
+    for column in pdf.columns:
+        pdf_series_fname = tmp_path / f"{column}_pdf_series.json"
+        gdf_series_fname = tmp_path / f"{column}_gdf_series.json"
+
+        pdf[column].to_json(pdf_series_fname)
+        gdf[column].to_json(gdf_series_fname)
+
+        assert os.path.exists(pdf_series_fname)
+        assert os.path.exists(gdf_series_fname)
+
+        expect_series = pd.read_json(pdf_series_fname, typ="series")
+        got_series = pd.read_json(gdf_series_fname, typ="series")
+
+        assert_eq(expect_series, got_series)
+
+        # Make sure results align for regular strings, not just files
+        pdf_string = pdf[column].to_json()
+        gdf_string = gdf[column].to_json()
+        assert_eq(pdf_string, gdf_string)
+
+
+def test_cudf_json_writer(pdf, lines):
+    # remove datetime columns because pandas doesn't support them here
+    for col_name in pdf.columns:
+        if "datetime" in col_name:
+            pdf.drop(col_name, axis=1, inplace=True)
+    gdf = cudf.DataFrame.from_pandas(pdf)
+    pdf_string = pdf.to_json(orient="records", lines=lines)
+    gdf_string = gdf.to_json(orient="records", lines=lines, engine="cudf")
+
+    assert_eq(pdf_string, gdf_string)
+
+    gdf_string = gdf.to_json(
+        orient="records", lines=lines, engine="cudf", rows_per_chunk=8
+    )
+
+    assert_eq(pdf_string, gdf_string)
+
+
+def test_cudf_json_writer_read(gdf_writer_types):
+    dtypes = {
+        col_name: col_name[len("col_") :]
+        for col_name in gdf_writer_types.columns
+    }
+    gdf_string = gdf_writer_types.to_json(
+        orient="records", lines=True, engine="cudf"
+    )
+    gdf2 = cudf.read_json(
+        StringIO(gdf_string),
+        lines=True,
+        engine="cudf",
+        dtype=dict(dtypes),
+    )
+    pdf2 = pd.read_json(StringIO(gdf_string), lines=True, dtype=dict(dtypes))
+
+    # Bug in pandas https://github.com/pandas-dev/pandas/issues/28558
+    if pdf2.empty:
+        pdf2.reset_index(drop=True, inplace=True)
+
+    # pandas moved to a consistent datetime parsing format:
+    # https://pandas.pydata.org/docs/dev/whatsnew/v2.0.0.html#datetimes-are-now-parsed-with-a-consistent-format
+    for unit in ["s", "ms"]:
+        if f"col_datetime64[{unit}]" in pdf2.columns:
+            pdf2[f"col_datetime64[{unit}]"] = (
+                pd.to_datetime(pdf2[f"col_datetime64[{unit}]"], format="mixed")
+                .dt.tz_localize(None)
+                .astype(f"datetime64[{unit}]")
+            )
+    assert_eq(pdf2, gdf2)
+
+
+@pytest.mark.parametrize(
+    "jsonl_string, expected",
+    [
+        # fixed width
+        ("""{"a":10, "b":1.1}\n {"a":20, "b":2.1}\n""", None),
+        # simple list
+        ("""{"a":[1, 2, 3], "b":1.1}\n {"a":[]}\n""", None),
+        # simple struct
+        ("""{"a":{"c": 123 }, "b":1.1}\n {"a": {"c": 456}}\n""", None),
+        # list of lists
+        ("""{"a":[[], [1, 2], [3, 4]], "b":1.1}\n""", None),
+        ("""{"a":[null, [1, 2], [null, 4]], "b":1.1}\n""", None),
+        # list of structs
+        # error ("""{"a":[null, {}], "b":1.1}\n""", None),
+        (
+            """{"a":[null, {"L": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}\n""",
+            None,
+        ),
+        (
+            """{"a":[{"L": 123}, null], "b":1.0}\n {"b":1.1}\n {"b":2.1}\n""",
+            None,
+        ),
+        # struct of lists
+        (
+            """{"a":{"L": [1, 2, 3]}, "b":1.1}\n {"a": {"L": [4, 
5, 6]}}\n""", + None, + ), + ("""{"a":{"L": [1, 2, null]}, "b":1.1}\n {"a": {"L": []}}\n""", None), + # struct of structs + ( + """{"a":{"L": {"M": 123}}, "b":1.1} + {"a": {"L": {"M": 456}}}\n""", + None, + ), + ( + """{"a":{"L": {"M": null}}, "b":1.1}\n {"a": {"L": {}}}\n""", + """{"a":{"L": {}}, "b":1.1}\n {"a": {"L": {}}}\n""", + ), + # list of structs of lists + ("""{"a":[{"L": [1, 2, 3]}, {"L": [4, 5, 6]}], "b":1.1}\n""", None), + ("""{"a":[{"L": [1, 2, null]}, {"L": []}], "b":1.1}\n""", None), + # struct of lists of structs + ("""{"a":{"L": [{"M": 123}, {"M": 456}]}, "b":1.1}\n""", None), + ( + """{"a":{"L": [{"M": null}, {}]}, "b":1.1}\n""", + """{"a":{"L": [{}, {}]}, "b":1.1}\n""", + ), + # empty structs + ("""{"A": null}\n {"A": {}}\n {}""", """{}\n{"A":{}}\n{}\n"""), + ( + """{"A": {"B": null}}\n {"A": {"B": {}}}\n {"A": {}}""", + """{"A":{}}\n{"A":{"B":{}}}\n{"A":{}}\n""", + ), + ], +) +def test_cudf_json_roundtrip(jsonl_string, expected): + gdf = cudf.read_json( + StringIO(jsonl_string), + lines=True, + engine="cudf", + # dtype=dict(dtypes), + ) + expected = jsonl_string if expected is None else expected + gdf_string = gdf.to_json( + orient="records", lines=True, engine="cudf", include_nulls=False + ) + assert_eq(gdf_string, expected.replace(" ", "")) + + +@pytest.mark.parametrize("sink", ["string", "file"]) +def test_cudf_json_writer_sinks(sink, tmp_path): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + target = None + if sink == "string": + target = StringIO() + elif sink == "file": + target = tmp_path / "test_df.json" + df.to_json(target, engine="cudf") + if sink == "string": + assert ( + target.getvalue() == '[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]' + ) + elif sink == "file": + assert os.path.exists(target) + with open(target, "r") as f: + assert f.read() == '[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]' + + +@pytest.fixture( + params=["string", "filepath", "pathobj", "bytes_io", "string_io", "url"] +) +def json_input(request, tmp_path): + input_type = request.param + buffer = "[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9]\n" + json_dir = tmp_path / "json" + json_dir.mkdir() + fname = json_dir / "test_df.json" + if not os.path.isfile(fname): + with open(str(fname), "w") as fp: + fp.write(buffer) + + if input_type == "string": + return buffer + if input_type == "filepath": + return str(fname) + if input_type == "pathobj": + return Path(fname) + if input_type == "bytes_io": + return BytesIO(buffer.encode()) + if input_type == "string_io": + return StringIO(buffer) + if input_type == "url": + return Path(fname).as_uri() + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_json_lines_basic(json_input, engine): + can_warn = isinstance(json_input, str) and not json_input.endswith(".json") + with expect_warning_if(can_warn): + cu_df = cudf.read_json(json_input, engine=engine, lines=True) + # io types must seek to the beginning before you can read again + if hasattr(json_input, "seek"): + json_input.seek(0) + with expect_warning_if(can_warn): + pd_df = pd.read_json(json_input, lines=True) + + assert all(cu_df.dtypes == ["int64", "int64", "int64"]) + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): + assert str(cu_col) == str(pd_col) + np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_nonexistent_json_correct_error(engine): + json_input = 
"doesnotexist.json" + with pytest.raises(FileNotFoundError): + cudf.read_json(json_input, engine=engine) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_json_lines_multiple(tmp_path, json_input, engine): + if engine == "pandas": + pytest.skip("pandas engine does not support multiple files") + tmp_file1 = tmp_path / "MultiInputs1.json" + tmp_file2 = tmp_path / "MultiInputs2.json" + + with expect_warning_if( + isinstance(json_input, str) and not json_input.endswith(".json") + ): + pdf = pd.read_json(json_input, lines=True) + pdf.to_json(tmp_file1, compression="infer", lines=True, orient="records") + pdf.to_json(tmp_file2, compression="infer", lines=True, orient="records") + + cu_df = cudf.read_json([tmp_file1, tmp_file2], engine=engine, lines=True) + pd_df = pd.concat([pdf, pdf]) + + assert all(cu_df.dtypes == ["int64", "int64", "int64"]) + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): + assert str(cu_col) == str(pd_col) + np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +def test_json_read_directory(tmp_path, json_input, engine): + if engine == "pandas": + pytest.skip("pandas engine does not support directories") + with expect_warning_if( + isinstance(json_input, str) and not json_input.endswith(".json") + ): + pdf = pd.read_json(json_input, lines=True) + json_dir = tmp_path / "jsons" + json_dir.mkdir() + pdf.to_json( + json_dir / "MultiInputs1.json", + compression="infer", + lines=True, + orient="records", + ) + pdf.to_json( + json_dir / "MultiInputs2.json", + compression="infer", + lines=True, + orient="records", + ) + pdf.to_json( + json_dir / "MultiInputs3.json", + compression="infer", + lines=True, + orient="records", + ) + + cu_df = cudf.read_json(json_dir, engine=engine, lines=True) + pd_df = pd.concat([pdf, pdf, pdf]) + + assert all(cu_df.dtypes == ["int64", "int64", "int64"]) + for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): + assert str(cu_col) == str(pd_col) + np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) + + +def test_json_lines_byte_range(json_input): + # include the first row and half of the second row + # should parse the first two rows + will_warn = isinstance(json_input, str) and not json_input.endswith( + ".json" + ) + with expect_warning_if(will_warn): + df = cudf.read_json( + copy.deepcopy(json_input), lines=True, byte_range=(0, 15) + ) + assert df.shape == (2, 3) + + # include half of the second row and half of the third row + # should parse only the third row + with expect_warning_if(will_warn): + df = cudf.read_json( + copy.deepcopy(json_input), lines=True, byte_range=(15, 10) + ) + assert df.shape == (1, 3) + + # include half of the second row and entire third row + # should parse only the third row + with expect_warning_if(will_warn): + df = cudf.read_json( + copy.deepcopy(json_input), lines=True, byte_range=(15, 0) + ) + assert df.shape == (1, 3) + + # include half of the second row till past the end of the file + # should parse only the third row + with expect_warning_if(will_warn): + df = cudf.read_json( + copy.deepcopy(json_input), lines=True, byte_range=(10, 50) + ) + assert df.shape == (1, 3) + + +def test_json_lines_dtypes(json_input): + with expect_warning_if( + 
isinstance(json_input, str) and not json_input.endswith(".json") + ): + df = cudf.read_json( + json_input, lines=True, dtype={1: "int", 2: "short", 0: "float"} + ) + assert all(df.dtypes == ["float64", "int64", "int16"]) + + +@pytest.mark.parametrize( + "ext, out_comp, in_comp", + [ + (".geez", "gzip", "gzip"), + (".beez", "bz2", "bz2"), + (".gz", "gzip", "infer"), + (".bz2", "bz2", "infer"), + (".data", None, "infer"), + (".txt", None, None), + ("", None, None), + ], +) +def test_json_lines_compression(tmp_path, ext, out_comp, in_comp): + fname = tmp_path / f"tmp_json_compression.{ext}" + + nrows = 20 + pd_df = pd.DataFrame( + { + "col1": np.arange(nrows, dtype=np.int32), + "col2": np.arange(1, 1 + nrows, dtype=np.int32), + } + ) + pd_df.to_json(fname, compression=out_comp, lines=True, orient="records") + + cu_df = cudf.read_json( + str(fname), + compression=in_comp, + lines=True, + dtype={"col1": "int32", "col2": "int32"}, + ) + assert_eq(pd_df, cu_df) + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_json_engine_selection(): + json = "[1, 2, 3]" + + # should use the cudf engine + df = cudf.read_json(StringIO(json), lines=True) + # column names are strings when parsing with cudf + for col_name in df.columns: + assert isinstance(col_name, str) + + # should use the pandas engine + df = cudf.read_json(StringIO(json), lines=False, engine="pandas") + # column names are ints when parsing with pandas + for col_name in df.columns: + assert isinstance(col_name, int) + + # should use the pandas engine + df = cudf.read_json(StringIO(json), lines=True, engine="pandas") + # column names are ints when parsing with pandas + for col_name in df.columns: + assert isinstance(col_name, int) + + +def test_json_bool_values(): + buffer = "[true,1]\n[false,false]\n[true,true]" + cu_df = cudf.read_json(StringIO(buffer), lines=True) + pd_df = pd.read_json(StringIO(buffer), lines=True) + + # types should be ['bool', 'int64'] + np.testing.assert_array_equal(pd_df.dtypes, cu_df.dtypes) + np.testing.assert_array_equal(pd_df[0], cu_df["0"].to_numpy()) + # boolean values should be converted to 0/1 + np.testing.assert_array_equal(pd_df[1], cu_df["1"].to_numpy()) + + cu_df = cudf.read_json( + StringIO(buffer), lines=True, dtype={"0": "bool", "1": "long"} + ) + np.testing.assert_array_equal(pd_df.dtypes, cu_df.dtypes) + + +def test_json_bad_protocol_string(): + test_string = StringIO('{"field": "s3://path"}') + + expect = pd.DataFrame([{"field": "s3://path"}]) + got = cudf.read_json(test_string, lines=True) + + assert_eq(expect, got) + + +def test_json_corner_case_with_escape_and_double_quote_char_with_pandas( + tmp_path, +): + fname = tmp_path / "tmp_json_escape_double_quote" + + pdf = pd.DataFrame( + { + "a": ['ab"cd', "\\\b", "\r\\", "'"], + "b": ["a\tb\t", "\\", '\\"', "\t"], + "c": ["aeiou", "try", "json", "cudf"], + } + ) + pdf.to_json(fname, compression="infer", lines=True, orient="records") + + df = cudf.read_json( + fname, compression="infer", lines=True, orient="records" + ) + pdf = pd.read_json( + fname, compression="infer", lines=True, orient="records" + ) + + assert_eq(cudf.DataFrame(pdf), df) + + +def test_json_corner_case_with_escape_and_double_quote_char_with_strings(): + str_buffer = StringIO( + """{"a":"ab\\"cd","b":"a\\tb\\t","c":"aeiou"} + {"a":"\\\\\\b","b":"\\\\","c":"try"} + {"a":"\\r\\\\","b":"\\\\\\"","c":"json"} + {"a":"\'","b":"\\t","c":"cudf"}""" + ) + + df = cudf.read_json( + str_buffer, compression="infer", lines=True, orient="records" + ) + + expected = cudf.DataFrame( + { + 
"a": ['ab"cd', "\\\b", "\r\\", "'"], + "b": ["a\tb\t", "\\", '\\"', "\t"], + "c": ["aeiou", "try", "json", "cudf"], + } + ) + assert_eq(df, expected) + + +def test_json_to_json_special_characters(): + df = cudf.DataFrame( + { + "'a'": ['ab"cd', "\\\b", "\r\\", "'"], + "b": ["a\tb\t", "\\", '\\"', "\t"], + "c": ["aeiou", "try", "json", "cudf"], + } + ) + + actual = StringIO() + df.to_json(actual, engine="cudf", lines=True, orient="records") + expected = StringIO() + df.to_pandas().to_json(expected, lines=True, orient="records") + assert expected.getvalue() == actual.getvalue() + + +@pytest.mark.parametrize( + "gdf,pdf", + [ + ( + lambda: cudf.DataFrame( + { + "int col": cudf.Series( + [1, 2, None, 2, 2323, 234, None], dtype="int64" + ) + } + ), + pd.DataFrame( + { + "int col": pd.Series( + [1, 2, None, 2, 2323, 234, None], dtype=pd.Int64Dtype() + ) + } + ), + ), + ( + lambda: cudf.DataFrame( + { + "int64 col": cudf.Series( + [1, 2, None, 2323, None], dtype="int64" + ), + "string col": cudf.Series( + ["abc", "a", None, "", None], dtype="str" + ), + "float col": cudf.Series( + [0.234, None, 234234.2343, None, 0.0], dtype="float64" + ), + "bool col": cudf.Series( + [None, True, False, None, True], dtype="bool" + ), + "categorical col": cudf.Series( + [1, 2, 1, None, 2], dtype="category" + ), + "datetime col": cudf.Series( + [1231233, None, 2323234, None, 1], + dtype="datetime64[ns]", + ), + "timedelta col": cudf.Series( + [None, 34687236, 2323234, 1, None], + dtype="timedelta64[ns]", + ), + } + ), + pd.DataFrame( + { + "int64 col": pd.Series( + [1, 2, None, 2323, None], dtype=pd.Int64Dtype() + ), + "string col": pd.Series( + ["abc", "a", None, "", None], dtype=pd.StringDtype() + ), + "float col": pd.Series( + [0.234, None, 234234.2343, None, 0.0], dtype="float64" + ), + "bool col": pd.Series( + [None, True, False, None, True], + dtype=pd.BooleanDtype(), + ), + "categorical col": pd.Series( + [1, 2, 1, None, 2], dtype="category" + ), + "datetime col": pd.Series( + [1231233, None, 2323234, None, 1], + dtype="datetime64[ns]", + ), + "timedelta col": pd.Series( + [None, 34687236, 2323234, 1, None], + dtype="timedelta64[ns]", + ), + } + ), + ), + ], +) +def test_json_to_json_compare_contents(gdf, pdf): + expected_json = pdf.to_json(lines=True, orient="records") + with pytest.warns(UserWarning): + actual_json = gdf().to_json(lines=True, orient="records") + + assert expected_json == actual_json + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_default_integer_bitwidth(default_integer_bitwidth, engine): + buf = BytesIO() + pd.DataFrame({"a": range(10)}).to_json(buf, lines=True, orient="records") + buf.seek(0) + df = cudf.read_json(buf, engine=engine, lines=True, orient="records") + + assert df["a"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_default_integer_bitwidth_partial(default_integer_bitwidth, engine): + buf = BytesIO() + pd.DataFrame({"a": range(10), "b": range(10, 20)}).to_json( + buf, lines=True, orient="records" + ) + buf.seek(0) + df = cudf.read_json( + buf, engine=engine, lines=True, orient="records", dtype={"b": "i8"} + ) + + assert df["a"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") + assert df["b"].dtype == np.dtype("i8") + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_default_integer_bitwidth_extremes(default_integer_bitwidth, engine): + # Test that integer columns in json are _inferred_ as 32 bit columns. 
+    buf = StringIO(
+        '{"u8":18446744073709551615, "i8":9223372036854775807}\n'
+        '{"u8": 0, "i8": -9223372036854775808}'
+    )
+    df = cudf.read_json(buf, engine=engine, lines=True, orient="records")
+
+    assert df["u8"].dtype == np.dtype(f"u{default_integer_bitwidth // 8}")
+    assert df["i8"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}")
+
+
+def test_default_float_bitwidth(default_float_bitwidth):
+    # Test that float columns in json are _inferred_ with the user-specified
+    # default bitwidth.
+    df = cudf.read_json(
+        StringIO('{"a": 1.0, "b": 2.5}\n{"a": 3.5, "b": 4.0}'),
+        engine="cudf",
+        lines=True,
+        orient="records",
+    )
+    assert df["a"].dtype == np.dtype(f"f{default_float_bitwidth // 8}")
+    assert df["b"].dtype == np.dtype(f"f{default_float_bitwidth // 8}")
+
+
+def test_json_nested_basic():
+    bytes_obj = BytesIO()
+    data = {
+        "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
+        "c2": [["l11", "l21"], ["l12", "l22"]],
+    }
+    pdf = pd.DataFrame(data)
+    pdf.to_json(bytes_obj, orient="records")
+
+    df = cudf.read_json(bytes_obj, engine="cudf", orient="records")
+    bytes_obj.seek(0)
+    pdf = pd.read_json(bytes_obj, orient="records")
+
+    assert_eq(pdf, df)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
+            "c2": [["l11", "l21"], ["l12", "l22"]],
+        },
+        # Essential test case to handle omissions
+        {
+            "c1": [{"f2": "sf21"}, {"f1": "sf12"}],
+            "c2": [["l11", "l21"], []],
+        },
+        # empty input
+        {},
+    ],
+)
+def test_json_nested_lines(data, lines):
+    bio = BytesIO()
+    pdf = pd.DataFrame(data)
+    pdf.to_json(bio, orient="records", lines=lines)
+    bio.seek(0)
+    df = cudf.read_json(bio, engine="cudf", orient="records", lines=lines)
+    bio.seek(0)
+    pdf = pd.read_json(bio, orient="records", lines=lines)
+    # In the second test case we need to take a detour via pyarrow:
+    # pandas omits "f1" in the first row, so we have to enforce a common
+    # schema such that pandas would have the "f1" member with null.
+    # Also, pyarrow may choose a different ordering of a nested column's
+    # children, even though the key-value pairs are correct.
+ pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +def test_json_nested_data(): + json_str = ( + '[{"0":{},"2":{}},{"1":[[""],[]],"2":{"2":""}},' + '{"0":{"a":"1"},"2":{"0":"W&RR=+I","1":""}}]' + ) + df = cudf.read_json(StringIO(json_str), engine="cudf", orient="records") + pdf = pd.read_json(StringIO(json_str), orient="records") + pdf.columns = pdf.columns.astype("str") + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +def test_json_empty_types(): + json_str = """ {} + {"a": [], "b": {}} + {"a": []} + {"b": {}} + {"c": {"d": []}} + {"e": [{}]} + """ + df = cudf.read_json(StringIO(json_str), orient="records", lines=True) + pdf = pd.read_json(StringIO(json_str), orient="records", lines=True) + assert_eq(df, pdf) + + +def test_json_types_data(): + # 0:<0:string,1:float> + # 1:list + # 2:<0:bool> + json_str = ( + '[{"0":null,"2":{}},' + '{"1":[123],"0":{"0":"foo","1":123.4},"2":{"0":false}},' + '{"0":{},"1":[],"2":{"0":null}}]' + ) + df = cudf.read_json(StringIO(json_str), engine="cudf", orient="records") + pdf = pd.read_json(StringIO(json_str), orient="records") + pdf.columns = pdf.columns.astype("str") + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +@pytest.mark.parametrize( + "col_type,json_str,expected_data", + [ + # without quotes + ("int", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes + ("int", '[{"k": "1"}, {"k": "2"}]', [1, 2]), + # with quotes, mixed + ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes, null, mixed + ( + "int", + '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + # without quotes, null + ( + "int", + '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + # without quotes + ("float", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes + ("float", '[{"k": "1"}, {"k": "2"}]', [1, 2]), + # with quotes, mixed + ( + "float", + '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', + [1, 2, 3, 4], + ), + # with quotes, null, mixed + ( + "float", + '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + # with quotes, NAN + ( + "float", + '[{"k": "1"}, {"k": "2"}, {"k": NaN}, {"k": "4"}]', + [1, 2, np.nan, 4], + ), + # without quotes + ("str", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes + ("str", '[{"k": "1"}, {"k": "2"}]', [1, 2]), + # with quotes, mixed + ("str", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes, null, mixed + ( + "str", + '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + # without quotes, null + ( + "str", + '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + ], +) +def test_json_quoted_values_with_schema(col_type, json_str, expected_data): + actual = cudf.read_json( + StringIO(json_str), + engine="cudf", + orient="records", + dtype={"k": col_type}, + ) + expected = cudf.DataFrame({"k": expected_data}, dtype=col_type) + + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "col_type,json_str,expected_data", + [ + # with quotes, mixed + ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), + # with quotes, null, mixed + ( + "int", + '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', + [1, 2, None, 4], + ), + # with 
quotes, mixed + ( + "str", + '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', + ["1", "2", "3", "4"], + ), + # with quotes, null, mixed + ( + "str", + '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', + ["1", "2", None, "4"], + ), + ], +) +def test_json_quoted_values(col_type, json_str, expected_data): + actual = cudf.read_json( + StringIO(json_str), + engine="cudf", + orient="records", + dtype={"k": col_type}, + ) + expected = cudf.DataFrame({"k": expected_data}, dtype=col_type) + + assert_eq(expected, actual) + assert_eq(expected_data, actual.k.to_arrow().to_pylist()) + + +@pytest.mark.parametrize( + "keep_quotes,result", + [ + ( + True, + { + "c1": [ + {"f1": '"sf11"', "f2": '"sf21"'}, + {"f1": '"sf12"', "f2": '"sf22"'}, + ], + "c2": [['"l11"', '"l21"'], ['"l12"', '"l22"']], + }, + ), + ( + False, + { + "c1": [ + {"f1": "sf11", "f2": "sf21"}, + {"f1": "sf12", "f2": "sf22"}, + ], + "c2": [["l11", "l21"], ["l12", "l22"]], + }, + ), + ], +) +def test_json_keep_quotes(keep_quotes, result): + bytes_file = BytesIO() + data = { + "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}], + "c2": [["l11", "l21"], ["l12", "l22"]], + } + pdf = pd.DataFrame(data) + pdf.to_json(bytes_file, orient="records", lines=True) + + actual = cudf.read_json( + bytes_file, + orient="records", + lines=True, + keep_quotes=keep_quotes, + ) + expected = pd.DataFrame(result) + + assert_eq(actual, expected) + + +def test_json_dtypes_nested_data(): + # a: StructDtype({'a': StructDtype({'b': dtype('float64')}), + # 'b': dtype('int64')}) + # b: ListDtype(ListDtype(float64)) + actual_json_str = ( + '{"a":{"a":{"b":10.0},"b":11},"b":[[10.0,1.1],[12.0,23.0]]}\n' + '{"a":{"a":{"b":107.0},"b":5},"b":[[10.0,11.2],[12.0,0.23]]}\n' + '{"a":{"a":{"b":50.7},"b":2},"b":[[10.0,11.3],[12.0,2.3]]}\n' + '{"a":{"a":{"b":1.2},"b":67},"b":[[6.0,7.0]]}\n' + '{"a":{"a":{"b":40.1},"b":1090},"b":null}\n' + ) + + """ + In [3]: df + Out[3]: + a b + 0 {'a': {'b': 10.0}, 'b': 11} [[10.0, 1.1], [12.0, 23.0]] + 1 {'a': {'b': 107.0}, 'b': 5} [[10.0, 11.2], [12.0, 0.23]] + 2 {'a': {'b': 50.7}, 'b': 2} [[10.0, 11.3], [12.0, 2.3]] + 3 {'a': {'b': 1.2}, 'b': 67} [[6.0, 7.0]] + 4 {'a': {'b': 40.1}, 'b': 1090} None + """ + + # a: StructDtype({'a': StructDtype({'b': dtype('int64')}), + # 'b': dtype('float64')}) + # b: ListDtype(ListDtype(int64)) + expected_json_str = ( + '{"a":{"a":{"b":10},"b":11.0},"b":[[10,1],[12,23]]}\n' + '{"a":{"a":{"b":107},"b":5.0},"b":[[10,11],[12,0]]}\n' + '{"a":{"a":{"b":50},"b":2.0},"b":[[10,11],[12,2]]}\n' + '{"a":{"a":{"b":1},"b":67.0},"b":[[6,7]]}\n' + '{"a":{"a":{"b":40},"b":1090.0},"b":null}\n' + ) + + """ + In [7]: df + Out[7]: + a b + 0 {'a': {'b': 10}, 'b': 11.0} [[10, 1], [12, 23]] + 1 {'a': {'b': 107}, 'b': 5.0} [[10, 11], [12, 0]] + 2 {'a': {'b': 50}, 'b': 2.0} [[10, 11], [12, 2]] + 3 {'a': {'b': 1}, 'b': 67.0} [[6, 7]] + 4 {'a': {'b': 40}, 'b': 1090.0} None + """ + + df = cudf.read_json( + StringIO(actual_json_str), + engine="cudf", + orient="records", + lines=True, + dtype={ + "a": cudf.StructDtype( + { + "a": cudf.StructDtype({"b": cudf.dtype("int64")}), + "b": cudf.dtype("float64"), + } + ), + "b": cudf.ListDtype(cudf.ListDtype("int64")), + }, + ) + + pdf = pd.read_json( + StringIO(expected_json_str), + orient="records", + lines=True, + ) + + assert_eq(df, pdf) + + pdf.columns = pdf.columns.astype("str") + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=df.to_arrow().schema, safe=False + ) + assert df.to_arrow().equals(pa_table_pdf) + + +@pytest.mark.parametrize( + "tag, data", + [ + ( + "normal", + 
"""\ +{"a": 1, "b": 2} +{"a": 3, "b": 4}""", + ), + ( + "multiple", + """\ + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } + { "a": { "y" : 6}, "b" : [6 ], "c": 13 } + { "a": { "y" : 6}, "b" : [7 ], "c": 14 }""", + ), + ( + "reordered", + """\ + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "c": 12 , "b" : [4, 5 ]} + { "b" : [6 ], "a": { "y" : 6}, "c": 13} + { "c" : 14, "a": { "y" : 6}, "b" : [7 ]} +""", + ), + ( + "missing", + """ + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "b" : [4, 5 ] } + { "a": { "y" : 6}, "c": 13 } + { "a": { "y" : 6}, "b" : [7 ], "c": 14 } +""", + ), + ( + "dtype_mismatch", + """\ + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } + { "a": { "y" : 6}, "b" : [6 ], "c": 13 } + { "a": { "y" : 6}, "b" : [7 ], "c": 14.0 }""", + ), + ], +) +class TestNestedJsonReaderCommon: + @pytest.mark.parametrize("chunk_size", [10, 100, 1024, 1024 * 1024]) + def test_chunked_nested_json_reader(self, tag, data, chunk_size): + expected = cudf.read_json(StringIO(data), lines=True) + + source_size = len(data) + chunks = [] + for chunk_start in range(0, source_size, chunk_size): + chunks.append( + cudf.read_json( + StringIO(data), + byte_range=[chunk_start, chunk_size], + lines=True, + ) + ) + df = cudf.concat(chunks, ignore_index=True) + assert expected.to_arrow().equals(df.to_arrow()) + + @pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/pull/57439", + ) + def test_order_nested_json_reader(self, tag, data): + expected = pd.read_json(StringIO(data), lines=True) + target = cudf.read_json(StringIO(data), lines=True) + # Using pyarrow instead of assert_eq because pandas + # doesn't handle nested values comparisons correctly + if tag == "dtype_mismatch": + with pytest.raises(AssertionError): + # pandas parses integer values in float representation + # as integer + assert pa.Table.from_pandas(expected).equals(target.to_arrow()) + elif tag == "missing": + with pytest.raises(AssertionError): + # pandas inferences integer with nulls as float64 + assert pa.Table.from_pandas(expected).equals(target.to_arrow()) + else: + assert pa.Table.from_pandas(expected).equals(target.to_arrow()) + + +def test_json_round_trip_gzip(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": ["abc", "def", "ghi"]}) + bio = BytesIO() + with gzip.open(bio, mode="wb") as fo: + with pytest.warns(UserWarning): + df.to_json(fo, orient="records", lines=True) + bio.seek(0) + with gzip.open(bio, mode="rb") as fo: + written_df = cudf.read_json(fo, orient="records", lines=True) + assert_eq(written_df, df) + + # Testing writing from middle of the file. 
+ loc = bio.tell() + + with gzip.open(bio, mode="wb") as fo: + fo.seek(loc) + with pytest.warns(UserWarning): + df.to_json(fo, orient="records", lines=True) + bio.seek(loc) + with gzip.open(bio, mode="rb") as fo: + fo.seek(loc) + written_df = cudf.read_json(fo, orient="records", lines=True) + assert_eq(written_df, df) + + +@pytest.mark.parametrize( + "data", + [ + # # empty input + # assert failing due to missing index size information + "", + "[]", + "[]\n[]\n[]", + # simple values + """[1]\n[2]\n[3]""", + """[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9]""", + # nulls + """[1, 2, 3]\n[4, 5, null]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, null]\n[7, 8, 9]\n[null, null, null]""", + """[1, 2, 3]\n[4, 5, null]\n[]""", + # missing + """[1, 2, 3]\n[4, 5 ]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9, 10]""", + """[1, 2, 3]\n[4, 5, 6, {}]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, 6, []]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, 6, {"a": 10}]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, 6, [10]]\n[7, 8, 9]""", + # mixed + """[1, 2, 3]\n[4, 5, {}]\n[7, 8, 9]""", + """[1, 2, {}]\n[4, 5, 6]\n[7, 8, 9]""", + """[1, 2, 3]\n[4, 5, [6]]\n[7, 8, 9]""", + """[1, 2, [3]]\n[4, 5, 6]\n[7, 8, 9]""", + # nested + """[1, 2, [3]]\n[4, 5, [6]]\n[7, 8, [9]]""", + """[1, 2, {"a": 3}]\n[4, 5, {"b": 6}]\n[7, 8, {"c": 9}]""", + """[1, 2, [{"a": 3}, {"a": 3}]] + [4, 5, [{"b": 6}, {"b": 6}, {}, {"b": 6}]] + [7, 8, [{}]]""", + """[1, 2, {"a": [3, 3, 3]}] + [4, 5, {"b": [6, 6]}] + [7, 8, {"c": 9}]""", + """[1, 2, [{"a": 3}, {"a": null}]] + [4, 5, [{"b": [6.0, 6, 06]}, {"b": [6]}, {}, {"b": null}]] + [7, 8, [{}]]""", + ], +) +def test_json_array_of_arrays(data, lines): + data = data if lines else "[" + data.replace("\n", ",") + "]" + pdf = pd.read_json(StringIO(data), orient="values", lines=lines) + df = cudf.read_json( + StringIO(data), + engine="cudf", + orient="values", + lines=lines, + ) + # if mixed with dict/list type, replace other types with None. 
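+    # (cudf reads such mixed columns as the nested type, so scalar entries
+    # become nulls; mirror that in the pandas frame before comparing)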
+    if 2 in pdf.columns and any(
+        pdf[2].apply(lambda x: isinstance(x, dict) or isinstance(x, list))
+    ):
+        pdf[2] = pdf[2].apply(
+            lambda x: x if isinstance(x, dict) or isinstance(x, list) else None
+        )
+    # TODO: Replace string column names with integer column names
+    # for values orient in cudf json reader
+    pdf.rename(columns={name: str(name) for name in pdf.columns}, inplace=True)
+    # assert_eq(pdf, df)
+    pa_table_pdf = pa.Table.from_pandas(
+        pdf, schema=df.to_arrow().schema, safe=False
+    )
+    assert df.to_arrow().equals(pa_table_pdf)
+
+
+@pytest.mark.parametrize(
+    "jsonl_string",
+    [
+        # simple list with mixed types
+        """{"a":[123, {}], "b":1.1}""",
+        """{"a":[123, {"0": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":[{"L": 123}, 123], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":[123, {"0": 123}, 12.3], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":[123, {"0": 123}, null], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":["123", {"0": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":[{"0": 123}, "123"], "b":1.0}\n {"b":1.1}\n {"b":2.1}""",
+        """{"a":["123", {"0": 123}, "123"], "b":1.0}\n {"b":1.1}""",
+        """{"a":[123]}\n {"a":[{"0": 123}], "b":1.0}\n {"b":1.1}""",
+        """{"a":[{"0": 123}]}\n {"a":[123], "b":1.0}\n {"b":1.1}""",
+        """{"a":[{"0": 123}]}\n {"a": []}\n {"a":[123], "b":1.0}\n{"b":1.1}""",
+        """{"b":1.0, "a":[{"0": 123}]}\n {"a":[123]}\n {"b":1.1}\n{"a": []}""",
+        """{"a": []}\n {"a":[{"0": 123}]}\n {"a":[123], "b":1.0}\n{"b":1.1}""",
+        """{"a": []}\n {"a":[123], "b":1.0}\n {"a":[{"0": 123}]}\n{"b":1.1}""",
+        # nested list with mixed types
+        """{"a":[123, [{"0": 123}, {}]], "b":1.0}
+        {"b":1.1}
+        {"a":[]}
+        {"a":[123]}
+        {"a":[[123], []]}""",
+        """{"a":[], "b":1.0}
+        {"a":[[[456]]]}
+        {"a":[[123]]}
+        {"a":[123]}""",
+        """{"a":[123], "b":1.0}
+        {"b":1.1}
+        {"b":2.1}
+        {"a":[[[[[[]]]]]]}""",
+        """{"a":[123], "b":1.0}
+        {"a":[[[[[[]]]]]]}
+        {"a":[[[[[[]]]]], [[[[[]]]]]]}
+        {"a":[[[[[[]]]], [[[[]]]]]]}
+        {"a":[[[[[[]]], [[[]]]]]]}
+        {"a":[[[[[[]], [[]]]]]]}
+        {"a":[[[[[[], 123, []]]]]]}""",
+        # mixed elements in multiple columns
+        """{"a":[123, {"0": 123}], "b":1.0}
+        {"c": ["abc"], "b":1.1}
+        {"c": ["abc", []] }""",
+    ],
+)
+def test_json_nested_mixed_types_in_list(jsonl_string):
+    # utility function for this test:
+    # replace list elements with None if the list mixes dict and non-dict
+    # items (None entries are ignored)
+    def _replace_in_list(list_to_replace, replace_items):
+        return [
+            _replace_in_list(x, replace_items)
+            if isinstance(x, list)
+            else None
+            if x in replace_items
+            else x
+            for x in list_to_replace
+        ]
+
+    def _replace_with_nulls(df, replace_items):
+        for col in df.columns:
+            if df[col].dtype == "object":
+                df[col] = df[col].apply(
+                    lambda x: _replace_in_list(x, replace_items)
+                    if isinstance(x, list)
+                    else x
+                )
+        return df
+
+    # both json lines and json string are tested
+    json_string = "[" + jsonl_string.replace("\n", ",") + "]"
+    pdf = pd.read_json(StringIO(jsonl_string), orient="records", lines=True)
+    pdf2 = pd.read_json(StringIO(json_string), orient="records", lines=False)
+    assert_eq(pdf, pdf2)
+    # Replace list elements with None where a list mixes dict and non-dict
+    # items; in the test cases above these scalars appear alongside
+    # dict/list items, so replace them with None.
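+    # (123, "123", 12.3 and "abc" are the scalar values that appear
+    # alongside dicts/lists in the inputs above)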
+ pdf = _replace_with_nulls(pdf, [123, "123", 12.3, "abc"]) + gdf = cudf.read_json( + StringIO(jsonl_string), + orient="records", + lines=True, + ) + gdf2 = cudf.read_json( + StringIO(json_string), + engine="cudf", + orient="records", + lines=False, + ) + if """[{"0": 123}, {}]""" not in jsonl_string: + # {} in pandas is represented as {"0": None} in cudf + assert_eq(gdf, pdf) + assert_eq(gdf2, pdf) + pa_table_pdf = pa.Table.from_pandas( + pdf, schema=gdf.to_arrow().schema, safe=False + ) + assert gdf.to_arrow().equals(pa_table_pdf) + assert gdf2.to_arrow().equals(pa_table_pdf) + + +@pytest.mark.parametrize( + "jsonl_string", + [ + # mixed type in list (in different order) + """{"a":[[{"0": 123}, {}], {"1": 321}], "b":1.0}""", + """{"a":[{"1": 321}, [{"0": 123}, {}], ], "b":1.0}""", + """{"a":[123, [{"0": 123}, {}], {"1": 321}], "b":1.0}""", + """{"a":[null, [{"0": 123}, {}], {"1": 321}], "b":1.0}""", + # mixed type in struct (in different order) + """{"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} + {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", + """{"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0} + {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0}""", + """{"a": {"b": {"0": 123}, "c": null}, "d":1.0} + {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} + {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", + """{"a": {"b": {"0": 123}, "c": 123}, "d":1.0} + {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} + {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", + ], +) +def test_json_nested_mixed_types_error(jsonl_string): + # mixing list and struct should raise an exception + with pytest.raises(RuntimeError): + cudf.read_json( + StringIO(jsonl_string), + orient="records", + lines=True, + ) + + +@pytest.mark.parametrize("on_bad_lines", ["error", "recover", "abc"]) +def test_json_reader_on_bad_lines(on_bad_lines): + json_input = StringIO( + '{"a":1,"b":10}\n{"a":2,"b":11}\nabc\n{"a":3,"b":12}\n' + ) + if on_bad_lines == "error": + with pytest.raises(RuntimeError): + cudf.read_json( + json_input, + lines=True, + orient="records", + on_bad_lines=on_bad_lines, + ) + elif on_bad_lines == "recover": + actual = cudf.read_json( + json_input, lines=True, orient="records", on_bad_lines=on_bad_lines + ) + expected = cudf.DataFrame( + {"a": [1, 2, None, 3], "b": [10, 11, None, 12]} + ) + assert_eq(actual, expected) + else: + with pytest.raises(TypeError): + cudf.read_json( + json_input, + lines=True, + orient="records", + on_bad_lines=on_bad_lines, + ) + + +def test_chunked_json_reader(): + df = cudf.DataFrame( + { + "a": ["aaaa"] * 1_000_000, + "b": range(1_000_000), + } + ) + buf = BytesIO() + df.to_json(buf, lines=True, orient="records", engine="cudf") + buf.seek(0) + df = df.to_pandas() + with cudf.option_context("io.json.low_memory", True): + gdf = cudf.read_json(buf, lines=True) + assert_eq(df, gdf) + + +# compression formats limited to those supported by both reader and writer +@pytest.mark.parametrize("compression", ["gzip", "snappy", "zstd"]) +def test_roundtrip_compression(compression, tmp_path): + expected = cudf.DataFrame({"a": [1], "b": ["2"]}) + fle = BytesIO() + expected.to_json(fle, engine="cudf", compression=compression) + result = cudf.read_json(fle, engine="cudf", compression=compression) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py deleted file mode 100644 index 4f9ca1b4261..00000000000 --- a/python/cudf/cudf/tests/test_csv.py +++ /dev/null @@ -1,2249 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA 
CORPORATION. - -import codecs -import gzip -import os -import re -import shutil -from io import BytesIO, StringIO - -import cupy as cp -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import read_csv -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_VERSION, -) -from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if - - -@pytest.fixture -def numeric_dataframe(): - return pd.DataFrame( - {"col1": [1, 2, 3], "col2": [4, 5, 6]}, - ) - - -@pytest.fixture -def datetime_dataframe(): - return pd.DataFrame( - { - "col1": [ - "31/10/2010", - "05/03/2001", - "20/10/1994", - "18/10/1990", - "1/1/1970", - "2016-04-30T01:02:03.000", - "2038-01-19 03:14:07", - ], - "col2": [ - "18/04/1995", - "14 / 07 / 1994", - "07/06/2006", - "16/09/2005", - "2/2/1970", - "2007-4-30 1:6:40.000PM", - "2038-01-19 03:14:08", - ], - "col3": [ - "1 Jan", - "2 January 1994", - "Feb 2002", - "31-01-2000", - "1-1-1996", - "15-May-2009", - "21-Dec-3262", - ], - } - ) - - -@pytest.fixture -def pd_mixed_dataframe(): - return pd.DataFrame( - { - "Integer": [2345, 11987, 9027, 9027], - "Date": ["18/04/1995", "14/07/1994", "07/06/2006", "16/09/2005"], - "Float": [9.001, 8.343, 6, 2.781], - "Integer2": [2345, 106, 2088, 789277], - "Category": ["M", "F", "F", "F"], - "String": ["Alpha", "Beta", "Gamma", "Delta"], - "Boolean": [True, False, True, False], - } - ) - - -@pytest.fixture -def cudf_mixed_dataframe(pd_mixed_dataframe): - return cudf.from_pandas(pd_mixed_dataframe) - - -@pytest.fixture -def gdf_np_dtypes(): - gdf_dtypes = [ - "float", - "float32", - "double", - "float64", - "int8", - "short", - "int16", - "int", - "int32", - "long", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - ] - - np_dtypes = [ - np.float32, - np.float32, - np.float64, - np.float64, - np.int8, - np.int16, - np.int16, - np.int32, - np.int32, - np.int64, - np.int64, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - ] - return dict(zip(gdf_dtypes, np_dtypes, strict=True)) - - -@pytest.fixture -def numeric_extremes_dataframe(gdf_np_dtypes): - data = {} - for typ, np_type in gdf_np_dtypes.items(): - if np.dtype(np_type).kind in "iu": - itype = np.iinfo(np_type) - extremes = [0, +1, -1, itype.min, itype.max] - data[typ] = np.array(extremes * 4).astype(np_type)[:20] - else: - ftype = np.finfo(np_type) - extremes = [ - 0.0, - -0.0, - +1, - -1, - np.nan, - -np.nan, - # ftype.min, # TODO enable after fixing truncation issue #6235 - # ftype.max, # TODO enable after fixing truncation issue #6235 - np_type(np.inf), - -np_type(np.inf), - ftype.eps, - ftype.epsneg, - ftype.tiny, - -ftype.eps, - -ftype.epsneg, - -ftype.tiny, - ] - data[typ] = np.array(extremes * 4, dtype=np_type)[:20] - return pd.DataFrame(data) - - -@pytest.fixture( - params=[np.float64, np.float32, np.int64, np.int32, np.uint64, np.uint32] -) -def dtype(request): - return request.param - - -def test_csv_reader_numeric_data(dtype, numeric_dataframe, tmp_path): - fname = tmp_path / "tmp_csvreader_file1.csv" - - df = numeric_dataframe.astype(dtype) - df.to_csv(fname, index=False, header=False) - - dtypes = [df[k].dtype for k in df.columns] - out = read_csv(str(fname), names=list(df.columns.values), dtype=dtypes) - - assert len(out.columns) == len(df.columns) - assert_eq(df, out) - - -@pytest.mark.parametrize("parse_dates", [["date2"], [0], ["date1", 1, "bad"]]) -def test_csv_reader_datetime(datetime_dataframe, parse_dates): - df = datetime_dataframe - buffer 
= df.to_csv(index=False, header=False) - - gdf = read_csv( - StringIO(buffer), - names=["date1", "date2", "bad"], - parse_dates=parse_dates, - dayfirst=True, - ) - # Need to used `date_format='mixed'`, - # https://github.com/pandas-dev/pandas/issues/53355 - pdf = pd.read_csv( - StringIO(buffer), - names=["date1", "date2", "bad"], - parse_dates=parse_dates, - dayfirst=True, - date_format="mixed", - ) - - assert_eq(gdf, pdf) - - -@pytest.mark.parametrize("p_arg", ["delimiter", "sep"]) -@pytest.mark.parametrize("c_arg", ["sep", "delimiter"]) -def test_csv_reader_mixed_data_delimiter_sep( - tmp_path, p_arg, c_arg, pd_mixed_dataframe -): - pandas_arg = {p_arg: "|"} - cudf_arg = {c_arg: "|"} - fname = tmp_path / "tmp_csvreader_file3.csv" - - pd_mixed_dataframe.to_csv(fname, sep="|", index=False, header=False) - - gdf1 = read_csv( - str(fname), - names=["1", "2", "3", "4", "5", "6", "7"], - dtype=[ - "int64", - "datetime64[ns]", - "float64", - "int64", - "category", - "str", - "bool", - ], - dayfirst=True, - **cudf_arg, - ) - gdf2 = read_csv( - str(fname), - names=["1", "2", "3", "4", "5", "6", "7"], - dtype=[ - "int64", - "datetime64[ns]", - "float64", - "int64", - "category", - "str", - "bool", - ], - dayfirst=True, - **pandas_arg, - ) - - pdf = pd.read_csv( - fname, - names=["1", "2", "3", "4", "5", "6", "7"], - parse_dates=[1], - dayfirst=True, - **pandas_arg, - ) - - assert len(gdf1.columns) == len(pdf.columns) - assert len(gdf2.columns) == len(pdf.columns) - assert_eq(gdf1, gdf2) - - -@pytest.mark.parametrize("use_list", [False, True]) -def test_csv_reader_dtype_list(numeric_dataframe, use_list): - df = numeric_dataframe.astype(np.float32) - buffer = df.to_csv(index=False, header=False) - - # PANDAS doesn't list but cudf does (treated as implied ordered dict) - # Select first column's dtype if non-list; expect the same dtype for all - if use_list: - dtypes = [df[k].dtype for k in df.columns] - else: - dtypes = df[df.columns[0]].dtype - - gdf = read_csv(StringIO(buffer), dtype=dtypes, names=df.columns) - - assert_eq(gdf, df) - - -@pytest.mark.parametrize("use_names", [False, True]) -def test_csv_reader_dtype_dict(use_names, gdf_np_dtypes): - # Save with the column header if not explicitly specifying a list of names - df = pd.DataFrame( - { - typ: np.zeros(3, dtype=np_type) - for typ, np_type in gdf_np_dtypes.items() - } - ) - buffer = df.to_csv(index=False, header=not use_names) - dtypes = df.dtypes.to_dict() - names = list(gdf_np_dtypes.keys()) if use_names else None - gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) - pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) - - assert_eq(gdf, pdf) - - -@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") -@pytest.mark.parametrize("use_names", [True, False]) -def test_csv_reader_dtype_extremes(use_names, numeric_extremes_dataframe): - # Save with the column header if not explicitly specifying a list of names - df = numeric_extremes_dataframe - buffer = df.to_csv(index=False, header=not use_names) - dtypes = df.dtypes.to_dict() - names = df.columns.to_list() if use_names else None - - gdf = read_csv(StringIO(buffer), dtype=dtypes, names=names) - pdf = pd.read_csv(StringIO(buffer), dtype=dtypes, names=names) - - assert_eq(gdf, pdf) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/52449", -) -def test_csv_reader_skiprows_skipfooter(tmp_path, pd_mixed_dataframe): - fname = tmp_path / "tmp_csvreader_file5.csv" - - 
pd_mixed_dataframe.to_csv( - fname, columns=["Integer", "Date", "Float"], index=False, header=False - ) - - # Using engine='python' to eliminate pandas warning of using python engine. - df_out = pd.read_csv( - fname, - names=["1", "2", "3"], - parse_dates=[1], - dayfirst=True, - skiprows=1, - skipfooter=1, - engine="python", - ) - out = read_csv( - str(fname), - names=["1", "2", "3"], - dtype=["int64", "datetime64[ns]", "float64"], - skiprows=1, - skipfooter=1, - dayfirst=True, - ) - - assert len(out.columns) == len(df_out.columns) - assert len(out) == len(df_out) - - assert_eq(df_out, out, check_dtype=False) - - -def test_csv_reader_negative_vals(tmp_path): - fname = tmp_path / "tmp_csvreader_file6.csv" - - names = ["0", "1", "2"] - dtypes = ["float32", "float32", "float32"] - lines = [ - ",".join(names), - "-181.5060,-185.37000,-3", - "-127.6300,-230.54600,-9", - ] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - zero = [-181.5060, -127.6300] - one = [-185.370, -230.54600] - two = [-3, -9] - - df = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) - - np.testing.assert_allclose(zero, df["0"].to_numpy()) - np.testing.assert_allclose(one, df["1"].to_numpy()) - np.testing.assert_allclose(two, df["2"].to_numpy()) - - -def test_csv_reader_strings(tmp_path): - fname = tmp_path / "tmp_csvreader_file7.csv" - - names = ["text", "int"] - dtypes = ["str", "int"] - lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - df = read_csv( - str(fname), - names=names, - dtype=dtypes, - skiprows=1, - decimal=".", - thousands="'", - ) - - assert len(df.columns) == 2 - assert df["text"].dtype == np.dtype("object") - assert df["int"].dtype == np.dtype("int64") - assert df["text"][0] == "a" - assert df["text"][1] == "b" - assert df["text"][2] == "c" - assert df["text"][3] == "d" - - -def test_csv_reader_strings_quotechars(tmp_path): - fname = tmp_path / "tmp_csvreader_file8.csv" - - names = ["text", "int"] - dtypes = ["str", "int"] - lines = [",".join(names), '"a,\n",0', '"b ""c"" d",0', "e,0", '"f,,!.,",0'] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - df = read_csv( - str(fname), - names=names, - dtype=dtypes, - skiprows=1, - quotechar='"', - quoting=1, - ) - - assert len(df.columns) == 2 - assert df["text"].dtype == np.dtype("object") - assert df["int"].dtype == np.dtype("int64") - assert df["text"][0] == "a,\n" - assert df["text"][1] == 'b "c" d' - assert df["text"][2] == "e" - assert df["text"][3] == "f,,!.," - - -def test_csv_reader_usecols_int_char(tmp_path, pd_mixed_dataframe): - fname = tmp_path / "tmp_csvreader_file10.csv" - pd_mixed_dataframe.to_csv( - fname, - columns=["Integer", "Date", "Float", "Integer2"], - index=False, - header=False, - ) - - df_out = pd.read_csv(fname, usecols=[0, 1, 3]) - out = read_csv(fname, usecols=[0, 1, 3]) - - assert len(out.columns) == len(df_out.columns) - assert len(out) == len(df_out) - assert_eq(df_out, out, check_names=False) - - -@pytest.mark.parametrize( - "buffer", - [ - "abc,ABC,abc,abcd,abc\n1,2,3,4,5\n", - "A,A,A.1,A,A.2,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a", - "A,A,A.1,,Unnamed: 4,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a", - ], -) -@pytest.mark.parametrize("mangle_dupe_cols", [True, False]) -def test_csv_reader_mangle_dupe_cols(tmpdir, buffer, mangle_dupe_cols): - # Default: mangle_dupe_cols=True - cu_df = read_csv(StringIO(buffer), mangle_dupe_cols=mangle_dupe_cols) - if mangle_dupe_cols: - pd_df = pd.read_csv(StringIO(buffer)) - else: - 
# Pandas does not support mangle_dupe_cols=False - head = buffer.split("\n")[0].split(",") - first_cols = np.unique(head, return_index=True)[1] - pd_df = pd.read_csv(StringIO(buffer), usecols=first_cols) - assert_eq(cu_df, pd_df) - - -def test_csv_reader_float_decimal(tmp_path): - fname = tmp_path / "tmp_csvreader_file12.csv" - - names = ["basic_32", "basic_64", "round", "decimal_only", "precision"] - dtypes = ["float32", "float64", "float64", "float32", "float64"] - lines = [ - ";".join(names), - "1,2;1234,5678;12345;0,123;-73,98007199999998", - "3,4;3456,7890;67890;,456;1,7976931348623157e+307", - "5,6e0;0,5679e2;1,2e10;0,07e-001;0,0", - ] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - basic_32_ref = [1.2, 3.4, 5.6] - basic_64_ref = [1234.5678, 3456.7890, 56.79] - round_ref = [12345, 67890, 12000000000] - decimal_only_ref = [0.123, 0.456, 0.007] - precision_ref = [-73.98007199999998, 1.7976931348623157e307, 0.0] - - df = read_csv( - str(fname), - names=names, - dtype=dtypes, - skiprows=1, - delimiter=";", - decimal=",", - ) - - np.testing.assert_allclose(basic_32_ref, df["basic_32"].to_numpy()) - np.testing.assert_allclose(basic_64_ref, df["basic_64"].to_numpy()) - np.testing.assert_allclose(round_ref, df["round"].to_numpy()) - np.testing.assert_allclose(decimal_only_ref, df["decimal_only"].to_numpy()) - np.testing.assert_allclose(precision_ref, df["precision"].to_numpy()) - - -def test_csv_reader_NaN_values(): - names = dtypes = ["float32"] - empty_cells = '\n""\n' - default_na_cells = ( - "#N/A\n#N/A N/A\n#NA\n-1.#IND\n" - "-1.#QNAN\n-NaN\n-nan\n1.#IND\n" - "1.#QNAN\nN/A\n\nNA\nNULL\n" - "NaN\nn/a\nnan\nnull\n" - ) - custom_na_cells = "NV_NAN\nNotANumber\n" - all_cells = empty_cells + default_na_cells + custom_na_cells - custom_na_values = ["NV_NAN", "NotANumber"] - - # test default NA values. 
empty cells should also yield NaNs - gdf = read_csv( - StringIO(default_na_cells + empty_cells), names=names, dtype=dtypes - ) - pdf = pd.read_csv( - StringIO(default_na_cells + empty_cells), names=names, dtype=np.float32 - ) - assert_eq(pdf, gdf) - - # custom NA values - gdf = read_csv( - StringIO(all_cells), - names=names, - dtype=dtypes, - na_values=custom_na_values, - ) - pdf = pd.read_csv( - StringIO(all_cells), - names=names, - dtype=np.float32, - na_values=custom_na_values, - ) - assert_eq(pdf, gdf) - - # custom NA values - gdf = read_csv( - StringIO(empty_cells + default_na_cells + "_NAA_\n"), - names=names, - dtype=dtypes, - na_values="_NAA_", - ) - pdf = pd.read_csv( - StringIO(empty_cells + default_na_cells + "_NAA_\n"), - names=names, - dtype=np.float32, - na_values="_NAA_", - ) - assert_eq(pdf, gdf) - - # data type detection should evaluate the column to int8 (all nulls) - gdf = read_csv( - StringIO(all_cells), - header=None, - na_values=custom_na_values, - ) - assert gdf.dtypes.iloc[0] == "int8" - assert all(gdf["0"][idx] is cudf.NA for idx in range(len(gdf["0"]))) - - # data type detection should evaluate the column to object if some nulls - gdf = read_csv(StringIO(all_cells), header=None) - assert gdf.dtypes.iloc[0] == np.dtype("object") - - -def test_csv_reader_thousands(tmp_path): - fname = tmp_path / "tmp_csvreader_file13.csv" - - names = dtypes = [ - "float32", - "float64", - "int32", - "int64", - "uint32", - "uint64", - ] - lines = [ - ",".join(names), - "1'234.5, 1'234.567, 1'234'567, 1'234'567'890,\ - 1'234'567, 1'234'567'890", - "12'345.6, 123'456.7, 12'345, 123'456'789, 12'345, 123'456'789", - ] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - f32_ref = [1234.5, 12345.6] - f64_ref = [1234.567, 123456.7] - int32_ref = [1234567, 12345] - int64_ref = [1234567890, 123456789] - uint32_ref = [1234567, 12345] - uint64_ref = [1234567890, 123456789] - - df = read_csv( - str(fname), names=names, dtype=dtypes, skiprows=1, thousands="'" - ) - - np.testing.assert_allclose(f32_ref, df["float32"].to_numpy()) - np.testing.assert_allclose(f64_ref, df["float64"].to_numpy()) - np.testing.assert_allclose(int32_ref, df["int32"].to_numpy()) - np.testing.assert_allclose(int64_ref, df["int64"].to_numpy()) - np.testing.assert_allclose(uint32_ref, df["uint32"].to_numpy()) - np.testing.assert_allclose(uint64_ref, df["uint64"].to_numpy()) - - -def test_csv_reader_buffer_strings(): - names = ["text", "int"] - dtypes = ["str", "int"] - lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] - - buffer = "\n".join(lines) - - df = read_csv(StringIO(buffer), names=names, dtype=dtypes, skiprows=1) - assert len(df.columns) == 2 - assert df["text"].dtype == np.dtype("object") - assert df["int"].dtype == np.dtype("int64") - assert df["text"][0] == "a" - assert df["text"][1] == "b" - assert df["text"][2] == "c" - assert df["text"][3] == "d" - - df2 = read_csv( - BytesIO(str.encode(buffer)), names=names, dtype=dtypes, skiprows=1 - ) - assert len(df2.columns) == 2 - assert df2["text"].dtype == np.dtype("object") - assert df2["int"].dtype == np.dtype("int64") - assert df2["text"][0] == "a" - assert df2["text"][1] == "b" - assert df2["text"][2] == "c" - assert df2["text"][3] == "d" - - -@pytest.mark.parametrize( - "ext, out_comp, in_comp", - [ - (".geez", "gzip", "gzip"), - (".beez", "bz2", "bz2"), - (".gz", "gzip", "infer"), - (".bz2", "bz2", "infer"), - (".beez", "bz2", np.str_("bz2")), - (".data", None, "infer"), - (".txt", None, None), - ("", None, None), - ], -) -def 
test_csv_reader_compression( - tmpdir, ext, out_comp, in_comp, pd_mixed_dataframe -): - fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_compression" + ext) - - df = pd_mixed_dataframe - df.to_csv(fname, index=False, header=False, compression=out_comp) - - gdf = read_csv(fname, names=list(df.columns.values), compression=in_comp) - pdf = pd.read_csv( - fname, names=list(df.columns.values), compression=in_comp - ) - - assert_eq(gdf, pdf) - - -@pytest.mark.parametrize( - "names, dtypes, data, trues, falses", - [ - ( - ["A", "B"], - ["bool", "bool"], - "True,True\nFalse,False\nTrue,False", - None, - None, - ), - ( - ["A", "B"], - ["int32", "int32"], - "True,1\nFalse,2\nTrue,3", - None, - None, - ), - ( - ["A", "B"], - ["int32", "int32"], - "YES,1\nno,2\nyes,3\nNo,4\nYes,5", - ["yes", "Yes", "YES"], - ["no", "NO", "No"], - ), - (["A", "B"], ["int32", "int32"], "foo,bar\nbar,foo", ["foo"], ["bar"]), - (["x", "y"], None, "True,1\nFalse,0", None, None), - ], -) -def test_csv_reader_bools(tmp_path, names, dtypes, data, trues, falses): - fname = tmp_path / "tmp_csvreader_file11.csv" - - lines = [",".join(names), data] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - # Usage of true_values and false_values makes that column into bool type - df_out = pd.read_csv( - fname, - names=names, - skiprows=1, - dtype=(dtypes[0] if dtypes else None), - true_values=trues, - false_values=falses, - ) - - out = read_csv( - fname, - names=names, - dtype=dtypes, - skiprows=1, - true_values=trues, - false_values=falses, - ) - - assert_eq(df_out, out) - - -def test_csv_reader_bools_custom(): - names = ["text", "bool"] - dtypes = {"text": "str", "bool": "bool"} - trues = ["foo", "1"] - falses = ["bar", "0"] - lines = [ - ",".join(names), - "true,true", - "false,false", - "foo,foo", - "bar,bar", - "0,0", - "1,1", - ] - buffer = "\n".join(lines) - - df = read_csv( - StringIO(buffer), - names=names, - dtype=dtypes, - skiprows=1, - true_values=trues, - false_values=falses, - ) - - # Note: bool literals give parsing errors as int - # "0" and "1" give parsing errors as bool in pandas - expected = pd.read_csv( - StringIO(buffer), - names=names, - dtype=dtypes, - skiprows=1, - true_values=trues, - false_values=falses, - ) - assert_eq(df, expected, check_dtype=True) - - -def test_csv_reader_bools_NA(): - names = ["text", "int"] - dtypes = ["str", "int"] - trues = ["foo"] - falses = ["bar"] - lines = [ - ",".join(names), - "true,true", - "false,false", - "foo,foo", - "bar,bar", - "qux,qux", - ] - - buffer = "\n".join(lines) - - df = read_csv( - StringIO(buffer), - names=names, - dtype=dtypes, - skiprows=1, - true_values=trues, - false_values=falses, - ) - assert len(df.columns) == 2 - assert df["text"].dtype == np.dtype("object") - assert df["int"].dtype == np.dtype("int64") - expected = pd.DataFrame( - { - "text": ["true", "false", "foo", "bar", "qux"], - "int": [1.0, 0.0, 1.0, 0.0, np.nan], - } - ) - assert_eq(df, expected) - - -def test_csv_quotednumbers(tmp_path): - fname = tmp_path / "tmp_csvreader_file12.csv" - - names = ["integer", "decimal"] - dtypes = ["int32", "float32"] - lines = [ - ",".join(names), - '1,"3.14"', - '"2","300"', - '"3",10101.0101', - '4,"6.28318"', - ] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - integer_ref = [1, 2, 3, 4] - decimal_ref = [3.14, 300, 10101.0101, 6.28318] - - df1 = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) - df2 = read_csv(str(fname), names=names, dtype=dtypes, skiprows=1) - - assert len(df2.columns) == 2 - 
np.testing.assert_allclose(integer_ref, df1["integer"].to_numpy()) - np.testing.assert_allclose(decimal_ref, df1["decimal"].to_numpy()) - np.testing.assert_allclose(integer_ref, df2["integer"].to_numpy()) - np.testing.assert_allclose(decimal_ref, df2["decimal"].to_numpy()) - - -def test_csv_reader_nrows(tmp_path): - fname = tmp_path / "tmp_csvreader_file14.csv" - - names = ["int1", "int2"] - dtypes = ["int32", "int32"] - - rows = 4000 - read_rows = (rows * 3) // 4 - skip_rows = (rows - read_rows) // 2 - sample_skip = 100 - - with open(str(fname), "w") as fp: - fp.write(",".join(names) + "\n") - for i in range(rows): - fp.write(str(i) + ", " + str(2 * i) + " \n") - - # with specified names - df = read_csv( - str(fname), - names=names, - dtype=dtypes, - skiprows=skip_rows + 1, - nrows=read_rows, - ) - assert df.shape == (read_rows, 2) - for row in range(0, read_rows // sample_skip, sample_skip): - assert df["int1"][row] == row + skip_rows - assert df["int2"][row] == 2 * (row + skip_rows) - assert df["int2"][read_rows - 1] == 2 * (read_rows - 1 + skip_rows) - - # with column name inference - df = read_csv( - str(fname), dtype=dtypes, skiprows=skip_rows + 1, nrows=read_rows - ) - assert df.shape == (read_rows, 2) - assert str(skip_rows) in next(iter(df)) - assert str(2 * skip_rows) in list(df)[1] - for row in range(0, read_rows // sample_skip, sample_skip): - assert df[next(iter(df))][row] == row + skip_rows + 1 - assert df[list(df)[1]][row] == 2 * (row + skip_rows + 1) - assert df[list(df)[1]][read_rows - 1] == 2 * (read_rows + skip_rows) - - # nrows larger than the file - df = read_csv(str(fname), dtype=dtypes, nrows=rows * 2) - assert df.shape == (rows, 2) - for row in range(0, rows // sample_skip, sample_skip): - assert df["int1"][row] == row - assert df["int2"][row] == 2 * row - assert df["int2"][rows - 1] == 2 * (rows - 1) - - # nrows + skiprows larger than the file - df = read_csv( - str(fname), dtype=dtypes, nrows=read_rows, skiprows=read_rows - ) - assert df.shape == (rows - read_rows, 2) - - # nrows equal to zero - df = read_csv(str(fname), dtype=dtypes, nrows=0) - assert df.shape == (0, 2) - - # with both skipfooter and nrows - should throw - with pytest.raises(ValueError): - read_csv(str(fname), nrows=read_rows, skipfooter=1) - - -def test_csv_reader_gzip_compression_strings(tmp_path): - fname = tmp_path / "tmp_csvreader_file15.csv" - fnamez = tmp_path / "tmp_csvreader_file15.csv.gz" - - names = ["text", "int"] - dtypes = ["str", "int"] - lines = [",".join(names), "a,0", "b,0", "c,0", "d,0"] - - with open(str(fname), "w") as fp: - fp.write("\n".join(lines)) - - with open(str(fname), "rb") as f_in, gzip.open(str(fnamez), "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - - df = read_csv( - str(fnamez), - names=names, - dtype=dtypes, - skiprows=1, - decimal=".", - thousands="'", - compression="gzip", - ) - - assert len(df.columns) == 2 - assert df["text"].dtype == np.dtype("object") - assert df["int"].dtype == np.dtype("int64") - assert df["text"][0] == "a" - assert df["text"][1] == "b" - assert df["text"][2] == "c" - assert df["text"][3] == "d" - - -@pytest.mark.parametrize("skip_rows", [0, 4]) -@pytest.mark.parametrize("header_row", [0, 2]) -def test_csv_reader_skiprows_header(skip_rows, header_row): - names = ["float_point", "integer"] - dtypes = ["float64", "int64"] - lines = [ - ",".join(names), - "1.2, 1", - "2.3, 2", - "3.4, 3", - "4.5, 4", - "5.6, 5", - "6.7, 6", - ] - buffer = "\n".join(lines) - - cu_df = read_csv( - StringIO(buffer), dtype=dtypes, skiprows=skip_rows, 
header=header_row - ) - pd_df = pd.read_csv( - StringIO(buffer), skiprows=skip_rows, header=header_row - ) - - assert cu_df.shape == pd_df.shape - assert list(cu_df.columns.values) == list(pd_df.columns.values) - - -def test_csv_reader_dtype_inference(): - names = ["float_point", "integer"] - lines = [ - ",".join(names), - "1.2,1", - "2.3,2", - "3.4,3", - "4.5,4", - "5.6,5", - "6.7,6", - ] - buffer = "\n".join(lines) - cu_df = read_csv(StringIO(buffer)) - pd_df = pd.read_csv(StringIO(buffer)) - - assert cu_df.shape == pd_df.shape - assert list(cu_df.columns.values) == list(pd_df.columns.values) - - -def test_csv_reader_dtype_inference_whitespace(): - names = ["float_point", "integer"] - lines = [ - ",".join(names), - " 1.2, 1", - "2.3,2 ", - " 3.4, 3", - " 4.5,4", - "5.6, 5", - " 6.7,6 ", - ] - buffer = "\n".join(lines) - cu_df = read_csv(StringIO(buffer)) - pd_df = pd.read_csv(StringIO(buffer)) - - assert cu_df.shape == pd_df.shape - assert list(cu_df.columns.values) == list(pd_df.columns.values) - - -def test_csv_reader_empty_dataframe(): - dtypes = ["float64", "int64"] - buffer = "float_point, integer" - - # should work fine with dtypes - df = read_csv(StringIO(buffer), dtype=dtypes) - assert df.shape == (0, 2) - assert all(df.dtypes == ["float64", "int64"]) - - # should default to string columns without dtypes - df = read_csv(StringIO(buffer)) - assert df.shape == (0, 2) - assert all(df.dtypes == ["object", "object"]) - - -def test_csv_reader_filenotfound(tmpdir): - fname = "non-existing-filename.csv" - - # should raise an error - with pytest.raises(FileNotFoundError): - read_csv(str(fname)) - - # should raise an error - dname = tmpdir.mkdir("gdf_csv") - with pytest.raises(FileNotFoundError): - read_csv(str(dname)) - - -@pytest.mark.parametrize( - "src", - [ - lambda path: str(path), - lambda path: path, - lambda path: BytesIO(path.read_bytes()), - lambda path: StringIO(path.read_text()), - lambda path: path.as_uri(), - ], - ids=["filepath", "pathlib.Path", "ByteIO", "StringIO", "url"], -) -def test_csv_reader_filepath_or_buffer(tmp_path, src): - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=np.int32) - csv_path = tmp_path / "tmp.csv" - df.to_csv(csv_path, index=False, header=False) - expect = pd.read_csv(csv_path) - got = cudf.read_csv(src(csv_path)) - - assert_eq(expect, got) - - -def test_small_zip(tmp_path): - df = pd.DataFrame( - { - "a": [1997] * 2, - "b": ["Ford"] * 2, - "c": ["Super, luxurious truck"] * 2, - } - ) - - fname = tmp_path / "small_zip_file.zip" - df.to_csv(fname, index=False) - - got = cudf.read_csv(fname) - assert_eq(df, got) - - -def test_csv_reader_carriage_return(): - rows = 100 - names = ["int_row", "int_double_row"] - buffer = ",".join(names) + "\r\n" - for row in range(rows): - buffer += str(row) + ", " + str(2 * row) + "\r\n" - - df = read_csv(StringIO(buffer)) - expect = cudf.DataFrame( - {"int_row": cp.arange(rows), "int_double_row": cp.arange(rows) * 2} - ) - - assert len(df) == rows - assert_eq(expect, df) - - -def test_csv_reader_tabs(): - names = ["float_point", "integer", "date"] - lines = [ - ",".join(names), - "1.2,\t12, \t11/22/1995", - "3.4\t,\t34\t,\t 01/01/2001", - "\t 5.6,56 \t, 12/12/1970", - "\t7.8 , 78\t,06/15/2018 \t", - ] - buffer = "\n".join(lines) - - df = read_csv(StringIO(buffer), parse_dates=["date"]) - - assert df.shape == (4, 3) - - floats = [1.2, 3.4, 5.6, 7.8] - ints = [12, 34, 56, 78] - dates = [ - "1995-11-22T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "1970-12-12T00:00:00.000000000", - 
"2018-06-15T00:00:00.000000000", - ] - np.testing.assert_allclose(floats, df["float_point"].to_numpy()) - np.testing.assert_allclose(ints, df["integer"].to_numpy()) - for row in range(4): - assert str(df["date"][row]) == dates[row] - - -@pytest.mark.parametrize("segment_bytes", [10000, 19999, 30001, 36000]) -def test_csv_reader_byte_range(tmp_path, segment_bytes): - fname = tmp_path / "tmp_csvreader_file16.csv" - - names = ["int1", "int2"] - - rows = 10000 - with open(str(fname), "w") as fp: - for i in range(rows): - fp.write(str(i) + ", " + str(2 * i) + " \n") - file_size = os.stat(str(fname)).st_size - - ref_df = read_csv(str(fname), names=names).to_pandas() - - dfs = [] - for segment in range((file_size + segment_bytes - 1) // segment_bytes): - dfs.append( - read_csv( - str(fname), - names=names, - byte_range=(segment * segment_bytes, segment_bytes), - ) - ) - df = cudf.concat(dfs).to_pandas() - - assert list(df["int1"]) == list(ref_df["int1"]) - assert list(df["int2"]) == list(ref_df["int2"]) - - -def test_csv_reader_byte_range_type_corner_case(tmp_path): - fname = tmp_path / "tmp_csvreader_file17.csv" - - cudf.datasets.timeseries( - start="2000-01-01", - end="2000-01-02", - dtypes={"name": str, "id": int, "x": float, "y": float}, - ).to_csv(fname, chunksize=100000) - - byte_range = (2_147_483_648, 0) - with pytest.raises(ValueError, match="Invalid byte range offset"): - cudf.read_csv(fname, byte_range=byte_range, header=None) - - -@pytest.mark.parametrize("segment_bytes", [10, 19, 31, 36]) -def test_csv_reader_byte_range_strings(segment_bytes): - names = ["strings"] - buffer = "\n".join('"' + str(x) + '"' for x in range(1, 100)) - file_size = len(buffer) - - ref_df = read_csv(StringIO(buffer), names=names).to_pandas() - - dfs = [] - for segment in range((file_size + segment_bytes - 1) // segment_bytes): - dfs.append( - read_csv( - StringIO(buffer), - names=names, - byte_range=(segment * segment_bytes, segment_bytes), - ) - ) - df = cudf.concat(dfs).to_pandas() - - assert list(df["strings"]) == list(ref_df["strings"]) - - -@pytest.mark.parametrize( - "header_row, skip_rows, skip_blanks", - [ - (1, 0, True), - ("infer", 2, True), - (1, 4, True), - (3, 0, False), - ("infer", 5, False), - ], -) -@pytest.mark.parametrize("lineterminator", ["\n", "\r\n"]) -def test_csv_reader_blanks_and_comments( - skip_rows, header_row, skip_blanks, lineterminator -): - lines = [ - "# first comment line", - lineterminator, - "# third comment line", - "1,2,3", - "4,5,6", - "7,8,9", - lineterminator, - "# last comment line", - lineterminator, - "1,1,1", - ] - buffer = lineterminator.join(lines) - - cu_df = read_csv( - StringIO(buffer), - comment="#", - header=header_row, - skiprows=skip_rows, - skip_blank_lines=skip_blanks, - ) - pd_df = pd.read_csv( - StringIO(buffer), - comment="#", - header=header_row, - skiprows=skip_rows, - skip_blank_lines=skip_blanks, - ) - - assert cu_df.shape == pd_df.shape - assert list(cu_df.columns.values) == list(pd_df.columns.values) - - -def test_csv_reader_prefix(): - lines = ["1, 1, 1, 1"] - buffer = "\n".join(lines) - - prefix_str = "a_prefix" - df = read_csv(StringIO(buffer), header=None, prefix=prefix_str) - - column_names = list(df.columns.values) - for col in range(len(column_names)): - assert column_names[col] == prefix_str + str(col) - - -def test_csv_reader_delim_whitespace(): - buffer = "1 2 3\n4 5 6" - - # with header row - with pytest.warns(FutureWarning): - cu_df = read_csv(StringIO(buffer), delim_whitespace=True) - with expect_warning_if(PANDAS_GE_220): - pd_df 
= pd.read_csv(StringIO(buffer), delim_whitespace=True) - assert_eq(pd_df, cu_df) - - # without header row - with pytest.warns(FutureWarning): - cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None) - with expect_warning_if(PANDAS_GE_220): - pd_df = pd.read_csv( - StringIO(buffer), delim_whitespace=True, header=None - ) - assert pd_df.shape == cu_df.shape - - # should raise an error if used with delimiter or sep - with pytest.raises(ValueError): - with pytest.warns(FutureWarning): - read_csv(StringIO(buffer), delim_whitespace=True, delimiter=" ") - with pytest.raises(ValueError): - with pytest.warns(FutureWarning): - read_csv(StringIO(buffer), delim_whitespace=True, sep=" ") - - -def test_csv_reader_unnamed_cols(): - # first and last columns are unnamed - buffer = ",1,2,3,\n4,5,6,7,8" - - cu_df = read_csv(StringIO(buffer)) - pd_df = pd.read_csv(StringIO(buffer)) - - assert all(pd_df.columns == cu_df.columns) - assert pd_df.shape == cu_df.shape - - -def test_csv_reader_header_quotation(): - buffer = '"1,,1","2,\n,2",3\n+4,+5,+6' - - cu_df = read_csv(StringIO(buffer)) - pd_df = pd.read_csv(StringIO(buffer)) - assert cu_df.shape == (1, 3) - assert_eq(pd_df, cu_df) - - # test cases that fail with pandas - buffer_pd_fail = '"1,one," , ",2,two" ,3\n4,5,6' - cu_df = read_csv(StringIO(buffer_pd_fail)) - assert cu_df.shape == (1, 3) - - -def test_csv_reader_oversized_byte_range(): - buffer = "a,b,c,d,e\n4,5,6,7,8" - - cu_df = read_csv(StringIO(buffer), byte_range=(0, 1024)) - pd_df = pd.read_csv(StringIO(buffer)) - - assert all(pd_df.columns == cu_df.columns) - assert pd_df.shape == cu_df.shape - - -def test_csv_reader_index_col(): - buffer = "0,1,2\n3,4,5\n6,7,8" - names = ["int1", "int2", "int3"] - - # using a column name - cu_df = read_csv(StringIO(buffer), names=names, index_col="int1") - pd_df = pd.read_csv(StringIO(buffer), names=names, index_col="int1") - assert_eq(pd_df, cu_df) - - # using a column index - cu_df = read_csv(StringIO(buffer), header=None, index_col=0) - pd_df = pd.read_csv(StringIO(buffer), header=None, index_col=0) - assert_eq(cu_df.index, pd_df.index) - - # using a column index with names - cu_df = read_csv(StringIO(buffer), header=None, index_col=0, names=names) - pd_df = pd.read_csv( - StringIO(buffer), header=None, index_col=0, names=names - ) - assert_eq(cu_df.index, pd_df.index) - - # passing False to avoid using a column as index (no-op in cuDF) - cu_df = read_csv(StringIO(buffer), header=None, index_col=False) - pd_df = pd.read_csv(StringIO(buffer), header=None, index_col=False) - assert_eq(cu_df.index, pd_df.index) - - -@pytest.mark.parametrize("index_name", [None, "custom name", 124]) -@pytest.mark.parametrize("index_col", [None, 0, "a"]) -def test_csv_reader_index_names(index_name, index_col): - pdf = pd.DataFrame( - {"a": [1, 2, 3], "b": [10, 11, 12]}, index=["AB", "CD", "EF"] - ) - pdf.index.name = index_name - - buffer = pdf.to_csv() - actual = cudf.read_csv(StringIO(buffer), index_col=index_col) - expected = pd.read_csv(StringIO(buffer), index_col=index_col) - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "names", [["a", "b", "c"], [416, 905, 647], range(3), None] -) -def test_csv_reader_column_names(names): - buffer = "0,1,2\n3,4,5\n6,7,8" - - df = read_csv(StringIO(buffer), names=names) - if names is None: - assert list(df) == ["0", "1", "2"] - else: - assert list(df) == list(names) - - -def test_csv_reader_repeated_column_name(): - buffer = """A,A,A.1,A,A.2,A,A.4,A,A - 1,2,3.1,4,a.2,a,a.4,a,a - 2,4,6.1,8,b.2,b,b.4,b,b""" - - # 
pandas and cudf to have same repeated column names - pdf = pd.read_csv(StringIO(buffer)) - gdf = cudf.read_csv(StringIO(buffer)) - assert_eq(pdf.columns, gdf.columns) - - -def test_csv_reader_bools_false_positives(): - # values that are equal to ["True", "TRUE", "False", "FALSE"] - # when using ints to detect bool values - items = [3977, 4329, 24015, 27567] - - buffer = "\n".join(str(i) for i in items) - - df = read_csv(StringIO(buffer), header=None, dtype=["int32"]) - - np.testing.assert_array_equal(items, df["0"].to_numpy()) - - -def test_csv_reader_aligned_byte_range(tmp_path): - fname = tmp_path / "tmp_csvreader_file19.csv" - nelem = 1000 - - input_df = pd.DataFrame( - {"key": np.arange(0, nelem), "zeros": np.zeros(nelem)} - ) - input_df.to_csv(fname) - - df = cudf.read_csv(str(fname), byte_range=(0, 4096)) - # read_csv call above used to crash; the assert below is not crucial - assert np.count_nonzero(df["zeros"].to_pandas().values) == 0 - - -@pytest.mark.parametrize( - "pdf_dtype, gdf_dtype", - [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], -) -def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): - lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"] - values = [int(hex_int, 16) for hex_int in lines] - - buffer = "\n".join(lines) - - if gdf_dtype is not None: - # require explicit `hex` dtype to parse hexadecimals - pdf = pd.DataFrame(data=values, dtype=pdf_dtype, columns=["hex_int"]) - gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) - np.testing.assert_array_equal( - pdf["hex_int"], gdf["hex_int"].to_numpy() - ) - else: - # otherwise, dtype inference returns as object (string) - pdf = pd.read_csv(StringIO(buffer), names=["hex_int"]) - gdf = read_csv(StringIO(buffer), names=["hex_int"]) - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "np_dtype, gdf_dtype", - [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], -) -def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype): - # This tests values which cause an overflow warning that will become an - # error in pandas. NumPy wraps the overflow silently up to the bounds of a - # signed int64. 
- lines = [ - "0x0", - "-0x1000", - "0xfedcba", - "0xABCDEF", - "0xaBcDeF", - "0x9512c20b", - "0x7fffffff", - "0x7fffffffffffffff", - "-0x8000000000000000", - ] - values = [int(hex_int, 16) for hex_int in lines] - buffer = "\n".join(lines) - - gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) - - expected = np.array(values).astype(np_dtype) - actual = gdf["hex_int"].to_numpy() - np.testing.assert_array_equal(expected, actual) - - -@pytest.mark.parametrize("quoting", [0, 1, 2, 3]) -def test_csv_reader_pd_consistent_quotes(quoting): - names = ["text"] - dtypes = ["str"] - lines = ['"a"', '"b ""c"" d"', '"f!\n."'] - - buffer = "\n".join(lines) - - gd_df = read_csv( - StringIO(buffer), names=names, dtype=dtypes, quoting=quoting - ) - pd_df = pd.read_csv(StringIO(buffer), names=names, quoting=quoting) - - assert_eq(pd_df, gd_df) - - -def test_read_csv_names_header_combination(): - pdf = pd.DataFrame( - { - "firstname": ["Emma", "Ava", "Sophia"], - "lastname": ["Olivia", "Isabella", "Charlotte"], - "gender": ["F", "F", "F"], - } - ) - buffer = pdf.to_csv(header=True, index=False) - names = pdf.columns - - gdf = read_csv(StringIO(buffer), names=names, header=0) - assert_eq(pdf, gdf) - - gdf = read_csv(StringIO(buffer), header=0) - assert_eq(pdf, gdf) - - gdf = read_csv(StringIO(buffer)) - assert_eq(pdf, gdf) - - -def test_csv_reader_scientific_type_detection(): - buffer = """1.,1.1,-1.1,1E1,1e1,-1e1,-1e-1,1e-1,1.1e1,1.1e-1,-1.1e-1,-1.1e1 - +1.1,1E+1,1e+1,+1e1,+1e-1,1e-1,+1.1e1,1.1e+1,+1.1e+1,+1.1e1""" - expected = [ - 1.0, - 1.1, - -1.1, - 10.0, - 10.0, - -10, - -0.1, - 0.1, - 11, - 0.11, - -0.11, - -11, - 1.1, - 10.0, - 10.0, - 10, - 0.1, - 0.1, - 11, - 11, - 11, - 11, - ] - - df = read_csv(StringIO(buffer), header=None) - - for dt in df.dtypes: - assert dt == "float64" - for col in df: - assert np.isclose(df[col][0], expected[int(col)]) - - -@pytest.mark.parametrize("lineterminator", ["\n", "\r\n"]) -def test_csv_blank_first_row(lineterminator): - lines = ["colA,colB", "", "1, 1.1", "2, 2.2"] - buffer = lineterminator.join(lines) - - cu_df = read_csv(StringIO(buffer)) - - assert cu_df.shape == (2, 2) - assert all(cu_df.columns == ["colA", "colB"]) - - -@pytest.mark.parametrize("contents", ["", "\n"]) -def test_csv_empty_file(tmp_path, contents): - fname = tmp_path / "test_csv_empty_file.csv" - with open(fname, "w") as f: - f.write(contents) - - col_names = ["col1", "col2", "col3", "col4"] - in_dtypes = ["int", "str", "float", "short"] - out_dtypes = ["int64", "object", "float64", "int16"] - - # Empty dataframe if no columns names specified or inferred - df = read_csv(str(fname)) - assert len(df.columns) == 0 - - # No row dataframe if columns names are specified or inferred - df = read_csv(str(fname), dtype=in_dtypes, names=col_names) - assert all(df.columns == col_names) - assert list(df.dtypes) == out_dtypes - - -@pytest.mark.parametrize("contents", ["", "\n"]) -def test_csv_empty_buffer(contents): - col_names = ["col1", "col2", "col3", "col4"] - in_dtypes = ["int", "str", "float", "short"] - out_dtypes = ["int64", "object", "float64", "int16"] - - # Empty dataframe if no columns names specified or inferred - df = read_csv(StringIO(contents)) - assert len(df.columns) == 0 - - # No row dataframe if columns names are specified or inferred - df = read_csv(StringIO(contents), dtype=in_dtypes, names=col_names) - assert all(df.columns == col_names) - assert list(df.dtypes) == out_dtypes - - -@pytest.mark.parametrize( - "dtype", [["short", "float", "int"], {"A": "short", "C": 
"int"}] -) -def test_csv_reader_partial_dtype(dtype): - names_df = read_csv( - StringIO("0,1,2"), - names=["A", "B", "C"], - dtype=dtype, - usecols=["A", "C"], - ) - header_df = read_csv( - StringIO('"A","B","C"\n0,1,2'), dtype=dtype, usecols=["A", "C"] - ) - - assert_eq(names_df, header_df) - assert all(names_df.dtypes == ["int16", "int64"]) - - -def test_csv_writer_file_handle(tmp_path): - df = pd.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) - gdf = cudf.from_pandas(df) - - gdf_df_fname = tmp_path / "gdf_df_1.csv" - with open(gdf_df_fname, "w") as f: - gdf.to_csv(path_or_buf=f, index=False) - assert os.path.exists(gdf_df_fname) - - gdf2 = pd.read_csv(gdf_df_fname) - assert_eq(gdf, gdf2) - - -def test_csv_writer_file_append(tmp_path): - gdf1 = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) - gdf2 = cudf.DataFrame({"a": [4, 5, 6], "b": ["foo", "bar", "baz"]}) - - gdf_df_fname = tmp_path / "gdf_df_append.csv" - with open(gdf_df_fname, "w") as f: - gdf1.to_csv(f, index=False) - with open(gdf_df_fname, "a") as f: - gdf2.to_csv(f, header=False, index=False) - - result = cudf.read_csv(gdf_df_fname) - expected = cudf.concat([gdf1, gdf2], ignore_index=True) - assert_eq(result, expected, check_index_type=True) - - -def test_csv_writer_buffer(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["xxx", "yyyy", "zzzzz"]}) - - buffer = BytesIO() - gdf.to_csv(buffer, index=False) - - result = cudf.read_csv(buffer) - assert_eq(result, gdf) - - -def test_csv_writer_numeric_data(dtype, numeric_dataframe, tmp_path): - pdf_df_fname = tmp_path / "pdf_df_1.csv" - gdf_df_fname = tmp_path / "gdf_df_1.csv" - - df = numeric_dataframe.astype(dtype) - gdf = cudf.from_pandas(df) - df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") - gdf.to_csv(path_or_buf=gdf_df_fname, index=False) - - assert os.path.exists(pdf_df_fname) - assert os.path.exists(gdf_df_fname) - - expect = pd.read_csv(pdf_df_fname) - got = pd.read_csv(gdf_df_fname) - assert_eq(expect, got) - - -def test_csv_writer_datetime_data(datetime_dataframe, tmp_path): - pdf_df_fname = tmp_path / "pdf_df_2.csv" - gdf_df_fname = tmp_path / "gdf_df_2.csv" - - df = datetime_dataframe - gdf = cudf.from_pandas(df) - df.to_csv(path_or_buf=pdf_df_fname, index=False, lineterminator="\n") - gdf.to_csv(path_or_buf=gdf_df_fname, index=False) - - assert os.path.exists(pdf_df_fname) - assert os.path.exists(gdf_df_fname) - - expect = pd.read_csv(pdf_df_fname) - got = pd.read_csv(gdf_df_fname) - assert_eq(expect, got) - - -@pytest.mark.parametrize("lineterminator", ["\r", "\n", "\t", np.str_("\n")]) -@pytest.mark.parametrize("sep", [",", "/", np.str_(",")]) -def test_csv_writer_terminator_sep(lineterminator, sep, cudf_mixed_dataframe): - df = cudf_mixed_dataframe - - buffer = BytesIO() - df.to_csv(buffer, lineterminator=lineterminator, sep=sep, index=False) - - got = read_csv(buffer, lineterminator=lineterminator, sep=sep) - assert_eq(df, got) - - -@pytest.mark.parametrize( - "lineterminator", ["\r\n", "ABC", "\t\t", np.str_("\r\n")] -) -def test_csv_writer_multichar_terminator(lineterminator, cudf_mixed_dataframe): - df = cudf_mixed_dataframe - - default_terminator_csv = StringIO() - df.to_csv(default_terminator_csv) - - # Need to check manually since readers don't support - # multicharacter line terminators - expected = default_terminator_csv.getvalue().replace("\n", lineterminator) - - buffer = StringIO() - df.to_csv(buffer, lineterminator=lineterminator) - got = buffer.getvalue() - - assert_eq(expected, got) - - 
-@pytest.mark.parametrize( - "columns", - [ - ["Date", "Float"], - ["Integer2", "Float", "Date", "Integer", "String", "Boolean"], - None, - ], -) -@pytest.mark.parametrize("header", [True, False]) -@pytest.mark.parametrize("index", [True, False]) -@pytest.mark.parametrize("bool_box", [bool, np.bool_]) -def test_csv_writer_column_and_header_options( - columns, header, index, bool_box, pd_mixed_dataframe -): - header = bool_box(header) - index = bool_box(index) - pdf = pd_mixed_dataframe - df = cudf.from_pandas(pdf) - - cudf_buffer = BytesIO() - df.to_csv(cudf_buffer, columns=columns, header=header, index=index) - pd_buffer = BytesIO() - pdf.to_csv(pd_buffer, columns=columns, header=header, index=index) - - expected = cudf.read_csv(pd_buffer, header=0 if header else None) - got = cudf.read_csv(cudf_buffer, header=0 if header else None) - - expected_column_cnt = (1 if index else 0) + ( - len(columns) if columns else pdf.shape[1] - ) - assert_eq(expected_column_cnt, got.shape[1]) - assert_eq(expected, got) - - -def test_csv_writer_empty_columns_parameter(cudf_mixed_dataframe): - write_str = cudf_mixed_dataframe.to_csv(columns=[], index=False) - assert_eq(write_str, "\n") - - -def test_csv_writer_multiindex(tmp_path): - pdf_df_fname = tmp_path / "pdf_df_3.csv" - gdf_df_fname = tmp_path / "gdf_df_3.csv" - - rng = np.random.default_rng(seed=0) - gdf = cudf.DataFrame( - { - "a": rng.integers(0, 5, 20), - "b": rng.integers(0, 5, 20), - "c": range(20), - "d": rng.random(20), - } - ) - gdg = gdf.groupby(["a", "b"]).mean() - pdg = gdg.to_pandas() - pdg.to_csv(pdf_df_fname) - gdg.to_csv(gdf_df_fname) - - assert os.path.exists(pdf_df_fname) - assert os.path.exists(gdf_df_fname) - - expect = pd.read_csv(pdf_df_fname) - got = pd.read_csv(gdf_df_fname) - assert_eq(expect, got) - - -@pytest.mark.parametrize("chunksize", [None, 2, 1000]) -def test_csv_writer_chunksize(chunksize, numeric_dataframe, dtype): - cu_df = cudf.from_pandas(numeric_dataframe.astype(dtype)) - - buffer = BytesIO() - cu_df.to_csv(buffer, chunksize=chunksize, index=False) - - got = cudf.read_csv(buffer, dtype=[dtype]) - assert_eq(cu_df, got) - - -@pytest.mark.parametrize( - "data", - [ - {"vals": [1, 2, 3]}, - {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]}, - {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, - ], -) -def test_to_csv_empty_filename(data): - df = cudf.DataFrame(data) - pdf = df.to_pandas() - - actual = df.to_csv() - expected = pdf.to_csv() - - assert actual == expected - - -@pytest.mark.parametrize( - "data", - [ - {"vals": [1, 2, 3]}, - {"vals1": [1, 2, 3], "vals2": ["hello", "rapids", "cudf"]}, - {"vals1": [None, 2.0, 3.0], "vals2": ["hello", "rapids", None]}, - ], -) -def test_to_csv_StringIO(data): - df = cudf.DataFrame(data) - cudf_io = StringIO() - pandas_io = StringIO() - - pdf = df.to_pandas() - - df.to_csv(cudf_io) - pdf.to_csv(pandas_io) - - cudf_io.seek(0) - pandas_io.seek(0) - - assert cudf_io.read() == pandas_io.read() - - -def test_csv_writer_empty_dataframe(tmp_path): - df_fname = tmp_path / "gdf_df_5.csv" - gdf = cudf.DataFrame({"float_point": [], "integer": []}) - gdf["float_point"] = gdf["float_point"].astype("float") - gdf["integer"] = gdf["integer"].astype("int") - - gdf.to_csv(df_fname, index=False) - - df = cudf.read_csv(df_fname) - - assert df.shape == (0, 2) - assert all(df.dtypes == ["object", "object"]) - - -def test_csv_write_chunksize_corner_case(tmp_path): - # With this num of rows and chunksize - # libcudf splits table such a way that it - # will end up creating an 
empty table slice
-    # which caused issue 5588.
-    df_fname = tmp_path / "gdf_df_17.csv"
-    df = cudf.DataFrame({"a": np.arange(10_000)})
-    df.to_csv(df_fname, chunksize=1000, index=False)
-    got = cudf.read_csv(df_fname)
-
-    assert_eq(df, got)
-
-
-def test_csv_write_no_caller_manipulation():
-    df = cudf.DataFrame({"a": [1, 2, 3]})
-    df_copy = df.copy(deep=True)
-    _ = df.to_csv(index=True)
-    assert_eq(df, df_copy)
-
-
-@pytest.mark.parametrize(
-    "pdf",
-    [
-        pd.DataFrame({"a": [1, 2, 3], "": [10, 20, 40]}),
-        pd.DataFrame({"": [10, 20, 40], "a": [1, 2, 3]}),
-        pd.DataFrame(
-            {"a": [1, 2, 3], "": [10, 20, 40]},
-            index=pd.Index(["a", "z", "v"], name="custom name"),
-        ),
-    ],
-)
-@pytest.mark.parametrize("index", [True, False])
-@pytest.mark.parametrize("columns", [["a"], [""], None])
-def test_csv_write_empty_column_name(pdf, index, columns):
-    df = cudf.DataFrame.from_pandas(pdf)
-    expected = pdf.to_csv(index=index, columns=columns)
-    actual = df.to_csv(index=index, columns=columns)
-
-    assert expected == actual
-
-
-@pytest.mark.parametrize("idx", [None, pd.Index([], name="index name")])
-@pytest.mark.parametrize("index", [True, False])
-def test_csv_write_empty_dataframe(idx, index):
-    df = cudf.DataFrame(index=idx)
-    pdf = df.to_pandas()
-
-    expected = pdf.to_csv(index=index)
-    actual = df.to_csv(index=index)
-
-    assert expected == actual
-
-
-@pytest.mark.parametrize(
-    "df",
-    [
-        pd.DataFrame(
-            {
-                "a": [1, 2, 3, None],
-                "": ["a", "v", None, None],
-                None: [12, 12, 32, 44],
-            }
-        ),
-        pd.DataFrame(
-            {
-                np.nan: [1, 2, 3, None],
-                "": ["a", "v", None, None],
-                None: [12, 12, 32, 44],
-            }
-        ),
-        pd.DataFrame({"": [1, None, 3, 4]}),
-        pd.DataFrame({None: [1, None, 3, 4]}),
-        pd.DataFrame(columns=[None, "", "a", "b"]),
-        pd.DataFrame(columns=[None]),
-        pd.DataFrame(columns=[""]),
-    ],
-)
-@pytest.mark.parametrize(
-    "na_rep", ["", "_NA_", "---", "_____CUSTOM_NA_REP______"]
-)
-def test_csv_write_dataframe_na_rep(df, na_rep):
-    gdf = cudf.from_pandas(df)
-
-    expected = df.to_csv(na_rep=na_rep)
-    actual = gdf.to_csv(na_rep=na_rep)
-
-    assert expected == actual
-
-
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        "int",
-        "str",
-        "float",
-        np.int32,
-        np.dtype("float32"),
-        {"a": "int32", "b": "float64", "c": "uint8"},
-        int,
-        str,
-        object,
-    ],
-)
-def test_csv_reader_dtypes(dtype):
-    buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n4,13,114\n"
-
-    expected = pd.read_csv(StringIO(buf), dtype=dtype)
-    actual = cudf.read_csv(StringIO(buf), dtype=dtype)
-
-    assert_eq(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "dtype", ["Int64", "UInt32", {"a": "UInt64", "b": "Float64", "c": "Int32"}]
-)
-def test_csv_reader_nullable_dtypes(dtype):
-    buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n4,13,114\n"
-
-    expected = pd.read_csv(StringIO(buf), dtype=dtype)
-    actual = cudf.read_csv(StringIO(buf), dtype=dtype)
-
-    assert_eq(expected, actual.to_pandas(nullable=True))
-
-
-@pytest.mark.parametrize(
-    "dtype", sorted(list(cudf.utils.dtypes.TIMEDELTA_TYPES))
-)
-def test_csv_reader_timedelta_dtypes(dtype):
-    buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n43432423,13342,13243214\n"
-
-    expected = pd.read_csv(StringIO(buf)).astype(dtype)
-    actual = cudf.read_csv(StringIO(buf), dtype=dtype)
-
-    assert_eq(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "dtype", sorted(list(cudf.utils.dtypes.DATETIME_TYPES))
-)
-def test_csv_reader_datetime_dtypes(dtype):
-    buf = "a,b,c\n1,10,111\n2,11,112\n3,12,113\n43432423,13342,13243214\n"
-
-    expected = pd.read_csv(StringIO(buf)).astype(dtype)
-    actual = 
cudf.read_csv(StringIO(buf), dtype=dtype) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "df", - [ - lambda: cudf.DataFrame( - { - "a": cudf.Series([1, 2, 3, 1, 2], dtype="category"), - "b": cudf.Series(["a", "c", "a", "b", "a"], dtype="category"), - } - ), - lambda: cudf.DataFrame( - { - "a": cudf.Series([1.1, 2, 3, 1.1, 2], dtype="category"), - "b": cudf.Series( - [None, "c", None, "b", "a"], dtype="category" - ), - } - ), - lambda: cudf.DataFrame( - { - "b": cudf.Series( - [1.1, 2, 3, 1.1, 2], - dtype="category", - index=cudf.CategoricalIndex( - ["abc", "def", "ghi", "jkl", "xyz"] - ), - ) - } - ), - ], -) -def test_csv_writer_category(df): - df = df() - pdf = df.to_pandas() - - expected = pdf.to_csv() - actual = df.to_csv() - - assert expected == actual - - -@pytest.mark.parametrize( - "dtype", - [ - "category", - {"a": "category", "b": "str"}, - {"b": "category"}, - {"a": "category"}, - {"a": pd.CategoricalDtype([1, 2])}, - {"b": pd.CategoricalDtype([1, 2, 3])}, - {"b": pd.CategoricalDtype(["b", "a"]), "a": "str"}, - pd.CategoricalDtype(["a", "b"]), - ], -) -def test_csv_reader_category(dtype): - df = cudf.DataFrame({"a": [1, 2, 3, None], "b": ["a", "b", None, "c"]}) - csv_buf = df.to_csv() - - actual = cudf.read_csv(StringIO(csv_buf), dtype=dtype) - expected = pd.read_csv(StringIO(csv_buf), dtype=dtype) - - assert_eq(expected, actual, check_dtype=True) - - -def test_csv_writer_datetime_sep(): - df = cudf.DataFrame( - {"a": cudf.Series([22343, 2323423, 234324234], dtype="datetime64[ns]")} - ) - df["a"] = df["a"].astype("datetime64[s]") - expected = df.to_pandas().to_csv(date_format="%Y-%m-%dT%H:%M:%SZ", sep="-") - actual = df.to_csv(sep="-") - assert expected == actual - - -def test_na_filter_empty_fields(): - test_na = "TEST_NAN" - df = pd.DataFrame({"col0": ["valid", None, "also_valid", "", test_na]}) - buffer = df.to_csv(index=False) - - pdf = pd.read_csv(StringIO(buffer), na_filter=False) - gdf = cudf.read_csv(StringIO(buffer), na_filter=False) - assert_eq(pdf, gdf) - - pdf = pd.read_csv(StringIO(buffer), keep_default_na=False) - gdf = cudf.read_csv(StringIO(buffer), keep_default_na=False) - assert_eq(pdf, gdf) - - pdf = pd.read_csv( - StringIO(buffer), keep_default_na=False, na_values=test_na - ) - gdf = cudf.read_csv( - StringIO(buffer), keep_default_na=False, na_values=test_na - ) - assert_eq(pdf, gdf) - - -def test_csv_sep_error(): - pdf = pd.DataFrame({"a": [1, 2, 3]}) - gdf = cudf.DataFrame({"a": [1, 2, 3]}) - assert_exceptions_equal( - lfunc=pdf.to_csv, - rfunc=gdf.to_csv, - lfunc_args_and_kwargs=([], {"sep": "abc"}), - rfunc_args_and_kwargs=([], {"sep": "abc"}), - ) - - assert_exceptions_equal( - lfunc=pdf.to_csv, - rfunc=gdf.to_csv, - lfunc_args_and_kwargs=([], {"sep": 1}), - rfunc_args_and_kwargs=([], {"sep": 1}), - ) - - -def test_to_csv_encoding_error(): - # TODO: Remove this test once following - # issue is fixed: https://github.com/rapidsai/cudf/issues/2957 - df = cudf.DataFrame({"a": ["你好", "test"]}) - encoding = "utf-8-sig" - error_message = ( - f"Encoding {encoding} is not supported. " - + "Currently, only utf-8 encoding is supported." 
- ) - with pytest.raises(NotImplementedError, match=re.escape(error_message)): - df.to_csv("test.csv", encoding=encoding) - - -def test_to_csv_compression_error(): - df = cudf.DataFrame({"a": ["test"]}) - compression = "snappy" - error_message = "Writing compressed csv is not currently supported in cudf" - with pytest.raises(NotImplementedError, match=re.escape(error_message)): - df.to_csv("test.csv", compression=compression) - - -def test_empty_df_no_index(): - actual = cudf.DataFrame({}) - buffer = BytesIO() - actual.to_csv(buffer, index=False) - - result = cudf.read_csv(buffer) - - assert_eq(actual, result) - - -def test_default_integer_bitwidth( - cudf_mixed_dataframe, default_integer_bitwidth -): - # Test that integer columns in csv are _inferred_ as user specified - # bitwidth - buf = BytesIO() - cudf_mixed_dataframe.to_csv(buf) - buf.seek(0) - read = cudf.read_csv(buf) - assert read["Integer"].dtype == np.dtype( - f"i{default_integer_bitwidth // 8}" - ) - assert read["Integer2"].dtype == np.dtype( - f"i{default_integer_bitwidth // 8}" - ) - - -def test_default_integer_bitwidth_partial( - cudf_mixed_dataframe, default_integer_bitwidth -): - # Test that integer columns in csv are _inferred_ as user specified - # bitwidth - buf = BytesIO() - cudf_mixed_dataframe.to_csv(buf) - buf.seek(0) - read = cudf.read_csv(buf, dtype={"Integer": "int64"}) - assert read["Integer"].dtype == np.dtype("i8") - assert read["Integer2"].dtype == np.dtype( - f"i{default_integer_bitwidth // 8}" - ) - - -@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") -def test_default_integer_bitwidth_extremes( - numeric_extremes_dataframe, default_integer_bitwidth -): - # Test that integer columns in csv are _inferred_ as user specified - # bitwidth - buf = BytesIO() - cudf.DataFrame.from_pandas(numeric_extremes_dataframe).to_csv(buf) - buf.seek(0) - read = cudf.read_csv(buf) - - assert read["int64"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") - assert read["long"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}") - assert read["uint64"].dtype == np.dtype( - f"u{default_integer_bitwidth // 8}" - ) - - -def test_default_float_bitwidth(cudf_mixed_dataframe, default_float_bitwidth): - # Test that float columns in csv are _inferred_ as user specified - # bitwidth - buf = BytesIO() - cudf_mixed_dataframe.to_csv(buf) - buf.seek(0) - read = cudf.read_csv(buf) - assert read["Float"].dtype == np.dtype(f"f{default_float_bitwidth // 8}") - - -def test_default_float_bitwidth_partial(default_float_bitwidth): - # Test that float columns in csv are _inferred_ as user specified - # bitwidth - read = cudf.read_csv( - StringIO("float1,float2\n1.0,2.0\n3.0,4.0"), - dtype={"float2": "float64"}, - ) - assert read["float1"].dtype == np.dtype(f"f{default_float_bitwidth // 8}") - assert read["float2"].dtype == np.dtype("f8") - - -@pytest.mark.parametrize( - "usecols,names", - [ - # selection using indices; only names of selected columns are specified - ([1, 2], ["b", "c"]), - # selection using indices; names of all columns are specified - ([1, 2], ["a", "b", "c"]), - # selection using indices; duplicates - ([2, 2], ["a", "b", "c"]), - # selection using indices; out of order - ([2, 1], ["a", "b", "c"]), - # selection using names - (["b"], ["a", "b", "c"]), - # selection using names; multiple columns - (["b", "c"], ["a", "b", "c"]), - # selection using names; duplicates - (["c", "c"], ["a", "b", "c"]), - # selection using names; out of order - (["c", "b"], ["a", "b", "c"]), - ], -) -def 
test_column_selection_plus_column_names(usecols, names): - lines = [ - "num,datetime,text", - "123,2018-11-13T12:00:00,abc", - "456,2018-11-14T12:35:01,def", - "789,2018-11-15T18:02:59,ghi", - ] - - buffer = "\n".join(lines) + "\n" - - assert_eq( - pd.read_csv(StringIO(buffer), usecols=usecols, names=names), - cudf.read_csv(StringIO(buffer), usecols=usecols, names=names), - ) - - -def test_read_compressed_BOM(tmp_path): - buffer = 'int, string\n1, "a"\n2, "b"\n3, "c"\n' - - fname = tmp_path / "tmp_csvreader_file20.gz" - with gzip.open(fname, "wt", encoding="utf-8") as f: - f.write(codecs.BOM_UTF8.decode("utf-8")) - f.write(buffer) - - assert_eq(pd.read_csv(fname), cudf.read_csv(fname)) - - -def test_read_header_none_pandas_compat_column_type(): - data = "1\n2\n" - with cudf.option_context("mode.pandas_compatible", True): - result = cudf.read_csv(StringIO(data), header=None).columns - expected = pd.read_csv(StringIO(data), header=None).columns - pd.testing.assert_index_equal(result, expected, exact=True) - - -@pytest.mark.parametrize("buffer", ["1", '"one"']) -def test_read_single_unterminated_row(buffer): - gdf = cudf.read_csv(StringIO(buffer), header=None) - assert_eq(gdf.shape, (1, 1)) - - -@pytest.mark.parametrize("buffer", ["\n", "\r\n"]) -def test_read_empty_only_row(buffer): - gdf = cudf.read_csv(StringIO(buffer), header=None) - assert_eq(gdf.shape, (0, 0)) - - -def test_read_empty_only_row_custom_terminator(): - gdf = cudf.read_csv(StringIO("*"), header=None, lineterminator="*") - assert_eq(gdf.shape, (0, 0)) - - -def test_empty_file_pandas_compat_raises(tmp_path): - empty_file = tmp_path / "empty.csv" - empty_file.touch() - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(pd.errors.EmptyDataError): - cudf.read_csv(StringIO()) - with pytest.raises(pd.errors.EmptyDataError): - cudf.read_csv(empty_file) - with pytest.raises(pd.errors.EmptyDataError): - cudf.read_csv(str(empty_file)) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py deleted file mode 100644 index 04c4d55afe6..00000000000 --- a/python/cudf/cudf/tests/test_feather.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
-
-import os
-from string import ascii_letters
-
-import numpy as np
-import pandas as pd
-import pyarrow as pa
-import pytest
-
-import cudf
-from cudf.testing import assert_eq
-from cudf.testing._utils import NUMERIC_TYPES
-
-
-@pytest.fixture(params=[0, 10])
-def pdf(request):
-    rng = np.random.default_rng(seed=0)
-    types = [*NUMERIC_TYPES, "bool"]
-    nrows = request.param
-
-    # Create a pandas dataframe with random data of mixed types
-    test_pdf = pd.DataFrame(
-        {
-            f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ)
-            for typ in types
-        }
-    )
-    # Delete the name of the column index, and rename the row index
-    test_pdf.columns.name = None
-    test_pdf.index.name = "index"
-
-    # Create non-numeric categorical data, which may otherwise get typecast
-    data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)]
-    test_pdf["col_category"] = pd.Series(data, dtype="category")
-
-    # Feather can't handle indexes properly
-    test_pdf = test_pdf.reset_index(drop=True)
-    test_pdf.index.name = None
-
-    return test_pdf
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-@pytest.mark.filterwarnings("ignore:Strings are not yet supported")
-@pytest.mark.parametrize(
-    "columns",
-    [["col_int8"], ["col_category"], ["col_int32", "col_float32"], None],
-)
-def test_feather_reader(pdf, columns, tmp_path):
-    feather_file = tmp_path / "test.feather"
-    pdf.to_feather(feather_file)
-    expect = pa.feather.read_table(feather_file, columns=columns).to_pandas()
-    got = (
-        cudf.read_feather(feather_file, columns=columns)
-        .to_arrow(preserve_index=False)
-        .to_pandas()
-    )
-
-    assert_eq(expect, got, check_categorical=False)
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-def test_feather_writer(tmp_path, pdf):
-    gdf = cudf.DataFrame.from_pandas(pdf)
-    pdf_fname = tmp_path / "pdf.feather"
-    gdf_fname = tmp_path / "gdf.feather"
-
-    pdf.to_feather(pdf_fname)
-    gdf.to_feather(gdf_fname)
-
-    assert os.path.exists(pdf_fname)
-    assert os.path.exists(gdf_fname)
-
-    expect = pa.feather.read_table(pdf_fname)
-    got = pa.feather.read_table(gdf_fname)
-
-    assert pa.Table.equals(expect, got)
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
deleted file mode 100644
index adcd3fff21d..00000000000
--- a/python/cudf/cudf/tests/test_json.py
+++ /dev/null
@@ -1,1467 +0,0 @@
-# Copyright (c) 2018-2025, NVIDIA CORPORATION.
-
-import copy
-import gzip
-import itertools
-import os
-from io import BytesIO, StringIO
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import pyarrow as pa
-import pytest
-
-import cudf
-from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing import assert_eq
-from cudf.testing._utils import (
-    DATETIME_TYPES,
-    NUMERIC_TYPES,
-    TIMEDELTA_TYPES,
-    expect_warning_if,
-)
-
-
-def make_numeric_dataframe(nrows, dtype):
-    df = pd.DataFrame()
-    df["col1"] = np.arange(nrows, dtype=dtype)
-    df["col2"] = np.arange(1, 1 + nrows, dtype=dtype)
-    return df
-
-
-@pytest.fixture(params=[0, 1, 10, 100])
-def pdf(request):
-    rng = np.random.default_rng(seed=0)
-    types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"]
-    nrows = request.param
-
-    # Create a pandas dataframe with random data of mixed types
-    test_pdf = pd.DataFrame(
-        {
-            f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ)
-            for typ in types
-        }
-    )
-    # Delete the name of the column index, and rename the row index
-    test_pdf.columns.name = None
-    test_pdf.index.name = "test_index"
-
-    return test_pdf
-
-
-@pytest.fixture
-def gdf(pdf):
-    return cudf.DataFrame.from_pandas(pdf)
-
-
-@pytest.fixture(params=[0, 1, 10, 100])
-def gdf_writer_types(request):
-    # datetime64[us] and datetime64[ns] are unsupported due to a bug in the parser
-    types = [
-        *NUMERIC_TYPES,
-        "datetime64[s]",
-        "datetime64[ms]",
-        *TIMEDELTA_TYPES,
-        "bool",
-        "str",
-    ]
-    typer = {"col_" + val: val for val in types}
-    ncols = len(types)
-    nrows = request.param
-
-    # Create a cudf dataframe with consecutive values of mixed types
-    test_pdf = cudf.DataFrame(
-        [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
-        columns=pd.Index([f"col_{typ}" for typ in types]),
-    )
-
-    # Cast all the column dtypes to objects, rename them, and then cast to
-    # appropriate types
-    test_pdf = test_pdf.astype(typer)
-
-    return test_pdf
-
-
-index_params = [True, False]
-# tests limited to compression formats supported by pandas and cudf: bz2, gzip, zip, zstd
-compression_params = ["bz2", "gzip", "zip", "zstd", None]
-orient_params = ["columns", "records", "table", "split"]
-params = itertools.product(index_params, compression_params, orient_params)
-
-
-@pytest.fixture(params=params)
-def json_files(request, tmp_path_factory, pdf):
-    index, compression, orient = request.param
-    if index is False and orient not in ("split", "table"):
-        pytest.skip(
-            "'index=False' is only valid when 'orient' is 'split' or 'table'"
-        )
-    if index is False and orient == "table":
-        pytest.skip("'index=False' isn't valid when 'orient' is 'table'")
-    if index is True and orient not in ("split", "table", "index", "columns"):
-        pytest.skip(
-            "'index=True' is only valid when 'orient' is "
-            "'split', 'table', 'index', or 'columns'"
-        )
-    fname_df = tmp_path_factory.mktemp("json") / "test_df.json"
-    fname_series = tmp_path_factory.mktemp("json") / "test_series.json"
-    pdf.to_json(fname_df, index=index, compression=compression, orient=orient)
-    pdf["col_int32"].to_json(
-        fname_series, index=index, compression=compression, orient=orient
-    )
-    return (fname_df, fname_series, orient, compression)
-
-
-@pytest.mark.filterwarnings("ignore:Strings are not yet supported")
-@pytest.mark.filterwarnings("ignore:Using CPU")
-def test_json_reader(json_files):
-    path_df, path_series, orient, compression = json_files
-    expect_df = pd.read_json(path_df, orient=orient, compression=compression)
-    got_df = cudf.read_json(path_df, orient=orient, compression=compression)
-    if len(expect_df) == 0:
-        expect_df = 
expect_df.reset_index(drop=True)
-        expect_df.columns = expect_df.columns.astype("object")
-    if len(got_df) == 0:
-        got_df = got_df.reset_index(drop=True)
-
-    assert_eq(expect_df, got_df, check_categorical=False)
-
-    # Only these orients are allowed for Series, but this isn't enforced by pandas
-    if orient in ("split", "records", "index"):
-        expect_series = pd.read_json(
-            path_series, orient=orient, compression=compression, typ="series"
-        )
-        got_series = cudf.read_json(
-            path_series, orient=orient, compression=compression, typ="series"
-        )
-        if len(expect_series) == 0:
-            expect_series = expect_series.reset_index(drop=True)
-        if len(got_series) == 0:
-            got_series = got_series.reset_index(drop=True)
-
-        assert_eq(expect_series, got_series)
-
-
-@pytest.mark.filterwarnings("ignore:Can't infer compression")
-@pytest.mark.filterwarnings("ignore:Using CPU")
-def test_json_writer(tmpdir, pdf, gdf):
-    pdf_df_fname = tmpdir.join("pdf_df.json")
-    gdf_df_fname = tmpdir.join("gdf_df.json")
-
-    pdf.to_json(pdf_df_fname)
-    gdf.to_json(gdf_df_fname)
-
-    assert os.path.exists(pdf_df_fname)
-    assert os.path.exists(gdf_df_fname)
-
-    expect_df = pd.read_json(pdf_df_fname)
-    got_df = pd.read_json(gdf_df_fname)
-
-    assert_eq(expect_df, got_df)
-
-    for column in pdf.columns:
-        pdf_series_fname = tmpdir.join(column + "_" + "pdf_series.json")
-        gdf_series_fname = tmpdir.join(column + "_" + "gdf_series.json")
-
-        pdf[column].to_json(pdf_series_fname)
-        gdf[column].to_json(gdf_series_fname)
-
-        assert os.path.exists(pdf_series_fname)
-        assert os.path.exists(gdf_series_fname)
-
-        expect_series = pd.read_json(pdf_series_fname, typ="series")
-        got_series = pd.read_json(gdf_series_fname, typ="series")
-
-        assert_eq(expect_series, got_series)
-
-        # Make sure results align for regular strings, not just files
-        pdf_string = pdf[column].to_json()
-        gdf_string = gdf[column].to_json()
-        assert_eq(pdf_string, gdf_string)
-
-
-@pytest.mark.parametrize(
-    "lines", [True, False], ids=["lines=True", "lines=False"]
-)
-def test_cudf_json_writer(pdf, lines):
-    # Remove datetime columns because pandas doesn't support them here
-    for col_name in pdf.columns:
-        if "datetime" in col_name:
-            pdf.drop(col_name, axis=1, inplace=True)
-    gdf = cudf.DataFrame.from_pandas(pdf)
-    pdf_string = pdf.to_json(orient="records", lines=lines)
-    gdf_string = gdf.to_json(orient="records", lines=lines, engine="cudf")
-
-    assert_eq(pdf_string, gdf_string)
-
-    gdf_string = gdf.to_json(
-        orient="records", lines=lines, engine="cudf", rows_per_chunk=8
-    )
-
-    assert_eq(pdf_string, gdf_string)
-
-
-def test_cudf_json_writer_read(gdf_writer_types):
-    dtypes = {
-        col_name: col_name[len("col_") :]
-        for col_name in gdf_writer_types.columns
-    }
-    gdf_string = gdf_writer_types.to_json(
-        orient="records", lines=True, engine="cudf"
-    )
-    gdf2 = cudf.read_json(
-        StringIO(gdf_string),
-        lines=True,
-        engine="cudf",
-        dtype=dict(dtypes),
-    )
-    pdf2 = pd.read_json(StringIO(gdf_string), lines=True, dtype=dict(dtypes))
-
-    # Bug in pandas https://github.com/pandas-dev/pandas/issues/28558
-    if pdf2.empty:
-        pdf2.reset_index(drop=True, inplace=True)
-
-    # Pandas moved to a consistent datetime parsing format:
-    # https://pandas.pydata.org/docs/dev/whatsnew/v2.0.0.html#datetimes-are-now-parsed-with-a-consistent-format
-    for unit in ["s", "ms"]:
-        if f"col_datetime64[{unit}]" in pdf2.columns:
-            pdf2[f"col_datetime64[{unit}]"] = (
-                pd.to_datetime(pdf2[f"col_datetime64[{unit}]"], format="mixed")
-                .dt.tz_localize(None)
-                .astype(f"datetime64[{unit}]")
-            )
-    assert_eq(pdf2, gdf2)
-
-
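-# [Editor's note: illustrative sketch, not part of the original test file.]
-# rows_per_chunk, exercised in test_cudf_json_writer above, only controls how
-# the cudf engine batches rows internally; the serialized output is expected
-# to be byte-identical regardless of the chunk size, e.g.:
-#
-#     gdf = cudf.DataFrame({"a": range(20)})
-#     s1 = gdf.to_json(orient="records", lines=True, engine="cudf")
-#     s2 = gdf.to_json(
-#         orient="records", lines=True, engine="cudf", rows_per_chunk=8
-#     )
-#     assert s1 == s2
-
-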
-@pytest.mark.parametrize( - "jsonl_string, expected", - [ - # fixed width - ("""{"a":10, "b":1.1}\n {"a":20, "b":2.1}\n""", None), - # simple list - ("""{"a":[1, 2, 3], "b":1.1}\n {"a":[]}\n""", None), - # simple struct - ("""{"a":{"c": 123 }, "b":1.1}\n {"a": {"c": 456}}\n""", None), - # list of lists - ("""{"a":[[], [1, 2], [3, 4]], "b":1.1}\n""", None), - ("""{"a":[null, [1, 2], [null, 4]], "b":1.1}\n""", None), - # list of structs - # error ("""{"a":[null, {}], "b":1.1}\n""", None), - ( - """{"a":[null, {"L": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}\n""", - None, - ), - ( - """{"a":[{"L": 123}, null], "b":1.0}\n {"b":1.1}\n {"b":2.1}\n""", - None, - ), - # struct of lists - ( - """{"a":{"L": [1, 2, 3]}, "b":1.1}\n {"a": {"L": [4, 5, 6]}}\n""", - None, - ), - ("""{"a":{"L": [1, 2, null]}, "b":1.1}\n {"a": {"L": []}}\n""", None), - # struct of structs - ( - """{"a":{"L": {"M": 123}}, "b":1.1} - {"a": {"L": {"M": 456}}}\n""", - None, - ), - ( - """{"a":{"L": {"M": null}}, "b":1.1}\n {"a": {"L": {}}}\n""", - """{"a":{"L": {}}, "b":1.1}\n {"a": {"L": {}}}\n""", - ), - # list of structs of lists - ("""{"a":[{"L": [1, 2, 3]}, {"L": [4, 5, 6]}], "b":1.1}\n""", None), - ("""{"a":[{"L": [1, 2, null]}, {"L": []}], "b":1.1}\n""", None), - # struct of lists of structs - ("""{"a":{"L": [{"M": 123}, {"M": 456}]}, "b":1.1}\n""", None), - ( - """{"a":{"L": [{"M": null}, {}]}, "b":1.1}\n""", - """{"a":{"L": [{}, {}]}, "b":1.1}\n""", - ), - # empty structs - ("""{"A": null}\n {"A": {}}\n {}""", """{}\n{"A":{}}\n{}\n"""), - ( - """{"A": {"B": null}}\n {"A": {"B": {}}}\n {"A": {}}""", - """{"A":{}}\n{"A":{"B":{}}}\n{"A":{}}\n""", - ), - ], -) -def test_cudf_json_roundtrip(jsonl_string, expected): - gdf = cudf.read_json( - StringIO(jsonl_string), - lines=True, - engine="cudf", - # dtype=dict(dtypes), - ) - expected = jsonl_string if expected is None else expected - gdf_string = gdf.to_json( - orient="records", lines=True, engine="cudf", include_nulls=False - ) - assert_eq(gdf_string, expected.replace(" ", "")) - - -@pytest.mark.parametrize("sink", ["string", "file"]) -def test_cudf_json_writer_sinks(sink, tmp_path_factory): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - target = None - if sink == "string": - target = StringIO() - elif sink == "file": - target = tmp_path_factory.mktemp("json") / "test_df.json" - df.to_json(target, engine="cudf") - if sink == "string": - assert ( - target.getvalue() == '[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]' - ) - elif sink == "file": - assert os.path.exists(target) - with open(target, "r") as f: - assert f.read() == '[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]' - - -@pytest.fixture( - params=["string", "filepath", "pathobj", "bytes_io", "string_io", "url"] -) -def json_input(request, tmp_path_factory): - input_type = request.param - buffer = "[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9]\n" - fname = tmp_path_factory.mktemp("json") / "test_df.json" - if not os.path.isfile(fname): - with open(str(fname), "w") as fp: - fp.write(buffer) - - if input_type == "string": - return buffer - if input_type == "filepath": - return str(fname) - if input_type == "pathobj": - return Path(fname) - if input_type == "bytes_io": - return BytesIO(buffer.encode()) - if input_type == "string_io": - return StringIO(buffer) - if input_type == "url": - return Path(fname).as_uri() - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.filterwarnings("ignore:Using CPU") 
-@pytest.mark.parametrize("engine", ["auto", "cudf", "pandas"]) -def test_json_lines_basic(json_input, engine): - can_warn = isinstance(json_input, str) and not json_input.endswith(".json") - with expect_warning_if(can_warn): - cu_df = cudf.read_json(json_input, engine=engine, lines=True) - # io types must seek to the beginning before you can read again - if hasattr(json_input, "seek"): - json_input.seek(0) - with expect_warning_if(can_warn): - pd_df = pd.read_json(json_input, lines=True) - - assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): - assert str(cu_col) == str(pd_col) - np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["auto", "cudf", "pandas"]) -def test_nonexistent_json_correct_error(engine): - json_input = "doesnotexist.json" - with pytest.raises(FileNotFoundError): - cudf.read_json(json_input, engine=engine) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["auto", "cudf"]) -def test_json_lines_multiple(tmpdir, json_input, engine): - tmp_file1 = tmpdir.join("MultiInputs1.json") - tmp_file2 = tmpdir.join("MultiInputs2.json") - - with expect_warning_if( - isinstance(json_input, str) and not json_input.endswith(".json") - ): - pdf = pd.read_json(json_input, lines=True) - pdf.to_json(tmp_file1, compression="infer", lines=True, orient="records") - pdf.to_json(tmp_file2, compression="infer", lines=True, orient="records") - - cu_df = cudf.read_json([tmp_file1, tmp_file2], engine=engine, lines=True) - pd_df = pd.concat([pdf, pdf]) - - assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): - assert str(cu_col) == str(pd_col) - np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize("engine", ["auto", "cudf"]) -def test_json_read_directory(tmpdir, json_input, engine): - with expect_warning_if( - isinstance(json_input, str) and not json_input.endswith(".json") - ): - pdf = pd.read_json(json_input, lines=True) - pdf.to_json( - tmpdir.join("MultiInputs1.json"), - compression="infer", - lines=True, - orient="records", - ) - pdf.to_json( - tmpdir.join("MultiInputs2.json"), - compression="infer", - lines=True, - orient="records", - ) - pdf.to_json( - tmpdir.join("MultiInputs3.json"), - compression="infer", - lines=True, - orient="records", - ) - - cu_df = cudf.read_json(tmpdir, engine=engine, lines=True) - pd_df = pd.concat([pdf, pdf, pdf]) - - assert all(cu_df.dtypes == ["int64", "int64", "int64"]) - for cu_col, pd_col in zip(cu_df.columns, pd_df.columns, strict=True): - assert str(cu_col) == str(pd_col) - np.testing.assert_array_equal(pd_df[pd_col], cu_df[cu_col].to_numpy()) - - -def test_json_lines_byte_range(json_input): - # include the first row and half of the second row - # should parse the first two rows - will_warn = isinstance(json_input, str) and not json_input.endswith( - ".json" - ) - with expect_warning_if(will_warn): - df = cudf.read_json( - copy.deepcopy(json_input), lines=True, byte_range=(0, 15) - ) - assert df.shape == (2, 3) - - # include half of the second row and 
half of the third row - # should parse only the third row - with expect_warning_if(will_warn): - df = cudf.read_json( - copy.deepcopy(json_input), lines=True, byte_range=(15, 10) - ) - assert df.shape == (1, 3) - - # include half of the second row and entire third row - # should parse only the third row - with expect_warning_if(will_warn): - df = cudf.read_json( - copy.deepcopy(json_input), lines=True, byte_range=(15, 0) - ) - assert df.shape == (1, 3) - - # include half of the second row till past the end of the file - # should parse only the third row - with expect_warning_if(will_warn): - df = cudf.read_json( - copy.deepcopy(json_input), lines=True, byte_range=(10, 50) - ) - assert df.shape == (1, 3) - - -def test_json_lines_dtypes(json_input): - with expect_warning_if( - isinstance(json_input, str) and not json_input.endswith(".json") - ): - df = cudf.read_json( - json_input, lines=True, dtype={1: "int", 2: "short", 0: "float"} - ) - assert all(df.dtypes == ["float64", "int64", "int16"]) - - -@pytest.mark.parametrize( - "ext, out_comp, in_comp", - [ - (".geez", "gzip", "gzip"), - (".beez", "bz2", "bz2"), - (".gz", "gzip", "infer"), - (".bz2", "bz2", "infer"), - (".data", None, "infer"), - (".txt", None, None), - ("", None, None), - ], -) -def test_json_lines_compression(tmpdir, ext, out_comp, in_comp): - fname = tmpdir.mkdir("gdf_json").join("tmp_json_compression" + ext) - - nrows = 20 - pd_df = make_numeric_dataframe(nrows, np.int32) - pd_df.to_json(fname, compression=out_comp, lines=True, orient="records") - - cu_df = cudf.read_json( - str(fname), - compression=in_comp, - lines=True, - dtype={"col1": "int32", "col2": "int32"}, - ) - assert_eq(pd_df, cu_df) - - -@pytest.mark.filterwarnings("ignore:Using CPU") -def test_json_engine_selection(): - json = "[1, 2, 3]" - - # should use the cudf engine - df = cudf.read_json(StringIO(json), lines=True) - # column names are strings when parsing with cudf - for col_name in df.columns: - assert isinstance(col_name, str) - - # should use the pandas engine - df = cudf.read_json(StringIO(json), lines=False, engine="pandas") - # column names are ints when parsing with pandas - for col_name in df.columns: - assert isinstance(col_name, int) - - # should use the pandas engine - df = cudf.read_json(StringIO(json), lines=True, engine="pandas") - # column names are ints when parsing with pandas - for col_name in df.columns: - assert isinstance(col_name, int) - - -def test_json_bool_values(): - buffer = "[true,1]\n[false,false]\n[true,true]" - cu_df = cudf.read_json(StringIO(buffer), lines=True) - pd_df = pd.read_json(StringIO(buffer), lines=True) - - # types should be ['bool', 'int64'] - np.testing.assert_array_equal(pd_df.dtypes, cu_df.dtypes) - np.testing.assert_array_equal(pd_df[0], cu_df["0"].to_numpy()) - # boolean values should be converted to 0/1 - np.testing.assert_array_equal(pd_df[1], cu_df["1"].to_numpy()) - - cu_df = cudf.read_json( - StringIO(buffer), lines=True, dtype={"0": "bool", "1": "long"} - ) - np.testing.assert_array_equal(pd_df.dtypes, cu_df.dtypes) - - -def test_json_bad_protocol_string(): - test_string = StringIO('{"field": "s3://path"}') - - expect = pd.DataFrame([{"field": "s3://path"}]) - got = cudf.read_json(test_string, lines=True) - - assert_eq(expect, got) - - -def test_json_corner_case_with_escape_and_double_quote_char_with_pandas( - tmpdir, -): - fname = tmpdir.mkdir("gdf_json").join("tmp_json_escape_double_quote") - - pdf = pd.DataFrame( - { - "a": ['ab"cd', "\\\b", "\r\\", "'"], - "b": ["a\tb\t", "\\", '\\"', "\t"], - 
"c": ["aeiou", "try", "json", "cudf"], - } - ) - pdf.to_json(fname, compression="infer", lines=True, orient="records") - - df = cudf.read_json( - fname, compression="infer", lines=True, orient="records" - ) - pdf = pd.read_json( - fname, compression="infer", lines=True, orient="records" - ) - - assert_eq(cudf.DataFrame(pdf), df) - - -def test_json_corner_case_with_escape_and_double_quote_char_with_strings(): - str_buffer = StringIO( - """{"a":"ab\\"cd","b":"a\\tb\\t","c":"aeiou"} - {"a":"\\\\\\b","b":"\\\\","c":"try"} - {"a":"\\r\\\\","b":"\\\\\\"","c":"json"} - {"a":"\'","b":"\\t","c":"cudf"}""" - ) - - df = cudf.read_json( - str_buffer, compression="infer", lines=True, orient="records" - ) - - expected = { - "a": ['ab"cd', "\\\b", "\r\\", "'"], - "b": ["a\tb\t", "\\", '\\"', "\t"], - "c": ["aeiou", "try", "json", "cudf"], - } - - num_rows = df.shape[0] - for col_name in df._data: - for i in range(num_rows): - assert expected[col_name][i] == df[col_name][i] - - -def test_json_to_json_special_characters(): - df = cudf.DataFrame( - { - "'a'": ['ab"cd', "\\\b", "\r\\", "'"], - "b": ["a\tb\t", "\\", '\\"', "\t"], - "c": ["aeiou", "try", "json", "cudf"], - } - ) - - actual = StringIO() - df.to_json(actual, engine="cudf", lines=True, orient="records") - expected = StringIO() - df.to_pandas().to_json(expected, lines=True, orient="records") - assert expected.getvalue() == actual.getvalue() - - -@pytest.mark.parametrize( - "gdf,pdf", - [ - ( - cudf.DataFrame( - { - "int col": cudf.Series( - [1, 2, None, 2, 2323, 234, None], dtype="int64" - ) - } - ), - pd.DataFrame( - { - "int col": pd.Series( - [1, 2, None, 2, 2323, 234, None], dtype=pd.Int64Dtype() - ) - } - ), - ), - ( - cudf.DataFrame( - { - "int64 col": cudf.Series( - [1, 2, None, 2323, None], dtype="int64" - ), - "string col": cudf.Series( - ["abc", "a", None, "", None], dtype="str" - ), - "float col": cudf.Series( - [0.234, None, 234234.2343, None, 0.0], dtype="float64" - ), - "bool col": cudf.Series( - [None, True, False, None, True], dtype="bool" - ), - "categorical col": cudf.Series( - [1, 2, 1, None, 2], dtype="category" - ), - "datetime col": cudf.Series( - [1231233, None, 2323234, None, 1], - dtype="datetime64[ns]", - ), - "timedelta col": cudf.Series( - [None, 34687236, 2323234, 1, None], - dtype="timedelta64[ns]", - ), - } - ), - pd.DataFrame( - { - "int64 col": pd.Series( - [1, 2, None, 2323, None], dtype=pd.Int64Dtype() - ), - "string col": pd.Series( - ["abc", "a", None, "", None], dtype=pd.StringDtype() - ), - "float col": pd.Series( - [0.234, None, 234234.2343, None, 0.0], dtype="float64" - ), - "bool col": pd.Series( - [None, True, False, None, True], - dtype=pd.BooleanDtype(), - ), - "categorical col": pd.Series( - [1, 2, 1, None, 2], dtype="category" - ), - "datetime col": pd.Series( - [1231233, None, 2323234, None, 1], - dtype="datetime64[ns]", - ), - "timedelta col": pd.Series( - [None, 34687236, 2323234, 1, None], - dtype="timedelta64[ns]", - ), - } - ), - ), - ], -) -def test_json_to_json_compare_contents(gdf, pdf): - expected_json = pdf.to_json(lines=True, orient="records") - with pytest.warns(UserWarning): - actual_json = gdf.to_json(lines=True, orient="records") - - assert expected_json == actual_json - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["cudf", "pandas"]) -def test_default_integer_bitwidth(default_integer_bitwidth, engine): - buf = BytesIO() - pd.DataFrame({"a": range(10)}).to_json(buf, lines=True, orient="records") - buf.seek(0) - df = cudf.read_json(buf, 
engine=engine, lines=True, orient="records")
-
-    assert df["a"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}")
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-@pytest.mark.parametrize(
-    "engine",
-    [
-        "cudf",
-        "pandas",
-    ],
-)
-def test_default_integer_bitwidth_partial(default_integer_bitwidth, engine):
-    buf = BytesIO()
-    pd.DataFrame({"a": range(10), "b": range(10, 20)}).to_json(
-        buf, lines=True, orient="records"
-    )
-    buf.seek(0)
-    df = cudf.read_json(
-        buf, engine=engine, lines=True, orient="records", dtype={"b": "i8"}
-    )
-
-    assert df["a"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}")
-    assert df["b"].dtype == np.dtype("i8")
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-@pytest.mark.parametrize("engine", ["cudf", "pandas"])
-def test_default_integer_bitwidth_extremes(default_integer_bitwidth, engine):
-    # Test that integer columns in json are _inferred_ with the
-    # user-specified bitwidth.
-    buf = StringIO(
-        '{"u8":18446744073709551615, "i8":9223372036854775807}\n'
-        '{"u8": 0, "i8": -9223372036854775808}'
-    )
-    df = cudf.read_json(buf, engine=engine, lines=True, orient="records")
-
-    assert df["u8"].dtype == np.dtype(f"u{default_integer_bitwidth // 8}")
-    assert df["i8"].dtype == np.dtype(f"i{default_integer_bitwidth // 8}")
-
-
-def test_default_float_bitwidth(default_float_bitwidth):
-    # Test that float columns in json are _inferred_ with the
-    # user-specified bitwidth.
-    df = cudf.read_json(
-        StringIO('{"a": 1.0, "b": 2.5}\n{"a": 3.5, "b": 4.0}'),
-        engine="cudf",
-        lines=True,
-        orient="records",
-    )
-    assert df["a"].dtype == np.dtype(f"f{default_float_bitwidth // 8}")
-    assert df["b"].dtype == np.dtype(f"f{default_float_bitwidth // 8}")
-
-
-def test_json_nested_basic():
-    bytes_obj = BytesIO()
-    data = {
-        "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
-        "c2": [["l11", "l21"], ["l12", "l22"]],
-    }
-    pdf = pd.DataFrame(data)
-    pdf.to_json(bytes_obj, orient="records")
-
-    df = cudf.read_json(bytes_obj, engine="cudf", orient="records")
-    bytes_obj.seek(0)
-    pdf = pd.read_json(bytes_obj, orient="records")
-
-    assert_eq(pdf, df)
-
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        {
-            "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
-            "c2": [["l11", "l21"], ["l12", "l22"]],
-        },
-        # Essential test case to handle omissions
-        {
-            "c1": [{"f2": "sf21"}, {"f1": "sf12"}],
-            "c2": [["l11", "l21"], []],
-        },
-        # empty input
-        {},
-    ],
-)
-@pytest.mark.parametrize("lines", [True, False])
-def test_json_nested_lines(data, lines):
-    bytes = BytesIO()
-    pdf = pd.DataFrame(data)
-    pdf.to_json(bytes, orient="records", lines=lines)
-    bytes.seek(0)
-    df = cudf.read_json(bytes, engine="cudf", orient="records", lines=lines)
-    bytes.seek(0)
-    pdf = pd.read_json(bytes, orient="records", lines=lines)
-    # In the second test case we need to take a detour via pyarrow.
-    # Pandas omits "f1" in the first row, so we have to enforce a common
-    # schema, such that pandas would have the f1 member with null.
-    # Also, pyarrow chooses a different ordering of a nested column's
-    # children, though the key-value pairs are correct.
- pa_table_pdf = pa.Table.from_pandas( - pdf, schema=df.to_arrow().schema, safe=False - ) - assert df.to_arrow().equals(pa_table_pdf) - - -def test_json_nested_data(): - json_str = ( - '[{"0":{},"2":{}},{"1":[[""],[]],"2":{"2":""}},' - '{"0":{"a":"1"},"2":{"0":"W&RR=+I","1":""}}]' - ) - df = cudf.read_json(StringIO(json_str), engine="cudf", orient="records") - pdf = pd.read_json(StringIO(json_str), orient="records") - pdf.columns = pdf.columns.astype("str") - pa_table_pdf = pa.Table.from_pandas( - pdf, schema=df.to_arrow().schema, safe=False - ) - assert df.to_arrow().equals(pa_table_pdf) - - -def test_json_empty_types(): - json_str = """ {} - {"a": [], "b": {}} - {"a": []} - {"b": {}} - {"c": {"d": []}} - {"e": [{}]} - """ - df = cudf.read_json(StringIO(json_str), orient="records", lines=True) - pdf = pd.read_json(StringIO(json_str), orient="records", lines=True) - assert_eq(df, pdf) - - -def test_json_types_data(): - # 0:<0:string,1:float> - # 1:list - # 2:<0:bool> - json_str = ( - '[{"0":null,"2":{}},' - '{"1":[123],"0":{"0":"foo","1":123.4},"2":{"0":false}},' - '{"0":{},"1":[],"2":{"0":null}}]' - ) - df = cudf.read_json(StringIO(json_str), engine="cudf", orient="records") - pdf = pd.read_json(StringIO(json_str), orient="records") - pdf.columns = pdf.columns.astype("str") - pa_table_pdf = pa.Table.from_pandas( - pdf, schema=df.to_arrow().schema, safe=False - ) - assert df.to_arrow().equals(pa_table_pdf) - - -@pytest.mark.parametrize( - "col_type,json_str,expected_data", - [ - # without quotes - ("int", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes - ("int", '[{"k": "1"}, {"k": "2"}]', [1, 2]), - # with quotes, mixed - ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes, null, mixed - ( - "int", - '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - # without quotes, null - ( - "int", - '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - # without quotes - ("float", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes - ("float", '[{"k": "1"}, {"k": "2"}]', [1, 2]), - # with quotes, mixed - ( - "float", - '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', - [1, 2, 3, 4], - ), - # with quotes, null, mixed - ( - "float", - '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - # with quotes, NAN - ( - "float", - '[{"k": "1"}, {"k": "2"}, {"k": NaN}, {"k": "4"}]', - [1, 2, np.nan, 4], - ), - # without quotes - ("str", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes - ("str", '[{"k": "1"}, {"k": "2"}]', [1, 2]), - # with quotes, mixed - ("str", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes, null, mixed - ( - "str", - '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - # without quotes, null - ( - "str", - '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - ], -) -def test_json_quoted_values_with_schema(col_type, json_str, expected_data): - actual = cudf.read_json( - StringIO(json_str), - engine="cudf", - orient="records", - dtype={"k": col_type}, - ) - expected = cudf.DataFrame({"k": expected_data}, dtype=col_type) - - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "col_type,json_str,expected_data", - [ - # with quotes, mixed - ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]), - # with quotes, null, mixed - ( - "int", - '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', - [1, 2, None, 4], - ), - # with 
quotes, mixed - ( - "str", - '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', - ["1", "2", "3", "4"], - ), - # with quotes, null, mixed - ( - "str", - '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]', - ["1", "2", None, "4"], - ), - ], -) -def test_json_quoted_values(col_type, json_str, expected_data): - actual = cudf.read_json( - StringIO(json_str), - engine="cudf", - orient="records", - dtype={"k": col_type}, - ) - expected = cudf.DataFrame({"k": expected_data}, dtype=col_type) - - assert_eq(expected, actual) - assert_eq(expected_data, actual.k.to_arrow().to_pylist()) - - -@pytest.mark.parametrize( - "keep_quotes,result", - [ - ( - True, - { - "c1": [ - {"f1": '"sf11"', "f2": '"sf21"'}, - {"f1": '"sf12"', "f2": '"sf22"'}, - ], - "c2": [['"l11"', '"l21"'], ['"l12"', '"l22"']], - }, - ), - ( - False, - { - "c1": [ - {"f1": "sf11", "f2": "sf21"}, - {"f1": "sf12", "f2": "sf22"}, - ], - "c2": [["l11", "l21"], ["l12", "l22"]], - }, - ), - ], -) -def test_json_keep_quotes(keep_quotes, result): - bytes_file = BytesIO() - data = { - "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}], - "c2": [["l11", "l21"], ["l12", "l22"]], - } - pdf = pd.DataFrame(data) - pdf.to_json(bytes_file, orient="records", lines=True) - - actual = cudf.read_json( - bytes_file, - orient="records", - lines=True, - keep_quotes=keep_quotes, - ) - expected = pd.DataFrame(result) - - assert_eq(actual, expected) - - -def test_json_dtypes_nested_data(): - # a: StructDtype({'a': StructDtype({'b': dtype('float64')}), - # 'b': dtype('int64')}) - # b: ListDtype(ListDtype(float64)) - actual_json_str = ( - '{"a":{"a":{"b":10.0},"b":11},"b":[[10.0,1.1],[12.0,23.0]]}\n' - '{"a":{"a":{"b":107.0},"b":5},"b":[[10.0,11.2],[12.0,0.23]]}\n' - '{"a":{"a":{"b":50.7},"b":2},"b":[[10.0,11.3],[12.0,2.3]]}\n' - '{"a":{"a":{"b":1.2},"b":67},"b":[[6.0,7.0]]}\n' - '{"a":{"a":{"b":40.1},"b":1090},"b":null}\n' - ) - - """ - In [3]: df - Out[3]: - a b - 0 {'a': {'b': 10.0}, 'b': 11} [[10.0, 1.1], [12.0, 23.0]] - 1 {'a': {'b': 107.0}, 'b': 5} [[10.0, 11.2], [12.0, 0.23]] - 2 {'a': {'b': 50.7}, 'b': 2} [[10.0, 11.3], [12.0, 2.3]] - 3 {'a': {'b': 1.2}, 'b': 67} [[6.0, 7.0]] - 4 {'a': {'b': 40.1}, 'b': 1090} None - """ - - # a: StructDtype({'a': StructDtype({'b': dtype('int64')}), - # 'b': dtype('float64')}) - # b: ListDtype(ListDtype(int64)) - expected_json_str = ( - '{"a":{"a":{"b":10},"b":11.0},"b":[[10,1],[12,23]]}\n' - '{"a":{"a":{"b":107},"b":5.0},"b":[[10,11],[12,0]]}\n' - '{"a":{"a":{"b":50},"b":2.0},"b":[[10,11],[12,2]]}\n' - '{"a":{"a":{"b":1},"b":67.0},"b":[[6,7]]}\n' - '{"a":{"a":{"b":40},"b":1090.0},"b":null}\n' - ) - - """ - In [7]: df - Out[7]: - a b - 0 {'a': {'b': 10}, 'b': 11.0} [[10, 1], [12, 23]] - 1 {'a': {'b': 107}, 'b': 5.0} [[10, 11], [12, 0]] - 2 {'a': {'b': 50}, 'b': 2.0} [[10, 11], [12, 2]] - 3 {'a': {'b': 1}, 'b': 67.0} [[6, 7]] - 4 {'a': {'b': 40}, 'b': 1090.0} None - """ - - df = cudf.read_json( - StringIO(actual_json_str), - engine="cudf", - orient="records", - lines=True, - dtype={ - "a": cudf.StructDtype( - { - "a": cudf.StructDtype({"b": cudf.dtype("int64")}), - "b": cudf.dtype("float64"), - } - ), - "b": cudf.ListDtype(cudf.ListDtype("int64")), - }, - ) - - pdf = pd.read_json( - StringIO(expected_json_str), - orient="records", - lines=True, - ) - - assert_eq(df, pdf) - - pdf.columns = pdf.columns.astype("str") - pa_table_pdf = pa.Table.from_pandas( - pdf, schema=df.to_arrow().schema, safe=False - ) - assert df.to_arrow().equals(pa_table_pdf) - - -@pytest.mark.parametrize( - "tag, data", - [ - ( - "normal", - 
"""\ -{"a": 1, "b": 2} -{"a": 3, "b": 4}""", - ), - ( - "multiple", - """\ - { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } - { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } - { "a": { "y" : 6}, "b" : [6 ], "c": 13 } - { "a": { "y" : 6}, "b" : [7 ], "c": 14 }""", - ), - ( - "reordered", - """\ - { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } - { "a": { "y" : 6}, "c": 12 , "b" : [4, 5 ]} - { "b" : [6 ], "a": { "y" : 6}, "c": 13} - { "c" : 14, "a": { "y" : 6}, "b" : [7 ]} -""", - ), - ( - "missing", - """ - { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } - { "a": { "y" : 6}, "b" : [4, 5 ] } - { "a": { "y" : 6}, "c": 13 } - { "a": { "y" : 6}, "b" : [7 ], "c": 14 } -""", - ), - pytest.param( - "dtype_mismatch", - """\ - { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } - { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } - { "a": { "y" : 6}, "b" : [6 ], "c": 13 } - { "a": { "y" : 6}, "b" : [7 ], "c": 14.0 }""", - ), - ], -) -class TestNestedJsonReaderCommon: - @pytest.mark.parametrize("chunk_size", [10, 100, 1024, 1024 * 1024]) - def test_chunked_nested_json_reader(self, tag, data, chunk_size): - expected = cudf.read_json(StringIO(data), lines=True) - - source_size = len(data) - chunks = [] - for chunk_start in range(0, source_size, chunk_size): - chunks.append( - cudf.read_json( - StringIO(data), - byte_range=[chunk_start, chunk_size], - lines=True, - ) - ) - df = cudf.concat(chunks, ignore_index=True) - assert expected.to_arrow().equals(df.to_arrow()) - - @pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/pull/57439", - ) - def test_order_nested_json_reader(self, tag, data): - expected = pd.read_json(StringIO(data), lines=True) - target = cudf.read_json(StringIO(data), lines=True) - # Using pyarrow instead of assert_eq because pandas - # doesn't handle nested values comparisons correctly - if tag == "dtype_mismatch": - with pytest.raises(AssertionError): - # pandas parses integer values in float representation - # as integer - assert pa.Table.from_pandas(expected).equals(target.to_arrow()) - elif tag == "missing": - with pytest.raises(AssertionError): - # pandas inferences integer with nulls as float64 - assert pa.Table.from_pandas(expected).equals(target.to_arrow()) - else: - assert pa.Table.from_pandas(expected).equals(target.to_arrow()) - - -def test_json_round_trip_gzip(): - df = cudf.DataFrame({"a": [1, 2, 3], "b": ["abc", "def", "ghi"]}) - bytes = BytesIO() - with gzip.open(bytes, mode="wb") as fo: - with pytest.warns(UserWarning): - df.to_json(fo, orient="records", lines=True) - bytes.seek(0) - with gzip.open(bytes, mode="rb") as fo: - written_df = cudf.read_json(fo, orient="records", lines=True) - assert_eq(written_df, df) - - # Testing writing from middle of the file. 
- loc = bytes.tell() - - with gzip.open(bytes, mode="wb") as fo: - fo.seek(loc) - with pytest.warns(UserWarning): - df.to_json(fo, orient="records", lines=True) - bytes.seek(loc) - with gzip.open(bytes, mode="rb") as fo: - fo.seek(loc) - written_df = cudf.read_json(fo, orient="records", lines=True) - assert_eq(written_df, df) - - -@pytest.mark.parametrize( - "data", - [ - # # empty input - # assert failing due to missing index size information - "", - "[]", - "[]\n[]\n[]", - # simple values - """[1]\n[2]\n[3]""", - """[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9]""", - # nulls - """[1, 2, 3]\n[4, 5, null]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, null]\n[7, 8, 9]\n[null, null, null]""", - """[1, 2, 3]\n[4, 5, null]\n[]""", - # missing - """[1, 2, 3]\n[4, 5 ]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9, 10]""", - """[1, 2, 3]\n[4, 5, 6, {}]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, 6, []]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, 6, {"a": 10}]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, 6, [10]]\n[7, 8, 9]""", - # mixed - """[1, 2, 3]\n[4, 5, {}]\n[7, 8, 9]""", - """[1, 2, {}]\n[4, 5, 6]\n[7, 8, 9]""", - """[1, 2, 3]\n[4, 5, [6]]\n[7, 8, 9]""", - """[1, 2, [3]]\n[4, 5, 6]\n[7, 8, 9]""", - # nested - """[1, 2, [3]]\n[4, 5, [6]]\n[7, 8, [9]]""", - """[1, 2, {"a": 3}]\n[4, 5, {"b": 6}]\n[7, 8, {"c": 9}]""", - """[1, 2, [{"a": 3}, {"a": 3}]] - [4, 5, [{"b": 6}, {"b": 6}, {}, {"b": 6}]] - [7, 8, [{}]]""", - """[1, 2, {"a": [3, 3, 3]}] - [4, 5, {"b": [6, 6]}] - [7, 8, {"c": 9}]""", - """[1, 2, [{"a": 3}, {"a": null}]] - [4, 5, [{"b": [6.0, 6, 06]}, {"b": [6]}, {}, {"b": null}]] - [7, 8, [{}]]""", - ], -) -@pytest.mark.parametrize("lines", [True, False]) -def test_json_array_of_arrays(data, lines): - data = data if lines else "[" + data.replace("\n", ",") + "]" - pdf = pd.read_json(StringIO(data), orient="values", lines=lines) - df = cudf.read_json( - StringIO(data), - engine="cudf", - orient="values", - lines=lines, - ) - # if mixed with dict/list type, replace other types with None. 
- if 2 in pdf.columns and any( - pdf[2].apply(lambda x: isinstance(x, dict) or isinstance(x, list)) - ): - pdf[2] = pdf[2].apply( - lambda x: x if isinstance(x, dict) or isinstance(x, list) else None - ) - # TODO: Replace string column names with integer column names - # for values orient in cudf json reader - pdf.rename(columns={name: str(name) for name in pdf.columns}, inplace=True) - # assert_eq(pdf, df) - pa_table_pdf = pa.Table.from_pandas( - pdf, schema=df.to_arrow().schema, safe=False - ) - assert df.to_arrow().equals(pa_table_pdf) - - -@pytest.mark.parametrize( - "jsonl_string", - [ - # simple list with mixed types - """{"a":[123, {}], "b":1.1}""", - """{"a":[123, {"0": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":[{"L": 123}, 123], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":[123, {"0": 123}, 12.3], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":[123, {"0": 123}, null], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":["123", {"0": 123}], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":[{"0": 123}, "123"], "b":1.0}\n {"b":1.1}\n {"b":2.1}""", - """{"a":["123", {"0": 123}, "123"], "b":1.0}\n {"b":1.1}""", - """{"a":[123]}\n {"a":[{"0": 123}], "b":1.0}\n {"b":1.1}""", - """{"a":[{"0": 123}]}\n {"a":[123], "b":1.0}\n {"b":1.1}""", - """{"a":[{"0": 123}]}\n {"a": []}\n {"a":[123], "b":1.0}\n{"b":1.1}""", - """{"b":1.0, "a":[{"0": 123}]}\n {"a":[123]}\n {"b":1.1}\n{"a": []}""", - """{"a": []}\n {"a":[{"0": 123}]}\n {"a":[123], "b":1.0}\n{"b":1.1}""", - """{"a": []}\n {"a":[123], "b":1.0}\n {"a":[{"0": 123}]}\n{"b":1.1}""", - # nested list with mixed types - """{"a":[123, [{"0": 123}, {}]], "b":1.0} - {"b":1.1} - {"a":[]} - {"a":[123]} - {"a":[[123], []]}""", - """{"a":[], "b":1.0} - {"a":[[[456]]]} - {"a":[[123]]} - {"a":[123]}""", - """{"a":[123], "b":1.0} - {"b":1.1} - {"b":2.1} - {"a":[[[[[[]]]]]]}""", - """{"a":[123], "b":1.0} - {"a":[[[[[[]]]]]]} - {"a":[[[[[[]]]]], [[[[[]]]]]]} - {"a":[[[[[[]]]], [[[[]]]]]]} - {"a":[[[[[[]]], [[[]]]]]]} - {"a":[[[[[[]], [[]]]]]]} - {"a":[[[[[[], 123, []]]]]]}""", - # mixed elements in multiple columns - """{"a":[123, {"0": 123}], "b":1.0} - {"c": ["abc"], "b":1.1} - {"c": ["abc", []] }""", - ], -) -def test_json_nested_mixed_types_in_list(jsonl_string): - # utility function for this test: - # replace list elements with None if it has dict and non-dict (ignore None) - def _replace_in_list(list_to_replace, replace_items): - return [ - _replace_in_list(x, replace_items) - if isinstance(x, list) - else None - if x in replace_items - else x - for x in list_to_replace - ] - - def _replace_with_nulls(df, replace_items): - for col in df.columns: - if df[col].dtype == "object": - df[col] = df[col].apply( - lambda x: _replace_in_list(x, replace_items) - if isinstance(x, list) - else x - ) - return df - - # both json lines and json string tested. - json_string = "[" + jsonl_string.replace("\n", ",") + "]" - pdf = pd.read_json(StringIO(jsonl_string), orient="records", lines=True) - pdf2 = pd.read_json(StringIO(json_string), orient="records", lines=False) - assert_eq(pdf, pdf2) - # replace list elements with None if it has dict and non-dict - # in above test cases, these items are mixed with dict/list items - # so, replace them with None. 
- pdf = _replace_with_nulls(pdf, [123, "123", 12.3, "abc"]) - gdf = cudf.read_json( - StringIO(jsonl_string), - orient="records", - lines=True, - ) - gdf2 = cudf.read_json( - StringIO(json_string), - engine="cudf", - orient="records", - lines=False, - ) - if """[{"0": 123}, {}]""" not in jsonl_string: - # {} in pandas is represented as {"0": None} in cudf - assert_eq(gdf, pdf) - assert_eq(gdf2, pdf) - pa_table_pdf = pa.Table.from_pandas( - pdf, schema=gdf.to_arrow().schema, safe=False - ) - assert gdf.to_arrow().equals(pa_table_pdf) - assert gdf2.to_arrow().equals(pa_table_pdf) - - -@pytest.mark.parametrize( - "jsonl_string", - [ - # mixed type in list (in different order) - """{"a":[[{"0": 123}, {}], {"1": 321}], "b":1.0}""", - """{"a":[{"1": 321}, [{"0": 123}, {}], ], "b":1.0}""", - """{"a":[123, [{"0": 123}, {}], {"1": 321}], "b":1.0}""", - """{"a":[null, [{"0": 123}, {}], {"1": 321}], "b":1.0}""", - # mixed type in struct (in different order) - """{"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} - {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", - """{"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0} - {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0}""", - """{"a": {"b": {"0": 123}, "c": null}, "d":1.0} - {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} - {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", - """{"a": {"b": {"0": 123}, "c": 123}, "d":1.0} - {"a": {"b": {"0": 123}, "c": {"1": 321}}, "d":1.0} - {"a": {"b": {"0": 123}, "c": [123, 123]}, "d":1.0}""", - ], -) -def test_json_nested_mixed_types_error(jsonl_string): - # mixing list and struct should raise an exception - with pytest.raises(RuntimeError): - cudf.read_json( - StringIO(jsonl_string), - orient="records", - lines=True, - ) - - -@pytest.mark.parametrize("on_bad_lines", ["error", "recover", "abc"]) -def test_json_reader_on_bad_lines(on_bad_lines): - json_input = StringIO( - '{"a":1,"b":10}\n{"a":2,"b":11}\nabc\n{"a":3,"b":12}\n' - ) - if on_bad_lines == "error": - with pytest.raises(RuntimeError): - cudf.read_json( - json_input, - lines=True, - orient="records", - on_bad_lines=on_bad_lines, - ) - elif on_bad_lines == "recover": - actual = cudf.read_json( - json_input, lines=True, orient="records", on_bad_lines=on_bad_lines - ) - expected = cudf.DataFrame( - {"a": [1, 2, None, 3], "b": [10, 11, None, 12]} - ) - assert_eq(actual, expected) - else: - with pytest.raises(TypeError): - cudf.read_json( - json_input, - lines=True, - orient="records", - on_bad_lines=on_bad_lines, - ) - - -def test_chunked_json_reader(): - df = cudf.DataFrame( - { - "a": ["aaaa"] * 1_000_000, - "b": range(1_000_000), - } - ) - buf = BytesIO() - df.to_json(buf, lines=True, orient="records", engine="cudf") - buf.seek(0) - df = df.to_pandas() - with cudf.option_context("io.json.low_memory", True): - gdf = cudf.read_json(buf, lines=True) - assert_eq(df, gdf) - - -# compression formats limited to those supported by both reader and writer -@pytest.mark.parametrize("compression", ["gzip", "snappy", "zstd"]) -def test_roundtrip_compression(compression, tmp_path): - expected = cudf.DataFrame({"a": [1], "b": ["2"]}) - fle = BytesIO() - expected.to_json(fle, engine="cudf", compression=compression) - result = cudf.read_json(fle, engine="cudf", compression=compression) - assert_eq(result, expected) From 82c2f4e60f2a501ce75931f70a08740462c523ae Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 14 Aug 2025 17:41:12 -0700 Subject: [PATCH 130/366] Move test_interval/test_dtypes/test_rank.py to 
new cudf directory structure (#19668) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19668 --- python/cudf/cudf/tests/conftest.py | 19 +- .../{ => dataframe/methods}/test_rank.py | 66 +--- .../cudf/tests/dataframe/test_constructors.py | 94 +++++ .../tests/dtypes/test_categoricaldtype.py | 36 ++ .../cudf/tests/dtypes/test_decimaldtype.py | 30 ++ python/cudf/cudf/tests/dtypes/test_dtype.py | 74 ++++ .../cudf/tests/dtypes/test_intervaldtype.py | 51 +++ .../cudf/cudf/tests/dtypes/test_listdtype.py | 37 ++ .../cudf/tests/dtypes/test_structdtype.py | 54 +++ .../tests/indexes/index/test_constructor.py | 14 + .../intervalindex/test_constructors.py | 29 ++ .../indexes/intervalindex/test_reductions.py | 16 + .../indexes/intervalindex/test_unique.py | 25 ++ .../cudf/tests/series/methods/test_rank.py | 32 ++ .../cudf/cudf/tests/series/test_attributes.py | 7 + .../cudf/tests/series/test_constructors.py | 118 ++++++ python/cudf/cudf/tests/test_dtypes.py | 352 ------------------ python/cudf/cudf/tests/test_interval.py | 221 ----------- 18 files changed, 653 insertions(+), 622 deletions(-) rename python/cudf/cudf/tests/{ => dataframe/methods}/test_rank.py (61%) create mode 100644 python/cudf/cudf/tests/dataframe/test_constructors.py create mode 100644 python/cudf/cudf/tests/dtypes/test_categoricaldtype.py create mode 100644 python/cudf/cudf/tests/dtypes/test_decimaldtype.py create mode 100644 python/cudf/cudf/tests/dtypes/test_dtype.py create mode 100644 python/cudf/cudf/tests/dtypes/test_intervaldtype.py create mode 100644 python/cudf/cudf/tests/dtypes/test_listdtype.py create mode 100644 python/cudf/cudf/tests/indexes/intervalindex/test_constructors.py create mode 100644 python/cudf/cudf/tests/indexes/intervalindex/test_reductions.py create mode 100644 python/cudf/cudf/tests/indexes/intervalindex/test_unique.py create mode 100644 python/cudf/cudf/tests/series/methods/test_rank.py delete mode 100644 python/cudf/cudf/tests/test_dtypes.py delete mode 100644 python/cudf/cudf/tests/test_interval.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 4718ecdc711..217b9e6524e 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -300,7 +300,6 @@ def reduction_methods(request): def signed_integer_types_as_str(request): """ - "int8", "int16", "int32", "int64" - - "uint8", "uint16", "uint32", "uint64" """ return request.param @@ -503,3 +502,21 @@ def ignore_index(request): def ascending(request): """Param for `ascending` argument""" return request.param + + +@pytest.fixture(params=[True, False]) +def numeric_only(request): + """Param for `numeric_only` argument""" + return request.param + + +@pytest.fixture(params=[True, False, None]) +def categorical_ordered(request): + """Param for `ordered` argument for categorical types""" + return request.param + + +@pytest.fixture(params=["left", "right", "both", "neither"]) +def interval_closed(request): + """Param for `closed` argument for interval types""" + return request.param diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/dataframe/methods/test_rank.py similarity index 61% rename from python/cudf/cudf/tests/test_rank.py rename to python/cudf/cudf/tests/dataframe/methods/test_rank.py index 07a844333cf..2befdd92581 100644 --- 
a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_rank.py @@ -1,17 +1,18 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. import numpy as np import pandas as pd import pytest import cudf -from cudf import DataFrame from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal -@pytest.fixture -def pdf(): - return pd.DataFrame( +@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [True, False]) +def test_rank_all_arguments(ascending, method, na_option, pct, numeric_only): + pdf = pd.DataFrame( { "col1": np.array([5, 4, 3, 5, 8, 5, 2, 1, 6, 6]), "col2": np.array( @@ -21,26 +22,11 @@ def pdf(): index=np.array([5, 4, 3, 2, 1, 6, 7, 8, 9, 10]), ) - -@pytest.mark.parametrize("dtype", ["O", "f8", "i4"]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) -@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) -@pytest.mark.parametrize("pct", [True, False]) -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_rank_all_arguments( - pdf, dtype, ascending, method, na_option, pct, numeric_only -): - if method == "first" and dtype == "O": - # not supported by pandas - return - if numeric_only: - pdf = pdf.copy(deep=True) # for parallel pytest pdf["str"] = np.array( ["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"] ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) kwargs = { "method": method, @@ -73,8 +59,17 @@ def test_rank_all_arguments( assert_eq(expected, actual) -def test_rank_error_arguments(pdf): - gdf = DataFrame.from_pandas(pdf) +def test_rank_error_arguments(): + pdf = pd.DataFrame( + { + "col1": np.array([5, 4, 3, 5, 8, 5, 2, 1, 6, 6]), + "col2": np.array( + [5, 4, np.nan, 5, 8, 5, np.inf, np.nan, 6, -np.inf] + ), + }, + index=np.array([5, 4, 3, 2, 1, 6, 7, 8, 9, 10]), + ) + gdf = cudf.DataFrame.from_pandas(pdf) assert_exceptions_equal( lfunc=pdf["col1"].rank, @@ -121,28 +116,3 @@ def test_rank_error_arguments(pdf): }, ), ) - - -@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") -@pytest.mark.parametrize("elem1", [np.nan, np.inf, -np.inf, 1.43]) -@pytest.mark.parametrize("elem2", [np.nan, np.inf, -np.inf, 1.43]) -@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) -def test_series_rank_combinations(elem1, elem2, dtype): - aa = np.array([elem1, elem2], dtype=np.float64).astype(dtype) - gdf = DataFrame({"a": aa}) - df = pd.DataFrame({"a": aa}) - ranked_gs = gdf["a"].rank(method="first") - ranked_ps = df["a"].rank(method="first") - # Check - assert_eq(ranked_ps, ranked_gs) - - -@pytest.mark.parametrize("klass", ["Series", "DataFrame"]) -def test_int_nan_pandas_compatible(klass): - data = [3, 6, 1, 1, None, 6] - pd_obj = getattr(pd, klass)(data) - cudf_obj = getattr(cudf, klass)(data) - with cudf.option_context("mode.pandas_compatible", True): - result = cudf_obj.rank() - expected = pd_obj.rank() - assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py new file mode 100644 index 00000000000..6d45334717f --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_constructors.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data1, data2", + [(1, 2), (1.0, 2.0), (3, 4.0)], +) +@pytest.mark.parametrize("data3, data4", [(6, 10), (5.0, 9.0), (2, 6.0)]) +def test_create_interval_df(data1, data2, data3, data4, interval_closed): + # df for both pandas and cudf only works when interval is in a list + expect = pd.DataFrame( + [pd.Interval(data1, data2, interval_closed)], dtype="interval" + ) + got = cudf.DataFrame( + [pd.Interval(data1, data2, interval_closed)], dtype="interval" + ) + assert_eq(expect, got) + + expect_two = pd.DataFrame( + { + "a": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + "b": [ + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + ], + }, + dtype="interval", + ) + got_two = cudf.DataFrame( + { + "a": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + "b": [ + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + ], + }, + dtype="interval", + ) + assert_eq(expect_two, got_two) + + expect_three = pd.DataFrame( + { + "a": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + ], + "b": [ + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + "c": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + }, + dtype="interval", + ) + + got_three = cudf.DataFrame( + { + "a": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + ], + "b": [ + pd.Interval(data3, data4, interval_closed), + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + "c": [ + pd.Interval(data1, data2, interval_closed), + pd.Interval(data1, data2, interval_closed), + pd.Interval(data3, data4, interval_closed), + ], + }, + dtype="interval", + ) + assert_eq(expect_three, got_three) diff --git a/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py b/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py new file mode 100644 index 00000000000..93311fd3c02 --- /dev/null +++ b/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+import pandas as pd
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize(
+    "data", [None, [], ["a"], [1], [1.0], ["a", "b", "c"]]
+)
+def test_cdt_eq(data, categorical_ordered):
+    dt = cudf.CategoricalDtype(categories=data, ordered=categorical_ordered)
+    assert dt == "category"
+    assert dt == dt
+    assert dt == cudf.CategoricalDtype(
+        categories=None, ordered=categorical_ordered
+    )
+    assert dt == cudf.CategoricalDtype(
+        categories=data, ordered=categorical_ordered
+    )
+    assert dt != cudf.CategoricalDtype(
+        categories=data, ordered=not categorical_ordered
+    )
+
+
+@pytest.mark.parametrize(
+    "data", [None, [], ["a"], [1], [1.0], ["a", "b", "c"]]
+)
+def test_cdf_to_pandas(data, categorical_ordered):
+    assert (
+        pd.CategoricalDtype(data, categorical_ordered)
+        == cudf.CategoricalDtype(
+            categories=data, ordered=categorical_ordered
+        ).to_pandas()
+    )
diff --git a/python/cudf/cudf/tests/dtypes/test_decimaldtype.py b/python/cudf/cudf/tests/dtypes/test_decimaldtype.py
new file mode 100644
index 00000000000..883b55f0f68
--- /dev/null
+++ b/python/cudf/cudf/tests/dtypes/test_decimaldtype.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pytest
+
+import cudf
+
+
+@pytest.mark.parametrize(
+    "decimal_type",
+    [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype],
+)
+def test_decimal_dtype_arrow_roundtrip(decimal_type):
+    dt = decimal_type(4, 2)
+    assert dt.to_arrow() == pa.decimal128(4, 2)
+    assert dt == decimal_type.from_arrow(pa.decimal128(4, 2))
+
+
+@pytest.mark.parametrize(
+    "decimal_type,max_precision",
+    [
+        (cudf.Decimal32Dtype, 9),
+        (cudf.Decimal64Dtype, 18),
+        (cudf.Decimal128Dtype, 38),
+    ],
+)
+def test_max_precision(decimal_type, max_precision):
+    decimal_type(scale=0, precision=max_precision)
+    with pytest.raises(ValueError):
+        decimal_type(scale=0, precision=max_precision + 1)
diff --git a/python/cudf/cudf/tests/dtypes/test_dtype.py b/python/cudf/cudf/tests/dtypes/test_dtype.py
new file mode 100644
index 00000000000..66de2386d35
--- /dev/null
+++ b/python/cudf/cudf/tests/dtypes/test_dtype.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "in_dtype,expect",
+    [
+        (np.dtype("int8"), np.dtype("int8")),
+        (np.int8, np.dtype("int8")),
+        (pd.Int8Dtype(), np.dtype("int8")),
+        (pd.StringDtype(), np.dtype("object")),
+        ("int8", np.dtype("int8")),
+        ("boolean", np.dtype("bool")),
+        ("bool_", np.dtype("bool")),
+        (np.bool_, np.dtype("bool")),
+        (int, np.dtype("int64")),
+        (float, np.dtype("float64")),
+        (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
+        (np.dtype("U"), np.dtype("object")),
+        ("timedelta64[ns]", np.dtype("<m8[ns]")),
+        {
+            "name": "var0",
+            "val": [
+                {"name": "var1", "val": None, "type": "optional"}
+            ],
+            "type": "list",
+        },
+        {},
+        {
+            "name": "var2",
+            "val": [
+                {
+                    "name": "var3",
+                    "val": {"field": 42},
+                    "type": "optional",
+                },
+                {
+                    "name": "var4",
+                    "val": {"field": 3.14},
+                    "type": "optional",
+                },
+            ],
+            "type": "list",
+        },
+        None,
+    ],
+    ],
+)
+def test_lists_of_structs_data(data):
+    got = cudf.Series(data)
+    expected = cudf.Series(pa.array(data))
+    assert_eq(got, expected)
+
+
 @pytest.fixture(
     params=[
         [1000000, 200000, 3000000],
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
deleted file mode 100644
index 103a286b892..00000000000
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# Copyright (c) 2020-2025, NVIDIA CORPORATION.
- -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest -from pandas.core.arrays.arrow.extension_types import ArrowIntervalType - -import cudf -from cudf.core.column import ColumnBase -from cudf.core.dtypes import ( - CategoricalDtype, - Decimal32Dtype, - Decimal64Dtype, - Decimal128Dtype, - IntervalDtype, - ListDtype, - StructDtype, -) -from cudf.testing import assert_eq -from cudf.utils.dtypes import cudf_dtype_to_pa_type - - -def test_cdt_basic(): - psr = pd.Series(["a", "b", "a", "c"], dtype="category") - sr = cudf.Series(["a", "b", "a", "c"], dtype="category") - assert isinstance(sr.dtype, CategoricalDtype) - assert_eq(sr.dtype.categories, psr.dtype.categories) - - -@pytest.mark.parametrize( - "data", [None, [], ["a"], [1], [1.0], ["a", "b", "c"]] -) -@pytest.mark.parametrize("ordered", [None, False, True]) -def test_cdt_eq(data, ordered): - dt = cudf.CategoricalDtype(categories=data, ordered=ordered) - assert dt == "category" - assert dt == dt - assert dt == cudf.CategoricalDtype(categories=None, ordered=ordered) - assert dt == cudf.CategoricalDtype(categories=data, ordered=ordered) - assert not dt == cudf.CategoricalDtype( - categories=data, ordered=not ordered - ) - - -@pytest.mark.parametrize( - "data", [None, [], ["a"], [1], [1.0], ["a", "b", "c"]] -) -@pytest.mark.parametrize("ordered", [None, False, True]) -def test_cdf_to_pandas(data, ordered): - assert ( - pd.CategoricalDtype(data, ordered) - == cudf.CategoricalDtype(categories=data, ordered=ordered).to_pandas() - ) - - -@pytest.mark.parametrize( - "value_type", - [ - int, - "int32", - np.int32, - "datetime64[ms]", - "datetime64[ns]", - "str", - "object", - ], -) -def test_list_dtype_pyarrow_round_trip(value_type): - pa_type = pa.list_(cudf_dtype_to_pa_type(cudf.dtype(value_type))) - expect = pa_type - got = ListDtype.from_arrow(expect).to_arrow() - assert expect.equals(got) - - -def test_list_dtype_eq(): - lhs = ListDtype("int32") - rhs = ListDtype("int32") - assert lhs == rhs - rhs = ListDtype("int64") - assert lhs != rhs - - -def test_list_nested_dtype(): - dt = ListDtype(ListDtype("int32")) - expect = ListDtype("int32") - got = dt.element_type - assert expect == got - - -@pytest.mark.parametrize( - "fields", - [ - {}, - {"a": "int64"}, - {"a": "datetime64[ms]"}, - {"a": "int32", "b": "int64"}, - ], -) -def test_struct_dtype_pyarrow_round_trip(fields): - pa_type = pa.struct( - {k: pa.from_numpy_dtype(np.dtype(v)) for k, v in fields.items()} - ) - expect = pa_type - got = StructDtype.from_arrow(expect).to_arrow() - assert expect.equals(got) - - -def test_struct_dtype_eq(): - lhs = StructDtype( - {"a": "int32", "b": StructDtype({"c": "int64", "ab": "int32"})} - ) - rhs = StructDtype( - {"a": "int32", "b": StructDtype({"c": "int64", "ab": "int32"})} - ) - assert lhs == rhs - rhs = StructDtype({"a": "int32", "b": "int64"}) - assert lhs != rhs - lhs = StructDtype({"b": "int64", "a": "int32"}) - assert lhs != rhs - - -@pytest.mark.parametrize( - "fields", - [ - {}, - {"a": "int32"}, - {"a": "object"}, - {"a": "str"}, - {"a": "datetime64[D]"}, - {"a": "int32", "b": "int64"}, - {"a": "int32", "b": StructDtype({"a": "int32", "b": "int64"})}, - ], -) -def test_struct_dtype_fields(fields): - fields = {"a": "int32", "b": StructDtype({"c": "int64", "d": "int32"})} - dt = StructDtype(fields) - assert_eq(dt.fields, fields) - - -@pytest.mark.parametrize( - "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], -) -def test_decimal_dtype_arrow_roundtrip(decimal_type): - dt = 
decimal_type(4, 2) - assert dt.to_arrow() == pa.decimal128(4, 2) - assert dt == decimal_type.from_arrow(pa.decimal128(4, 2)) - - -@pytest.mark.parametrize( - "decimal_type,max_precision", - [ - (cudf.Decimal32Dtype, 9), - (cudf.Decimal64Dtype, 18), - (cudf.Decimal128Dtype, 38), - ], -) -def test_max_precision(decimal_type, max_precision): - decimal_type(scale=0, precision=max_precision) - with pytest.raises(ValueError): - decimal_type(scale=0, precision=max_precision + 1) - - -@pytest.fixture(params=["int64", "int32"]) -def subtype(request): - return request.param - - -@pytest.fixture(params=["left", "right", "both", "neither"]) -def closed(request): - return request.param - - -def test_interval_dtype_pyarrow_round_trip(subtype, closed): - pa_array = ArrowIntervalType(subtype, closed) - expect = pa_array - got = IntervalDtype.from_arrow(expect).to_arrow() - assert expect.equals(got) - - -def test_interval_dtype_from_pandas(subtype, closed): - expect = cudf.IntervalDtype(subtype, closed=closed) - pd_type = pd.IntervalDtype(subtype, closed=closed) - got = cudf.IntervalDtype.from_pandas(pd_type) - assert expect == got - - -def assert_column_array_dtype_equal(column: ColumnBase, array: pa.array): - """ - In cudf, each column holds its dtype. And since column may have child - columns, child columns also holds their datatype. This method tests - that every level of `column` matches the type of the given `array` - recursively. - """ - - if isinstance(column.dtype, ListDtype): - return array.type.equals( - column.dtype.to_arrow() - ) and assert_column_array_dtype_equal( - column.base_children[1], array.values - ) - elif isinstance(column.dtype, StructDtype): - return array.type.equals(column.dtype.to_arrow()) and all( - assert_column_array_dtype_equal(child, array.field(i)) - for i, child in enumerate(column.base_children) - ) - elif isinstance( - column.dtype, (Decimal128Dtype, Decimal64Dtype, Decimal32Dtype) - ): - return array.type.equals(column.dtype.to_arrow()) - elif isinstance(column.dtype, CategoricalDtype): - raise NotImplementedError() - else: - return array.type.equals(cudf_dtype_to_pa_type(column.dtype)) - - -@pytest.mark.parametrize( - "data", - [ - [[{"name": 123}]], - [ - [ - { - "IsLeapYear": False, - "data": {"Year": 1999, "Month": 7}, - "names": ["Mike", None], - }, - { - "IsLeapYear": True, - "data": {"Year": 2004, "Month": 12}, - "names": None, - }, - { - "IsLeapYear": False, - "data": {"Year": 1996, "Month": 2}, - "names": ["Rose", "Richard"], - }, - ] - ], - [ - [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}], - [ - {"human?": None, "deets": {"weight": 5.3, "age": 25}}, - {"human?": False, "deets": {"weight": 8.0, "age": 31}}, - {"human?": False, "deets": None}, - ], - [], - None, - [{"human?": None, "deets": {"weight": 6.9, "age": None}}], - ], - [ - { - "name": "var0", - "val": [ - {"name": "var1", "val": None, "type": "optional"} - ], - "type": "list", - }, - {}, - { - "name": "var2", - "val": [ - { - "name": "var3", - "val": {"field": 42}, - "type": "optional", - }, - { - "name": "var4", - "val": {"field": 3.14}, - "type": "optional", - }, - ], - "type": "list", - }, - None, - ], - ], -) -def test_lists_of_structs_dtype(data): - got = cudf.Series(data) - expected = pa.array(data) - - assert_column_array_dtype_equal(got._column, expected) - assert expected.equals(got._column.to_arrow()) - - -@pytest.mark.parametrize( - "in_dtype,expect", - [ - (np.dtype("int8"), np.dtype("int8")), - (np.int8, np.dtype("int8")), - (pd.Int8Dtype(), np.dtype("int8")), - 
(pd.StringDtype(), np.dtype("object")),
-        ("int8", np.dtype("int8")),
-        ("boolean", np.dtype("bool")),
-        ("bool_", np.dtype("bool")),
-        (np.bool_, np.dtype("bool")),
-        (int, np.dtype("int64")),
-        (float, np.dtype("float64")),
-        (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
-        (np.dtype("U"), np.dtype("object")),
-        ("timedelta64[ns]", np.dtype("<m8[ns]")),

From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 14 Aug 2025 17:43:04 -0700
Subject: [PATCH 131/366] Move test_cuda_array_interface/cut/dataframe_copy.py
 to new cudf classic test directories (#19599)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19599
---
 python/cudf/cudf/tests/conftest.py            |   6 +
 .../methods/test_copy.py}                     |  80 +++---
 .../tests/{ => general_functions}/test_cut.py |  96 +++----
 .../test_register_accessor.py}                |   0
 .../cudf/cudf/tests/series/test_attributes.py |   8 +
 .../cudf/tests/series/test_constructors.py    | 192 ++++++++++++++
 .../cudf/tests/test_cuda_array_interface.py   | 237 ------------------
 7 files changed, 288 insertions(+), 331 deletions(-)
 rename python/cudf/cudf/tests/{test_dataframe_copy.py => dataframe/methods/test_copy.py} (73%)
 rename python/cudf/cudf/tests/{ => general_functions}/test_cut.py (74%)
 rename python/cudf/cudf/tests/{test_custom_accessor.py => general_functions/test_register_accessor.py} (100%)
 delete mode 100644 python/cudf/cudf/tests/test_cuda_array_interface.py

diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 217b9e6524e..0e193b57cd1 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -430,6 +430,12 @@ def all_supported_types_as_str(request):
     return request.param
 
 
+@pytest.fixture(params=[list, np.array])
+def one_dimensional_array_types(request):
+    """1D array containers commonly accepted by cuDF and pandas"""
+    return request.param
+
+
 # pandas can raise warnings for some inputs to the following ufuncs:
 numpy_ufuncs = []
 for name in dir(np):
diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/dataframe/methods/test_copy.py
similarity index 73%
rename from python/cudf/cudf/tests/test_dataframe_copy.py
rename to python/cudf/cudf/tests/dataframe/methods/test_copy.py
index 53257dc8f29..7910451db57 100644
--- a/python/cudf/cudf/tests/test_dataframe_copy.py
+++ b/python/cudf/cudf/tests/dataframe/methods/test_copy.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2025, NVIDIA CORPORATION.
+# Copyright (c) 2025, NVIDIA CORPORATION.
 from copy import copy, deepcopy
 
 import cupy as cp
@@ -8,7 +8,6 @@
 
 from cudf.core.dataframe import DataFrame
 from cudf.testing import assert_eq, assert_neq
-from cudf.testing._utils import ALL_TYPES
 
 """
 DataFrame copy expectations
@@ -22,22 +21,31 @@
 """
 
 
-@pytest.mark.parametrize(
-    "fn",
-    [
-        lambda x: x.copy(),
+@pytest.fixture(
+    params=[
+        lambda x: x.copy(deep=False),
         lambda x: x.copy(deep=True),
-        lambda x: copy(x),
-        lambda x: deepcopy(x),
+        copy,
+        deepcopy,
+    ],
+    ids=[
+        "DataFrame.copy(deep=False)",
+        "DataFrame.copy(deep=True)",
+        "copy.copy()",
+        "copy.deepcopy()",
     ],
 )
-def test_dataframe_deep_copy(fn):
+def copy_fn(request):
+    return request.param
+
+
+def test_dataframe_deep_copy(copy_fn):
     pdf = pd.DataFrame(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
     )
     gdf = DataFrame.from_pandas(pdf)
-    copy_pdf = fn(pdf)
-    copy_gdf = fn(gdf)
+    copy_pdf = copy_fn(pdf)
+    copy_gdf = copy_fn(gdf)
     copy_pdf["b"] = [0, 0, 0]
     copy_gdf["b"] = [0, 0, 0]
     pdf_is_equal = np.array_equal(pdf["b"].values, copy_pdf["b"].values)
@@ -48,30 +56,13 @@
     assert not gdf_is_equal
 
 
-"""
-DataFrame copy bounds checking - sizes 0 through 10 perform as
-expected_equality
-"""
-
-
-@pytest.mark.parametrize(
-    "copy_fn",
-    [
-        lambda x: x.copy(),
-        lambda x: x.copy(deep=True),
-        lambda x: copy(x),
-        lambda x: deepcopy(x),
-        lambda x: x.copy(deep=False),
-    ],
-)
-@pytest.mark.parametrize("ncols", [0, 1, 10])
-@pytest.mark.parametrize("data_type", ALL_TYPES)
-def test_cudf_dataframe_copy(copy_fn, ncols, data_type):
+@pytest.mark.parametrize("ncols", [0, 2])
+def test_cudf_dataframe_copy(copy_fn, ncols, all_supported_types_as_str):
     rng = np.random.default_rng(seed=0)
     pdf = pd.DataFrame(
         {
             chr(i + ord("a")): pd.Series(rng.integers(0, 1000, 20)).astype(
-                data_type
+                all_supported_types_as_str
             )
             for i in range(ncols)
         }
@@ -81,24 +72,15 @@
     assert_eq(df, copy_df)
 
 
-@pytest.mark.parametrize(
-    "copy_fn",
-    [
-        lambda x: x.copy(),
-        lambda x: x.copy(deep=True),
-        lambda x: copy(x),
-        lambda x: deepcopy(x),
-        lambda x: x.copy(deep=False),
-    ],
-)
-@pytest.mark.parametrize("ncols", [0, 1, 10])
-@pytest.mark.parametrize("data_type", ALL_TYPES)
-def test_cudf_dataframe_copy_then_insert(copy_fn, ncols, data_type):
+@pytest.mark.parametrize("ncols", [0, 2])
+def test_cudf_dataframe_copy_then_insert(
+    copy_fn, ncols, all_supported_types_as_str
+):
     rng = np.random.default_rng(seed=0)
     pdf = pd.DataFrame(
         {
             chr(i + ord("a")): pd.Series(rng.integers(0, 1000, 20)).astype(
-                data_type
+                all_supported_types_as_str
             )
             for i in range(ncols)
         }
@@ -106,8 +88,12 @@
     df = DataFrame.from_pandas(pdf)
     copy_df = copy_fn(df)
     copy_pdf = copy_fn(pdf)
-    copy_df["aa"] = pd.Series(rng.integers(0, 1000, 20)).astype(data_type)
-    copy_pdf["aa"] = pd.Series(rng.integers(0, 1000, 20)).astype(data_type)
+    copy_df["aa"] = pd.Series(rng.integers(0, 1000, 20)).astype(
+        all_supported_types_as_str
+    )
+    copy_pdf["aa"] = pd.Series(rng.integers(0, 1000, 20)).astype(
+        all_supported_types_as_str
+    )
     assert not copy_pdf.to_string().split() == pdf.to_string().split()
     assert not copy_df.to_string().split() == df.to_string().split()
diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/general_functions/test_cut.py
similarity index 74%
rename from python/cudf/cudf/tests/test_cut.py
rename to python/cudf/cudf/tests/general_functions/test_cut.py
index 3fc05599976..35e908ee035 100644
--- 
a/python/cudf/cudf/tests/test_cut.py +++ b/python/cudf/cudf/tests/general_functions/test_cut.py @@ -12,15 +12,29 @@ from cudf.testing import assert_eq -@pytest.mark.parametrize("box", [list, np.array]) +@pytest.fixture(params=[True, False]) +def right(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def include_lowest(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ordered(request): + return request.param + + @pytest.mark.parametrize("bins", [1, 2, 3]) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) -def test_cut_basic(box, bins, right, include_lowest, precision): +def test_cut_basic( + one_dimensional_array_types, bins, right, include_lowest, precision +): # will test optional labels, retbins and duplicates separately # they need more specific parameters to work - x = box([1, 7, 5, 4, 6, 3]) + x = one_dimensional_array_types([1, 7, 5, 4, 6, 3]) ordered = True pcat = pd.cut( x=x, @@ -43,9 +57,6 @@ def test_cut_basic(box, bins, right, include_lowest, precision): assert_eq(pindex, gindex) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) -@pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) @pytest.mark.parametrize( "labels", [["bad", "medium", "good"], [1, 2, 3], False] @@ -76,8 +87,6 @@ def test_cut_labels(right, include_lowest, ordered, precision, labels): assert_eq(pindex, gindex) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) @pytest.mark.parametrize("labels", [["bad", "good", "good"], [1, 2, 2], False]) def test_cut_labels_non_unique(right, include_lowest, precision, labels): @@ -110,28 +119,26 @@ def test_cut_labels_non_unique(right, include_lowest, precision, labels): @pytest.mark.parametrize( "x", [ - [1, 7, 5, 4, 6, 3], - [1, 7], - np.array([1, 7, 5, 4, 6, 3]), - np.array([2, 4, 6, 8, 10]), + [1, 7, 5], + [2, 4, 6, 8, 10], ], ) @pytest.mark.parametrize( "bins", [1, 2, 3, [1, 2, 3], [0, 2, 4, 6, 10]], ) -@pytest.mark.parametrize("right", [True, False]) -def test_cut_right(x, bins, right): +def test_cut_right(x, one_dimensional_array_types, bins, right): + arg = one_dimensional_array_types(x) precision = 3 pcat = pd.cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, ) pindex = pd.CategoricalIndex(pcat) gindex = cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -143,24 +150,23 @@ def test_cut_right(x, bins, right): @pytest.mark.parametrize( "x", [ - [1, 7, 5, 4, 6, 3], - [1, 7], - np.array([1, 7, 5, 4, 6, 3]), - np.array([2, 4, 6, 8, 10]), + [1, 7, 5], + [2, 4, 6, 8, 10], ], ) @pytest.mark.parametrize( "bins", [[0, 2, 4, 6, 10, 10], [1, 2, 2, 3, 3]], ) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) -def test_cut_drop_duplicates(x, bins, right, precision, include_lowest): +def test_cut_drop_duplicates( + x, one_dimensional_array_types, bins, right, precision, include_lowest +): ordered = True duplicates = "drop" + arg = one_dimensional_array_types(x) pcat = pd.cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -170,7 +176,7 @@ def test_cut_drop_duplicates(x, bins, right, precision, include_lowest): ) pindex = pd.CategoricalIndex(pcat) gindex = cut( - x=x, + 
x=arg, bins=bins, right=right, precision=precision, @@ -185,26 +191,25 @@ def test_cut_drop_duplicates(x, bins, right, precision, include_lowest): @pytest.mark.parametrize( "x", [ - [1, 7, 5, 4, 6, 3], - [1, 7], - np.array([1, 7, 5, 4, 6, 3]), - np.array([2, 4, 6, 8, 10]), + [1, 7, 5], + [2, 4, 6, 8, 10], ], ) @pytest.mark.parametrize( "bins", [[0, 2, 4, 6, 10, 10], [1, 2, 2, 3, 3]], ) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) -def test_cut_drop_duplicates_raises(x, bins, right, precision, include_lowest): +def test_cut_drop_duplicates_raises( + x, one_dimensional_array_types, bins, right, precision, include_lowest +): + arg = one_dimensional_array_types(x) ordered = True duplicates = "raise" msg = "Bin edges must be unique" with pytest.raises(ValueError, match=msg): cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -214,7 +219,7 @@ def test_cut_drop_duplicates_raises(x, bins, right, precision, include_lowest): ) with pytest.raises(ValueError, match=msg): pd.cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -227,20 +232,19 @@ def test_cut_drop_duplicates_raises(x, bins, right, precision, include_lowest): @pytest.mark.parametrize( "x", [ - [0, 0.5, 1.5, 2.5, 4.5], - [1, 7, 5, 4, 6, 3], - [1, 7], - np.array([1, 7, 5, 4, 6, 3]), - np.array([2, 4, 6, 8, 10]), + [1, 7, 5], + [2, 4, 6, 8, 10], ], ) -@pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("precision", [1, 3]) @pytest.mark.parametrize("duplicates", ["drop", "raise"]) -def test_cut_intervalindex_bin(x, right, precision, duplicates): +def test_cut_intervalindex_bin( + x, one_dimensional_array_types, right, precision, duplicates +): + arg = one_dimensional_array_types(x) bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) pcat = pd.cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -248,7 +252,7 @@ def test_cut_intervalindex_bin(x, right, precision, duplicates): ) pindex = pd.CategoricalIndex(pcat) gindex = cut( - x=x, + x=arg, bins=bins, right=right, precision=precision, @@ -259,8 +263,6 @@ def test_cut_intervalindex_bin(x, right, precision, duplicates): @pytest.mark.parametrize("bins", [1, 3]) -@pytest.mark.parametrize("right", [True, False]) -@pytest.mark.parametrize("include_lowest", [True, False]) def test_cut_series(bins, right, include_lowest): x = pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"]) precision = 3 diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/general_functions/test_register_accessor.py similarity index 100% rename from python/cudf/cudf/tests/test_custom_accessor.py rename to python/cudf/cudf/tests/general_functions/test_register_accessor.py diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index f105992872d..b79b1010cc3 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -251,6 +251,14 @@ def test_timedelta_contains(data, timedelta_types_as_str, scalar): assert_eq(expected, actual) +def test_cai_after_indexing(): + df = cudf.DataFrame({"a": [1, 2, 3]}) + cai1 = df["a"].__cuda_array_interface__ + df[["a"]] + cai2 = df["a"].__cuda_array_interface__ + assert cai1 == cai2 + + @pytest.mark.parametrize( "data, expected", [ diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py 
index d4588005999..342f60c1580 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -1,8 +1,10 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. import datetime import decimal +import types import cupy as cp +import numba.cuda import numpy as np import pandas as pd import pyarrow as pa @@ -14,6 +16,7 @@ PANDAS_GE_210, PANDAS_VERSION, ) +from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column.column import as_column from cudf.errors import MixedTypeError from cudf.testing import assert_eq @@ -778,6 +781,195 @@ def test_series_arrow_decimal_types_roundtrip(pa_type): assert_eq(pdf, gdf) +@pytest.mark.parametrize("module", ["cupy", "numba"]) +def test_cuda_array_interface_interop_in( + numeric_and_temporal_types_as_str, module +): + if module == "cupy": + module_constructor = cp.array + if numeric_and_temporal_types_as_str.startswith( + "datetime" + ) or numeric_and_temporal_types_as_str.startswith("timedelta"): + pytest.skip( + f"cupy doesn't support {numeric_and_temporal_types_as_str}" + ) + elif module == "numba": + module_constructor = numba.cuda.to_device + + np_data = np.arange(10).astype(numeric_and_temporal_types_as_str) + module_data = module_constructor(np_data) + + pd_data = pd.Series(np_data) + # Test using a specific function for __cuda_array_interface__ here + cudf_data = cudf.Series(module_data) + + assert_eq(pd_data, cudf_data) + + gdf = cudf.DataFrame() + gdf["test"] = module_data + pd_data.name = "test" + assert_eq(pd_data, gdf["test"]) + + +@pytest.mark.parametrize("module", ["cupy", "numba"]) +def test_cuda_array_interface_interop_out( + numeric_and_temporal_types_as_str, module +): + if module == "cupy": + module_constructor = cp.asarray + + def to_host_function(x): + return cp.asnumpy(x) + elif module == "numba": + module_constructor = numba.cuda.as_cuda_array + + def to_host_function(x): + return x.copy_to_host() + + np_data = np.arange(10).astype(numeric_and_temporal_types_as_str) + cudf_data = cudf.Series(np_data) + assert isinstance(cudf_data.__cuda_array_interface__, dict) + + module_data = module_constructor(cudf_data) + got = to_host_function(module_data) + + expect = np_data + + assert_eq(expect, got) + + +def test_cuda_array_interface_interop_out_masked( + numeric_and_temporal_types_as_str, +): + np_data = np.arange(10).astype("float64") + np_data[[0, 2, 4, 6, 8]] = np.nan + + cudf_data = cudf.Series(np_data).astype(numeric_and_temporal_types_as_str) + cai = cudf_data.__cuda_array_interface__ + assert isinstance(cai, dict) + assert "mask" in cai + + +@pytest.mark.parametrize("nulls", ["all", "some", "bools", "none"]) +@pytest.mark.parametrize("mask_type", ["bits", "bools"]) +def test_cuda_array_interface_as_column( + numeric_and_temporal_types_as_str, nulls, mask_type +): + sr = cudf.Series(np.arange(10)) + + if nulls == "some": + mask = [ + True, + False, + True, + False, + False, + True, + True, + False, + True, + True, + ] + sr[sr[~np.asarray(mask)]] = None + elif nulls == "all": + sr[:] = None + + sr = sr.astype(numeric_and_temporal_types_as_str) + + obj = types.SimpleNamespace( + __cuda_array_interface__=sr.__cuda_array_interface__ + ) + + if mask_type == "bools": + if nulls == "some": + obj.__cuda_array_interface__["mask"] = numba.cuda.to_device(mask) + elif nulls == "all": + obj.__cuda_array_interface__["mask"] = numba.cuda.to_device( + [False] * 10 + ) + + expect = sr + got = cudf.Series(obj) + + assert_eq(expect, got) + + +def 
test_series_from_ephemeral_cupy(): + # Test that we keep a reference to the ephemeral + # CuPy array. If we didn't, then `a` would end + # up referring to the same memory as `b` due to + # CuPy's caching allocator + a = cudf.Series(cp.asarray([1, 2, 3])) + b = cudf.Series(cp.asarray([1, 1, 1])) + assert_eq(pd.Series([1, 2, 3]), a) + assert_eq(pd.Series([1, 1, 1]), b) + + +def test_column_from_ephemeral_cupy_try_lose_reference(): + # Try to lose the reference we keep to the ephemeral + # CuPy array + a = cudf.Series(cp.asarray([1, 2, 3]))._column + a = cudf.core.column.as_column(a) + b = cp.asarray([1, 1, 1]) + assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) + + a = cudf.Series(cp.asarray([1, 2, 3]))._column + a.name = "b" + b = cp.asarray([1, 1, 1]) # noqa: F841 + assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) + + +@pytest.mark.xfail( + get_global_manager() is not None, + reason=( + "spilling doesn't support PyTorch, see " + "`cudf.core.buffer.spillable_buffer.DelayedPointerTuple`" + ), +) +def test_cuda_array_interface_pytorch(): + torch = pytest.importorskip("torch", minversion="2.4.0") + if not torch.cuda.is_available(): + pytest.skip("need gpu version of pytorch to be installed") + + series = cudf.Series([1, -1, 10, -56]) + tensor = torch.tensor(series) + got = cudf.Series(tensor) + + assert_eq(got, series) + buffer = cudf.core.buffer.as_buffer(cp.ones(10, dtype=np.bool_)) + tensor = torch.tensor(buffer) + got = cudf.Series(tensor, dtype=np.bool_) + + assert_eq(got, cudf.Series(buffer, dtype=np.bool_)) + + index = cudf.Index([], dtype="float64") + tensor = torch.tensor(index) + got = cudf.Index(tensor) + assert_eq(got, index) + + index = cudf.RangeIndex(start=0, stop=3) + tensor = torch.tensor(index) + got = cudf.Series(tensor) + + assert_eq(got, cudf.Series(index)) + + index = cudf.Index([1, 2, 8, 6]) + tensor = torch.tensor(index) + got = cudf.Index(tensor) + + assert_eq(got, index) + + str_series = cudf.Series(["a", "g"]) + + with pytest.raises(AttributeError): + str_series.__cuda_array_interface__ + + cat_series = str_series.astype("category") + + with pytest.raises(TypeError): + cat_series.__cuda_array_interface__ + + def test_series_arrow_struct_types_roundtrip(): ps = pd.Series( [{"a": 1}, {"b": "abc"}], diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py deleted file mode 100644 index e163f62282b..00000000000 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
- -import types -from contextlib import nullcontext as does_not_raise - -import cupy -import numba.cuda -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing import assert_eq -from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) -@pytest.mark.parametrize("module", ["cupy", "numba"]) -def test_cuda_array_interface_interop_in(dtype, module): - np_data = np.arange(10).astype(dtype) - - expectation = does_not_raise() - if module == "cupy": - module_constructor = cupy.array - if dtype in DATETIME_TYPES: - expectation = pytest.raises(ValueError) - elif module == "numba": - module_constructor = numba.cuda.to_device - - with expectation: - module_data = module_constructor(np_data) - - pd_data = pd.Series(np_data) - # Test using a specific function for __cuda_array_interface__ here - cudf_data = cudf.Series(module_data) - - assert_eq(pd_data, cudf_data) - - gdf = cudf.DataFrame() - gdf["test"] = module_data - pd_data.name = "test" - assert_eq(pd_data, gdf["test"]) - - -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["str"] -) -@pytest.mark.parametrize("module", ["cupy", "numba"]) -def test_cuda_array_interface_interop_out(dtype, module): - expectation = does_not_raise() - if dtype == "str": - expectation = pytest.raises(AttributeError) - if module == "cupy": - module_constructor = cupy.asarray - - def to_host_function(x): - return cupy.asnumpy(x) - - elif module == "numba": - module_constructor = numba.cuda.as_cuda_array - - def to_host_function(x): - return x.copy_to_host() - - with expectation: - np_data = np.arange(10).astype(dtype) - cudf_data = cudf.Series(np_data) - assert isinstance(cudf_data.__cuda_array_interface__, dict) - - module_data = module_constructor(cudf_data) - got = to_host_function(module_data) - - expect = np_data - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES -) -@pytest.mark.parametrize("module", ["cupy", "numba"]) -def test_cuda_array_interface_interop_out_masked(dtype, module): - expectation = does_not_raise() - if module == "cupy": - pytest.skip( - "cupy doesn't support version 1 of `__cuda_array_interface__` yet" - ) - module_constructor = cupy.asarray - - def to_host_function(x): - return cupy.asnumpy(x) - - elif module == "numba": - expectation = pytest.raises(NotImplementedError) - module_constructor = numba.cuda.as_cuda_array - - def to_host_function(x): - return x.copy_to_host() - - np_data = np.arange(10).astype("float64") - np_data[[0, 2, 4, 6, 8]] = np.nan - - with expectation: - cudf_data = cudf.Series(np_data).astype(dtype) - assert isinstance(cudf_data.__cuda_array_interface__, dict) - - module_data = module_constructor(cudf_data) # noqa: F841 - - -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES -) -@pytest.mark.parametrize("nulls", ["all", "some", "bools", "none"]) -@pytest.mark.parametrize("mask_type", ["bits", "bools"]) -def test_cuda_array_interface_as_column(dtype, nulls, mask_type): - sr = cudf.Series(np.arange(10)) - - if nulls == "some": - mask = [ - True, - False, - True, - False, - False, - True, - True, - False, - True, - True, - ] - sr[sr[~np.asarray(mask)]] = None - elif nulls == "all": - sr[:] = None - - sr = sr.astype(dtype) - - obj = types.SimpleNamespace( - 
__cuda_array_interface__=sr.__cuda_array_interface__ - ) - - if mask_type == "bools": - if nulls == "some": - obj.__cuda_array_interface__["mask"] = numba.cuda.to_device(mask) - elif nulls == "all": - obj.__cuda_array_interface__["mask"] = numba.cuda.to_device( - [False] * 10 - ) - - expect = sr - got = cudf.Series(obj) - - assert_eq(expect, got) - - -def test_column_from_ephemeral_cupy(): - # Test that we keep a reference to the ephemeral - # CuPy array. If we didn't, then `a` would end - # up referring to the same memory as `b` due to - # CuPy's caching allocator - a = cudf.Series(cupy.asarray([1, 2, 3])) - b = cudf.Series(cupy.asarray([1, 1, 1])) - assert_eq(pd.Series([1, 2, 3]), a) - assert_eq(pd.Series([1, 1, 1]), b) - - -def test_column_from_ephemeral_cupy_try_lose_reference(): - # Try to lose the reference we keep to the ephemeral - # CuPy array - a = cudf.Series(cupy.asarray([1, 2, 3]))._column - a = cudf.core.column.as_column(a) - b = cupy.asarray([1, 1, 1]) - assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) - - a = cudf.Series(cupy.asarray([1, 2, 3]))._column - a.name = "b" - b = cupy.asarray([1, 1, 1]) # noqa: F841 - assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) - - -@pytest.mark.xfail( - get_global_manager() is not None, - reason=( - "spilling doesn't support PyTorch, see " - "`cudf.core.buffer.spillable_buffer.DelayedPointerTuple`" - ), -) -def test_cuda_array_interface_pytorch(): - torch = pytest.importorskip("torch", minversion="2.4.0") - if not torch.cuda.is_available(): - pytest.skip("need gpu version of pytorch to be installed") - - series = cudf.Series([1, -1, 10, -56]) - tensor = torch.tensor(series) - got = cudf.Series(tensor) - - assert_eq(got, series) - buffer = cudf.core.buffer.as_buffer(cupy.ones(10, dtype=np.bool_)) - tensor = torch.tensor(buffer) - got = cudf.Series(tensor, dtype=np.bool_) - - assert_eq(got, cudf.Series(buffer, dtype=np.bool_)) - - index = cudf.Index([], dtype="float64") - tensor = torch.tensor(index) - got = cudf.Index(tensor) - assert_eq(got, index) - - index = cudf.core.index.RangeIndex(start=0, stop=100) - tensor = torch.tensor(index) - got = cudf.Series(tensor) - - assert_eq(got, cudf.Series(index)) - - index = cudf.Index([1, 2, 8, 6]) - tensor = torch.tensor(index) - got = cudf.Index(tensor) - - assert_eq(got, index) - - str_series = cudf.Series(["a", "g"]) - - with pytest.raises(AttributeError): - str_series.__cuda_array_interface__ - - cat_series = str_series.astype("category") - - with pytest.raises(TypeError): - cat_series.__cuda_array_interface__ - - -def test_cai_after_indexing(): - df = cudf.DataFrame({"a": [1, 2, 3]}) - cai1 = df["a"].__cuda_array_interface__ - df[["a"]] - cai2 = df["a"].__cuda_array_interface__ - assert cai1 == cai2 From 42b1e19db324c4b11ae4628c0798f6bc606fe036 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 14 Aug 2025 22:45:01 -0400 Subject: [PATCH 132/366] Match polars semantics for rolling-sum with all-null windows (non-empty) (#19680) Closes https://github.com/rapidsai/cudf/issues/19679. 
Also unblocks #19242 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19680 --- .../cudf_polars/cudf_polars/dsl/translate.py | 6 +- .../cudf_polars/dsl/utils/aggregations.py | 80 +++++++++++++++---- .../cudf_polars/dsl/utils/groupby.py | 7 +- .../cudf_polars/dsl/utils/rolling.py | 8 +- .../tests/expressions/test_rolling.py | 14 ++++ python/cudf_polars/tests/test_groupby.py | 12 +++ python/cudf_polars/tests/test_rolling.py | 14 ++++ 7 files changed, 124 insertions(+), 17 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 5c5537be43e..aa82883a51d 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -21,6 +21,7 @@ from cudf_polars.containers import DataType from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.expressions.base import ExecutionContext from cudf_polars.dsl.to_ast import insert_colrefs from cudf_polars.dsl.utils.aggregations import decompose_single_agg from cudf_polars.dsl.utils.groupby import rewrite_groupby @@ -691,7 +692,10 @@ def _( agg = translator.translate_expr(n=node.function, schema=schema) name_generator = unique_names(schema) aggs, named_post_agg = decompose_single_agg( - expr.NamedExpr(next(name_generator), agg), name_generator, is_top=True + expr.NamedExpr(next(name_generator), agg), + name_generator, + is_top=True, + context=ExecutionContext.ROLLING, ) named_aggs = [agg for agg, _ in aggs] orderby = node.options.index_column diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index a8280c4c3bc..e1ed7a0d3a2 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -15,6 +15,7 @@ from cudf_polars.containers import DataType from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.expressions.base import ExecutionContext if TYPE_CHECKING: from collections.abc import Callable, Generator, Iterable, Sequence @@ -53,6 +54,7 @@ def decompose_single_agg( name_generator: Generator[str, None, None], *, is_top: bool, + context: ExecutionContext, ) -> tuple[list[tuple[expr.NamedExpr, bool]], expr.NamedExpr]: """ Decompose a single named aggregation. @@ -65,6 +67,8 @@ def decompose_single_agg( Generator of unique names for temporaries introduced during decomposition. is_top Is this the top of an aggregation expression? + context + ExecutionContext in which the aggregation will run. 
Returns ------- @@ -94,7 +98,10 @@ def decompose_single_agg( # Special case to fill nulls with zeros for empty group length calculations (child,) = agg.children child_agg, post = decompose_single_agg( - expr.NamedExpr(next(name_generator), child), name_generator, is_top=True + expr.NamedExpr(next(name_generator), child), + name_generator, + is_top=True, + context=context, ) return child_agg, named_expr.reconstruct( replace_nulls( @@ -121,7 +128,10 @@ def decompose_single_agg( # pl.col("a").nan_max or nan_min raise NotImplementedError("Nan propagation in groupby for min/max") aggs, _ = decompose_single_agg( - expr.NamedExpr(next(name_generator), child), name_generator, is_top=False + expr.NamedExpr(next(name_generator), child), + name_generator, + is_top=False, + context=context, ) if any(has_agg for _, has_agg in aggs): raise NotImplementedError("Nested aggs in groupby not supported") @@ -143,14 +153,43 @@ def decompose_single_agg( ) else expr.Col(agg.dtype, name) ) - return [(named_expr, True)], expr.NamedExpr( - name, - # In polars sum(empty_group) => 0, but in libcudf - # sum(empty_group) => null So must post-process by - # replacing nulls, but only if we're a "top-level" - # agg. - replace_nulls(col, 0, is_top=is_top), - ) + # Polars semantics for sum differ by context: + # - GROUPBY: sum(all-null group) => 0; sum(empty group) => 0 (fill-null) + # - ROLLING: sum(all-null window) => null; sum(empty window) => 0 (fill only if empty) + # + # Must post-process because libcudf returns null for both empty and all-null windows/groups + if context == ExecutionContext.GROUPBY: + # GROUPBY: always fill top-level nulls with 0 + return [(named_expr, True)], expr.NamedExpr( + name, replace_nulls(col, 0, is_top=is_top) + ) + else: + # ROLLING: + # Add a second rolling agg to compute the window size, then only + # replace nulls with 0 when the window size is 0 (ie. empty window). + win_len_name = next(name_generator) + win_len = expr.NamedExpr( + win_len_name, + expr.Len(DataType(pl.Int32())), + ) + + win_len_col = expr.Col(DataType(pl.Int32()), win_len_name) + win_len_filled = replace_nulls(win_len_col, 0, is_top=True) + + is_empty = expr.BinOp( + DataType(pl.Boolean()), + plc.binaryop.BinaryOperator.EQUAL, + win_len_filled, + expr.Literal(DataType(pl.Int32()), 0), + ) + + # If empty -> fill 0; else keep libcudf's semantics for all-null windows. 
+ filled = replace_nulls(col, 0, is_top=is_top) + post_ternary_expr = expr.Ternary(agg.dtype, is_empty, filled, col) + + return [(named_expr, True), (win_len, True)], expr.NamedExpr( + name, post_ternary_expr + ) elif agg.name == "mean": post_agg_col: expr.Expr = expr.Col( DataType(pl.Float64), name @@ -174,6 +213,7 @@ def decompose_single_agg( (expr.NamedExpr(next(name_generator), child) for child in agg.children), name_generator, is_top=False, + context=context, ) if any(has_agg for _, has_agg in aggs): if not all( @@ -202,16 +242,23 @@ def _decompose_aggs( name_generator: Generator[str, None, None], *, is_top: bool, + context: ExecutionContext, ) -> tuple[list[tuple[expr.NamedExpr, bool]], Sequence[expr.NamedExpr]]: new_aggs, post = zip( - *(decompose_single_agg(agg, name_generator, is_top=is_top) for agg in aggs), + *( + decompose_single_agg(agg, name_generator, is_top=is_top, context=context) + for agg in aggs + ), strict=True, ) return list(itertools.chain.from_iterable(new_aggs)), post def decompose_aggs( - aggs: Iterable[expr.NamedExpr], name_generator: Generator[str, None, None] + aggs: Iterable[expr.NamedExpr], + name_generator: Generator[str, None, None], + *, + context: ExecutionContext, ) -> tuple[list[expr.NamedExpr], Sequence[expr.NamedExpr]]: """ Process arbitrary aggregations into a form we can handle in grouped aggregations. @@ -222,6 +269,8 @@ def decompose_aggs( List of aggregation expressions name_generator Generator of unique names for temporaries introduced during decomposition. + context + ExecutionContext in which the aggregation will run. Returns ------- @@ -241,7 +290,7 @@ def decompose_aggs( NotImplementedError For unsupported aggregation combinations. """ - new_aggs, post = _decompose_aggs(aggs, name_generator, is_top=True) + new_aggs, post = _decompose_aggs(aggs, name_generator, is_top=True, context=context) return [agg for agg, _ in new_aggs], post @@ -250,6 +299,7 @@ def apply_pre_evaluation( keys: Sequence[expr.NamedExpr], original_aggs: Sequence[expr.NamedExpr], name_generator: Generator[str, None, None], + context: ExecutionContext, *extra_columns: expr.NamedExpr, ) -> tuple[Sequence[expr.NamedExpr], Schema, Callable[[ir.IR], ir.IR]]: """ @@ -265,6 +315,8 @@ def apply_pre_evaluation( Aggregation expressions to rewrite. name_generator Generator of unique names for temporaries introduced during decomposition. + context + ExecutionContext in which the aggregation will run. extra_columns Any additional columns to be included in the output (only relevant for rolling aggregations). Columns will appear in the @@ -285,7 +337,7 @@ def apply_pre_evaluation( NotImplementedError If the aggregations are somehow unsupported. 
""" - aggs, post = decompose_aggs(original_aggs, name_generator) + aggs, post = decompose_aggs(original_aggs, name_generator, context=context) assert len(post) == len(original_aggs), ( f"Unexpected number of post-aggs {len(post)=} {len(original_aggs)=}" ) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/groupby.py b/python/cudf_polars/cudf_polars/dsl/utils/groupby.py index 7116bcab022..027af9c08be 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/groupby.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/groupby.py @@ -10,6 +10,7 @@ import pylibcudf as plc from cudf_polars.dsl import ir +from cudf_polars.dsl.expressions.base import ExecutionContext from cudf_polars.dsl.utils.aggregations import apply_pre_evaluation from cudf_polars.dsl.utils.naming import unique_names @@ -78,7 +79,11 @@ def rewrite_groupby( ) aggs, group_schema, apply_post_evaluation = apply_pre_evaluation( - schema, keys, aggs, unique_names(schema.keys()) + schema, + keys, + aggs, + unique_names(schema.keys()), + ExecutionContext.GROUPBY, ) # TODO: use Distinct when the partitioned executor supports it if # the requested aggregations are empty diff --git a/python/cudf_polars/cudf_polars/dsl/utils/rolling.py b/python/cudf_polars/cudf_polars/dsl/utils/rolling.py index d4526537824..526c5823f31 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/rolling.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/rolling.py @@ -10,6 +10,7 @@ import pylibcudf as plc from cudf_polars.dsl import expr, ir +from cudf_polars.dsl.expressions.base import ExecutionContext from cudf_polars.dsl.utils.aggregations import apply_pre_evaluation from cudf_polars.dsl.utils.naming import unique_names from cudf_polars.dsl.utils.windows import offsets_to_windows @@ -81,7 +82,12 @@ def rewrite_rolling( temp_prefix = "_" * max(map(len, schema)) if len(aggs) > 0: aggs, rolling_schema, apply_post_evaluation = apply_pre_evaluation( - schema, keys, aggs, unique_names(temp_prefix), index + schema, + keys, + aggs, + unique_names(temp_prefix), + ExecutionContext.ROLLING, + index, ) else: rolling_schema = schema diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py index 68b1ada463c..0253f14c416 100644 --- a/python/cudf_polars/tests/expressions/test_rolling.py +++ b/python/cudf_polars/tests/expressions/test_rolling.py @@ -142,3 +142,17 @@ def test_rolling_inside_groupby_raises(): q.collect(engine="in-memory") assert_ir_translation_raises(q, NotImplementedError) + + +def test_rolling_sum_all_null_window_returns_null(): + df = pl.LazyFrame( + { + "orderby": [1, 2, 3, 4, 5, 6], + "null_windows": [None, None, 5, None, None, 1], + } + ) + q = df.select( + out=pl.col("null_windows").sum().rolling("orderby", period="2i", closed="both") + ) + # Expected: [null, null, 5, 5, 5, 1] + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index f60a46b52d6..b47cc2962db 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -311,3 +311,15 @@ def test_groupby_mean_type_promotion(df: pl.LazyFrame) -> None: q = df.group_by("key1").agg(pl.col("float").mean()) assert_gpu_result_equal(q, check_row_order=False) + + +def test_groupby_sum_all_null_group_returns_null(): + df = pl.LazyFrame( + { + "key": ["a", "a", "b", "b", "c"], + "null_groups": [None, None, None, 2, None], + } + ) + + q = df.group_by("key").agg(out=pl.col("null_groups").sum()) + assert_gpu_result_equal(q, check_row_order=False) 
diff --git a/python/cudf_polars/tests/test_rolling.py b/python/cudf_polars/tests/test_rolling.py
index 23862cb9adb..dd473078fbd 100644
--- a/python/cudf_polars/tests/test_rolling.py
+++ b/python/cudf_polars/tests/test_rolling.py
@@ -202,3 +202,17 @@ def test_unsupported_agg():
         .agg(pl.col("values").n_unique())
     )
     assert_ir_translation_raises(q, NotImplementedError)
+
+
+def test_rolling_sum_all_null_window_returns_null():
+    df = pl.LazyFrame(
+        {
+            "orderby": [1, 2, 3, 4, 5, 6],
+            "null_windows": [None, None, 5, None, None, 1],
+        }
+    )
+    q = df.rolling("orderby", period="2i", closed="both").agg(
+        out=pl.col("null_windows").sum()
+    )
+    # Expected: [0, 0, 5, 5, 5, 1]
+    assert_gpu_result_equal(q)

From 4a309b436685c605404fd82983673c1e94b7cd8c Mon Sep 17 00:00:00 2001
From: Chong Gao
Date: Fri, 15 Aug 2025 11:22:56 +0800
Subject: [PATCH 133/366] Add Java JNI interface to get Gpu UUID (#19646)

Contributes to https://github.com/NVIDIA/spark-rapids/issues/12982

Different GPUs should generate different UUID sequences. I plan to use the GPU card UUID to participate in the calculation of an initial seed, so this PR exposes the GPU UUID. The seed will be calculated on the Java side; the following information will participate in the initial seed:
- GPU card UUID
- Current timestamp
- Java process ID
- Java process start time
- a static incremental sequence ID on the Java side.
......

Maybe more, to make sure no identical UUID sequences are generated.

Authors:
  - Chong Gao (https://github.com/res-life)

Approvers:
  - Liangcai Li (https://github.com/firestarman)

URL: https://github.com/rapidsai/cudf/pull/19646
---
 java/src/main/java/ai/rapids/cudf/Cuda.java    | 18 ++++++++++++++++++
 java/src/main/native/src/CudaJni.cpp           | 14 ++++++++++++++
 .../src/test/java/ai/rapids/cudf/CudaTest.java | 10 ++++++++++
 3 files changed, 42 insertions(+)

diff --git a/java/src/main/java/ai/rapids/cudf/Cuda.java b/java/src/main/java/ai/rapids/cudf/Cuda.java
index 7cc3d30a9cf..36559f5aa0e 100755
--- a/java/src/main/java/ai/rapids/cudf/Cuda.java
+++ b/java/src/main/java/ai/rapids/cudf/Cuda.java
@@ -270,6 +270,16 @@ public static CudaComputeMode getComputeMode() {
     return CudaComputeMode.fromNative(Cuda.getNativeComputeMode());
   }
 
+  /**
+   * Gets the GPU UUID of the current device.
+   *
+   * @return UUID of the current device as a byte array.
+   */
+  public static byte[] getGpuUuid() {
+    return getNativeGpuUuid();
+  }
+
+
   /**
    * Mapping: cudaMemGetInfo(size_t *free, size_t *total)
    */
@@ -400,6 +410,14 @@ static void asyncMemcpy(long dst, long src, long count, CudaMemcpyKind kind) {
    */
   static native int getNativeComputeMode() throws CudaException;
 
+  /**
+   * Gets the GPU UUID of the current device.
+   *
+   * @return UUID of the current device as a byte array.
+   * @throws CudaException on any error
+   */
+  static native byte[] getNativeGpuUuid() throws CudaException;
+
   /**
    * Gets the major CUDA compute capability of the current device.
* diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp index c5359c821ae..8b212d671cd 100644 --- a/java/src/main/native/src/CudaJni.cpp +++ b/java/src/main/native/src/CudaJni.cpp @@ -216,6 +216,20 @@ JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getNativeComputeMode(JNIEnv* env CATCH_STD(env, -2); } +JNIEXPORT jbyteArray JNICALL Java_ai_rapids_cudf_Cuda_getNativeGpuUuid(JNIEnv* env, jclass) +{ + try { + cudf::jni::auto_set_device(env); + int device; + CUDF_CUDA_TRY(cudaGetDevice(&device)); + cudaDeviceProp device_prop; + CUDF_CUDA_TRY(cudaGetDeviceProperties(&device_prop, device)); + cudf::jni::native_jbyteArray jbytes{env, (jbyte*)device_prop.uuid.bytes, 16}; + return jbytes.get_jArray(); + } + CATCH_STD(env, nullptr); +} + JNIEXPORT jint JNICALL Java_ai_rapids_cudf_Cuda_getComputeCapabilityMajor(JNIEnv* env, jclass) { try { diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java index a741b0a5e31..4e55bb8f903 100644 --- a/java/src/test/java/ai/rapids/cudf/CudaTest.java +++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; public class CudaTest { @@ -32,6 +33,15 @@ public void testGetCudaRuntimeInfo() { assert Cuda.getDriverVersion() >= 1000; assert Cuda.getRuntimeVersion() >= 1000; assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId); + + // test UUID + byte[] uuid = Cuda.getGpuUuid(); + assertEquals(uuid.length, 16); + long v = 0; + for (int i = 0; i < uuid.length; i++) { + v += uuid[i]; + } + assertNotEquals(0, v); } @Tag("noSanitizer") From b39b1f21cf69a17c68ada6d90be04ac41818a236 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Aug 2025 10:02:44 -0700 Subject: [PATCH 134/366] Move test_factorize/drop_duplicates.py to new cudf classic test directory (#19681) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19681 --- .../methods/test_drop_duplicates.py} | 130 ++++--------- .../tests/general_functions/test_factorize.py | 59 ++++++ .../indexes/index/methods/test_factorize.py | 42 ++++ .../series/methods/test_drop_duplicates.py | 33 ++++ .../tests/series/methods/test_factorize.py | 86 +++++++++ python/cudf/cudf/tests/test_factorize.py | 179 ------------------ 6 files changed, 261 insertions(+), 268 deletions(-) rename python/cudf/cudf/tests/{test_duplicates.py => dataframe/methods/test_drop_duplicates.py} (86%) create mode 100644 python/cudf/cudf/tests/general_functions/test_factorize.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_factorize.py create mode 100644 python/cudf/cudf/tests/series/methods/test_drop_duplicates.py delete mode 100644 python/cudf/cudf/tests/test_factorize.py diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/dataframe/methods/test_drop_duplicates.py similarity index 86% rename from python/cudf/cudf/tests/test_duplicates.py rename to python/cudf/cudf/tests/dataframe/methods/test_drop_duplicates.py index ce7e80e0dba..dd6479ab949 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ 
b/python/cudf/cudf/tests/dataframe/methods/test_drop_duplicates.py @@ -1,7 +1,5 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. -import itertools -import random import numpy as np import pandas as pd @@ -12,8 +10,6 @@ from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal -# most tests are similar to pandas drop_duplicates - @pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) def test_duplicated_with_misspelled_column_name(subset): @@ -28,29 +24,19 @@ def test_duplicated_with_misspelled_column_name(subset): ) -@pytest.mark.parametrize("keep", ["first", "last", False]) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize( - "data", - [ - [1, 2, 4, 5, 6, 6], - [], - ["a", "b", "s", "sd", "a", "b"], - pd.Series(["aaa"] * 10, dtype="object"), - ], -) -def test_drop_duplicates_series(data, keep, ignore_index): - pds = pd.Series(data) - gds = cudf.from_pandas(pds) - - assert_eq( - pds.drop_duplicates(keep=keep, ignore_index=ignore_index), - gds.drop_duplicates(keep=keep, ignore_index=ignore_index), +@pytest.mark.xfail(reason="cudf does not support duplicate column names yet") +def test_drop_duplicates_with_duplicate_column_names(): + df = pd.DataFrame( + [[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"] ) + df = cudf.DataFrame.from_pandas(df) - pds.drop_duplicates(keep=keep, inplace=True, ignore_index=ignore_index) - gds.drop_duplicates(keep=keep, inplace=True, ignore_index=ignore_index) - assert_eq(pds, gds) + result0 = df.drop_duplicates() + assert_eq(result0, df) + + result1 = df.drop_duplicates("a") + expected1 = df[:2] + assert_eq(result1, expected1) def test_drop_duplicates(): @@ -127,20 +113,28 @@ def test_drop_duplicates(): expected = pdf.drop_duplicates("E", keep="last") assert_eq(result, expected) + +def test_drop_duplicates_integers(): pdf = pd.DataFrame( {"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]} ) gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) + +def test_drop_duplicates_integers_positive(): pdf = pd.DataFrame([[1, 0], [0, 2]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) + +def test_drop_duplicates_integers_negative(): pdf = pd.DataFrame([[-2, 0], [0, -4]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) + +def test_drop_duplicates_integers_max(): x = np.iinfo(np.int64).max / 3 * 2 pdf = pd.DataFrame([[-x, x], [0, x + 4]]) gdf = cudf.DataFrame.from_pandas(pdf) @@ -150,28 +144,17 @@ def test_drop_duplicates(): gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) + +def test_drop_duplicates_integers_unique(): pdf = pd.DataFrame([i] * 9 for i in range(16)) pdf = pd.concat([pdf, pd.DataFrame([[1] + [0] * 8])], ignore_index=True) gdf = cudf.DataFrame.from_pandas(pdf) assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) -@pytest.mark.xfail(reason="cudf does not support duplicate column names yet") -def test_drop_duplicates_with_duplicate_column_names(): - df = pd.DataFrame( - [[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"] - ) - df = cudf.DataFrame.from_pandas(df) - - result0 = df.drop_duplicates() - assert_eq(result0, df) - - result1 = df.drop_duplicates("a") - expected1 = df[:2] - assert_eq(result1, expected1) - - -def test_drop_duplicates_for_take_all(): +@pytest.mark.parametrize("subset", ["AAA", ["AAA", "B"]]) +@pytest.mark.parametrize("keep", 
["first", "last", False]) +def test_drop_duplicates_for_take_all(subset, keep): pdf = pd.DataFrame( { "AAA": ["foo", "bar", "baz", "bar", "foo", "bar", "qux", "foo"], @@ -181,30 +164,8 @@ def test_drop_duplicates_for_take_all(): } ) gdf = cudf.DataFrame.from_pandas(pdf) - # single column - result = gdf.drop_duplicates("AAA") - expected = pdf.drop_duplicates("AAA") - assert_eq(result, expected) - - result = gdf.drop_duplicates("AAA", keep="last") - expected = pdf.drop_duplicates("AAA", keep="last") - assert_eq(result, expected) - - result = gdf.drop_duplicates("AAA", keep=False) - expected = pdf.drop_duplicates("AAA", keep=False) - assert_eq(result, expected) - - # multiple columns - result = gdf.drop_duplicates(["AAA", "B"]) - expected = pdf.drop_duplicates(["AAA", "B"]) - assert_eq(result, expected) - - result = gdf.drop_duplicates(["AAA", "B"], keep="last") - expected = pdf.drop_duplicates(["AAA", "B"], keep="last") - assert_eq(result, expected) - - result = gdf.drop_duplicates(["AAA", "B"], keep=False) - expected = pdf.drop_duplicates(["AAA", "B"], keep=False) + result = gdf.drop_duplicates(subset, keep=keep) + expected = pdf.drop_duplicates(subset, keep=keep) assert_eq(result, expected) @@ -268,21 +229,15 @@ def test_drop_duplicates_empty(df): def test_dataframe_drop_duplicates_numeric_method(): - num_columns = 3 - comb = list(itertools.permutations(range(num_columns), num_columns)) - shuf = list(comb) - random.Random(num_columns).shuffle(shuf) - - def get_pdf(n_dup): - # create dataframe with n_dup duplicate rows - rows = comb + shuf[:n_dup] - random.Random(n_dup).shuffle(rows) - return pd.DataFrame(rows) - - for i in range(5): - pdf = get_pdf(i) - gdf = cudf.DataFrame.from_pandas(pdf) - assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) + pdf = pd.DataFrame( + { + "A": [1, 1, 1, 4, 5], + "B": [1, 1, 1, 4, 5], + "C": [1, 1, 1, 4, 5], + } + ) + gdf = cudf.DataFrame.from_pandas(pdf) + assert_eq(gdf.drop_duplicates(), pdf.drop_duplicates()) # subset columns, single columns assert_eq( @@ -299,16 +254,13 @@ def get_pdf(n_dup): ) # subset columns shuffled - cols = list(pdf.columns) - random.Random(3).shuffle(cols) + cols = ["B", "C", "A"] assert_eq(gdf.drop_duplicates(cols), pdf.drop_duplicates(cols)) - random.Random(3).shuffle(cols) assert_eq(gdf.drop_duplicates(cols[:-1]), pdf.drop_duplicates(cols[:-1])) - random.Random(3).shuffle(cols) assert_eq(gdf.drop_duplicates(cols[-1]), pdf.drop_duplicates(cols[-1])) assert_eq( - gdf.drop_duplicates(cols, keep="last"), - pdf.drop_duplicates(cols, keep="last"), + gdf.drop_duplicates(pdf.columns, keep="last"), + pdf.drop_duplicates(pdf.columns, keep="last"), ) @@ -597,12 +549,12 @@ def test_drop_duplicates_multi_index(): expected = pdf.drop_duplicates() result = gdf.drop_duplicates() - assert_eq(result.to_pandas(), expected) + assert_eq(result, expected) # FIXME: to_pandas needed until sort_index support for MultiIndex for col in gdf.columns: assert_eq( - gdf[col].drop_duplicates().to_pandas(), + gdf[col].drop_duplicates(), pdf[col].drop_duplicates(), ) diff --git a/python/cudf/cudf/tests/general_functions/test_factorize.py b/python/cudf/cudf/tests/general_functions/test_factorize.py new file mode 100644 index 00000000000..4f1df7a359a --- /dev/null +++ b/python/cudf/cudf/tests/general_functions/test_factorize.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_cudf_factorize_array(): + data = [1, 2, 3, 4, 5] + + parr = np.array(data) + garr = cp.array(data) + + expect = pd.factorize(parr) + got = cudf.factorize(garr) + + assert len(expect) == len(got) + + np.testing.assert_array_equal(expect[0], got[0].get()) + np.testing.assert_array_equal(expect[1], got[1].get()) + + +@pytest.mark.parametrize("pandas_compatibility", [True, False]) +def test_factorize_code_pandas_compatibility(pandas_compatibility): + psr = pd.Series([1, 2, 3, 4, 5]) + gsr = cudf.from_pandas(psr) + + expect = pd.factorize(psr) + with cudf.option_context("mode.pandas_compatible", pandas_compatibility): + got = cudf.factorize(gsr) + assert_eq(got[0], expect[0]) + assert_eq(got[1], expect[1]) + if pandas_compatibility: + assert got[0].dtype == expect[0].dtype + else: + assert got[0].dtype == cudf.dtype("int8") + + +def test_factorize_result_classes(): + data = [1, 2, 3] + + labels, cats = cudf.factorize(cudf.Series(data)) + + assert isinstance(labels, cp.ndarray) + assert isinstance(cats, cudf.Index) + + labels, cats = cudf.factorize(cudf.Index(data)) + + assert isinstance(labels, cp.ndarray) + assert isinstance(cats, cudf.Index) + + labels, cats = cudf.factorize(cp.array(data)) + + assert isinstance(labels, cp.ndarray) + assert isinstance(cats, cp.ndarray) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_factorize.py b/python/cudf/cudf/tests/indexes/index/methods/test_factorize.py new file mode 100644 index 00000000000..1379d0ec9d3 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_factorize.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pandas as pd + +import cudf +from cudf import Index + + +def test_factorize_index_obj(): + rng = np.random.default_rng(seed=0) + + arr = rng.integers(2, size=10, dtype=np.int32) + ser = cudf.Index(arr) + + uvals, labels = ser.factorize() + unique_values, indices = np.unique(arr, return_index=True) + expected_values = unique_values[np.argsort(indices)] + + np.testing.assert_array_equal(labels.values.get(), expected_values) + assert isinstance(uvals, cp.ndarray) + assert isinstance(labels, Index) + + encoder = {labels[idx]: idx for idx in range(len(labels))} + handcoded = [encoder[v] for v in arr] + np.testing.assert_array_equal(uvals.get(), handcoded) + + +def test_cudf_factorize_index(): + data = [1, 2, 3, 4, 5] + + pi = pd.Index(data) + gi = cudf.Index(data) + + expect = pd.factorize(pi) + got = cudf.factorize(gi) + + assert len(expect) == len(got) + + np.testing.assert_array_equal(expect[0], got[0].get()) + np.testing.assert_array_equal(expect[1], got[1].values.get()) diff --git a/python/cudf/cudf/tests/series/methods/test_drop_duplicates.py b/python/cudf/cudf/tests/series/methods/test_drop_duplicates.py new file mode 100644 index 00000000000..68f40879c66 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_drop_duplicates.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +@pytest.mark.parametrize("ignore_index", [True, False]) +@pytest.mark.parametrize( + "data", + [ + [1, 2, 4, 5, 6, 6], + [], + ["a", "b", "s", "sd", "a", "b"], + pd.Series(["aaa"] * 10, dtype="object"), + ], +) +def test_drop_duplicates_series(data, keep, ignore_index): + pds = pd.Series(data) + gds = cudf.from_pandas(pds) + + assert_eq( + pds.drop_duplicates(keep=keep, ignore_index=ignore_index), + gds.drop_duplicates(keep=keep, ignore_index=ignore_index), + ) + + pds.drop_duplicates(keep=keep, inplace=True, ignore_index=ignore_index) + gds.drop_duplicates(keep=keep, inplace=True, ignore_index=ignore_index) + assert_eq(pds, gds) diff --git a/python/cudf/cudf/tests/series/methods/test_factorize.py b/python/cudf/cudf/tests/series/methods/test_factorize.py index 98cc8187fce..b64ad0fbf63 100644 --- a/python/cudf/cudf/tests/series/methods/test_factorize.py +++ b/python/cudf/cudf/tests/series/methods/test_factorize.py @@ -1,5 +1,8 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import cupy as cp +import numpy as np +import pandas as pd import pytest import cudf @@ -29,3 +32,86 @@ def test_series_factorize_use_na_sentinel(data, use_na_sentinel, sort): ) assert_eq(expected_labels, actual_labels.get()) assert_eq(expected_cats, actual_cats.to_pandas(nullable=True)) + + +def test_factorize_series_obj(): + rng = np.random.default_rng(seed=0) + + arr = rng.integers(2, size=10, dtype=np.int32) + ser = cudf.Series(arr) + + uvals, labels = ser.factorize() + unique_values, indices = np.unique(arr, return_index=True) + expected_values = unique_values[np.argsort(indices)] + + np.testing.assert_array_equal(labels.to_numpy(), expected_values) + assert isinstance(uvals, cp.ndarray) + assert isinstance(labels, cudf.Index) + + encoder = {labels[idx]: idx for idx in range(len(labels))} + handcoded = [encoder[v] for v in arr] + np.testing.assert_array_equal(uvals.get(), handcoded) + + +@pytest.mark.parametrize( + "index", + [ + None, + [ + 2992443.0, + 2992447.0, + 2992466.0, + 2992440.0, + 2992441.0, + 2992442.0, + 2992444.0, + 2992445.0, + 2992446.0, + 2992448.0, + ], + ], +) +def test_factorize_series_index(index): + data = ["C", "H", "C", "W", "W", "W", "W", "W", "C", "W"] + ser = cudf.Series(data, index=index) + pser = pd.Series(data, index=index) + result_unique, result_labels = ser.factorize() + expected_unique, expected_labels = pser.factorize() + assert_eq(result_unique.get(), expected_unique) + assert_eq( + result_labels.to_pandas().values, + expected_labels.values, + ) + + +def test_cudf_factorize_series(): + data = [1, 2, 3, 4, 5] + + psr = pd.Series(data) + gsr = cudf.Series(data) + + expect = pd.factorize(psr) + got = cudf.factorize(gsr) + + assert len(expect) == len(got) + + np.testing.assert_array_equal(expect[0], got[0].get()) + np.testing.assert_array_equal(expect[1], got[1].values.get()) + + +@pytest.mark.parametrize( + "data", + [ + ["abc", "def", "abc", "a", "def", None], + [10, 20, 100, -10, 0, 1, None, 10, 100], + ], +) +def test_category_dtype_factorize(data): + gs = cudf.Series(data, dtype="category") + ps = gs.to_pandas() + + actual_codes, actual_uniques = gs.factorize() + expected_codes, expected_uniques = ps.factorize() + + assert_eq(actual_codes, expected_codes) + assert_eq(actual_uniques, expected_uniques) diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py deleted file mode 100644 index 
d16e725088f..00000000000 --- a/python/cudf/cudf/tests/test_factorize.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. - -import cupy as cp -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import DataFrame, Index -from cudf.testing import assert_eq - - -def test_factorize_series_obj(): - df = DataFrame() - rng = np.random.default_rng(seed=0) - - # initialize data frame - df["cats"] = arr = rng.integers(2, size=10, dtype=np.int32) - - uvals, labels = df["cats"].factorize() - unique_values, indices = np.unique(arr, return_index=True) - expected_values = unique_values[np.argsort(indices)] - - np.testing.assert_array_equal(labels.to_numpy(), expected_values) - assert isinstance(uvals, cp.ndarray) - assert isinstance(labels, Index) - - encoder = {labels[idx]: idx for idx in range(len(labels))} - handcoded = [encoder[v] for v in arr] - np.testing.assert_array_equal(uvals.get(), handcoded) - - -def test_factorize_index_obj(): - df = DataFrame() - rng = np.random.default_rng(seed=0) - - # initialize data frame - df["cats"] = arr = rng.integers(2, size=10, dtype=np.int32) - df = df.set_index("cats") - - uvals, labels = df.index.factorize() - unique_values, indices = np.unique(arr, return_index=True) - expected_values = unique_values[np.argsort(indices)] - - np.testing.assert_array_equal(labels.values.get(), expected_values) - assert isinstance(uvals, cp.ndarray) - assert isinstance(labels, Index) - - encoder = {labels[idx]: idx for idx in range(len(labels))} - handcoded = [encoder[v] for v in arr] - np.testing.assert_array_equal(uvals.get(), handcoded) - - -def test_factorize_series_index(): - df = DataFrame() - df["col1"] = ["C", "H", "C", "W", "W", "W", "W", "W", "C", "W"] - df["col2"] = [ - 2992443.0, - 2992447.0, - 2992466.0, - 2992440.0, - 2992441.0, - 2992442.0, - 2992444.0, - 2992445.0, - 2992446.0, - 2992448.0, - ] - assert_eq(df.col1.factorize()[0].get(), df.to_pandas().col1.factorize()[0]) - assert_eq( - df.col1.factorize()[1].to_pandas().values, - df.to_pandas().col1.factorize()[1].values, - ) - - df = df.set_index("col2") - - assert_eq(df.col1.factorize()[0].get(), df.to_pandas().col1.factorize()[0]) - assert_eq( - df.col1.factorize()[1].to_pandas().values, - df.to_pandas().col1.factorize()[1].values, - ) - - -def test_cudf_factorize_series(): - data = [1, 2, 3, 4, 5] - - psr = pd.Series(data) - gsr = cudf.Series(data) - - expect = pd.factorize(psr) - got = cudf.factorize(gsr) - - assert len(expect) == len(got) - - np.testing.assert_array_equal(expect[0], got[0].get()) - np.testing.assert_array_equal(expect[1], got[1].values.get()) - - -def test_cudf_factorize_index(): - data = [1, 2, 3, 4, 5] - - pi = pd.Index(data) - gi = cudf.Index(data) - - expect = pd.factorize(pi) - got = cudf.factorize(gi) - - assert len(expect) == len(got) - - np.testing.assert_array_equal(expect[0], got[0].get()) - np.testing.assert_array_equal(expect[1], got[1].values.get()) - - -def test_cudf_factorize_array(): - data = [1, 2, 3, 4, 5] - - parr = np.array(data) - garr = cp.array(data) - - expect = pd.factorize(parr) - got = cudf.factorize(garr) - - assert len(expect) == len(got) - - np.testing.assert_array_equal(expect[0], got[0].get()) - np.testing.assert_array_equal(expect[1], got[1].get()) - - -@pytest.mark.parametrize("pandas_compatibility", [True, False]) -def test_factorize_code_pandas_compatibility(pandas_compatibility): - psr = pd.Series([1, 2, 3, 4, 5]) - gsr = cudf.from_pandas(psr) - - expect = pd.factorize(psr) - with 
cudf.option_context("mode.pandas_compatible", pandas_compatibility): - got = cudf.factorize(gsr) - assert_eq(got[0], expect[0]) - assert_eq(got[1], expect[1]) - if pandas_compatibility: - assert got[0].dtype == expect[0].dtype - else: - assert got[0].dtype == cudf.dtype("int8") - - -def test_factorize_result_classes(): - data = [1, 2, 3] - - labels, cats = cudf.factorize(cudf.Series(data)) - - assert isinstance(labels, cp.ndarray) - assert isinstance(cats, cudf.Index) - - labels, cats = cudf.factorize(cudf.Index(data)) - - assert isinstance(labels, cp.ndarray) - assert isinstance(cats, cudf.Index) - - labels, cats = cudf.factorize(cp.array(data)) - - assert isinstance(labels, cp.ndarray) - assert isinstance(cats, cp.ndarray) - - -@pytest.mark.parametrize( - "data", - [ - ["abc", "def", "abc", "a", "def", None], - [10, 20, 100, -10, 0, 1, None, 10, 100], - ], -) -def test_category_dtype_factorize(data): - gs = cudf.Series(data, dtype="category") - ps = gs.to_pandas() - - actual_codes, actual_uniques = gs.factorize() - expected_codes, expected_uniques = ps.factorize() - - assert_eq(actual_codes, expected_codes) - assert_eq(actual_uniques, expected_uniques) From c1d37301bb46b8d5d967a79b32de17123acffb22 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Aug 2025 10:09:12 -0700 Subject: [PATCH 135/366] Move test_groupby to new cudf classic directory structure (#19688) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19688 --- python/cudf/cudf/tests/groupby/test_agg.py | 53 + python/cudf/cudf/tests/groupby/test_apply.py | 889 +++++++- .../cudf/tests/groupby/test_attributes.py | 49 + .../cudf/tests/groupby/test_cummulative.py | 13 +- python/cudf/cudf/tests/groupby/test_diff.py | 12 + python/cudf/cudf/tests/groupby/test_ffill.py | 22 + .../cudf/cudf/tests/groupby/test_get_group.py | 99 +- .../cudf/cudf/tests/groupby/test_head_tail.py | 106 + python/cudf/cudf/tests/groupby/test_ngroup.py | 34 + python/cudf/cudf/tests/groupby/test_nth.py | 34 + .../cudf/tests/groupby/test_pct_change.py | 98 + .../cudf/tests/groupby/test_reductions.py | 311 ++- python/cudf/cudf/tests/groupby/test_sample.py | 101 + python/cudf/cudf/tests/groupby/test_shift.py | 24 +- python/cudf/cudf/tests/groupby/test_size.py | 19 + .../cudf/cudf/tests/groupby/test_transform.py | 39 +- .../cudf/tests/groupby/test_value_counts.py | 77 + python/cudf/cudf/tests/test_groupby.py | 1917 ----------------- 18 files changed, 1972 insertions(+), 1925 deletions(-) create mode 100644 python/cudf/cudf/tests/groupby/test_ffill.py create mode 100644 python/cudf/cudf/tests/groupby/test_head_tail.py create mode 100644 python/cudf/cudf/tests/groupby/test_ngroup.py create mode 100644 python/cudf/cudf/tests/groupby/test_pct_change.py create mode 100644 python/cudf/cudf/tests/groupby/test_sample.py create mode 100644 python/cudf/cudf/tests/groupby/test_size.py create mode 100644 python/cudf/cudf/tests/groupby/test_value_counts.py delete mode 100644 python/cudf/cudf/tests/test_groupby.py diff --git a/python/cudf/cudf/tests/groupby/test_agg.py b/python/cudf/cudf/tests/groupby/test_agg.py index 42c74f967bb..07d069ef8ff 100644 --- a/python/cudf/cudf/tests/groupby/test_agg.py +++ b/python/cudf/cudf/tests/groupby/test_agg.py @@ -633,3 +633,56 @@ def test_groupby_mix_agg_scan(): 
         gb.agg(func[1:])
     with pytest.raises(NotImplementedError, match=err_msg):
         gb.agg(func)
+
+
+@pytest.mark.parametrize(
+    "op", ["cummax", "cummin", "cumprod", "cumsum", "mean", "median"]
+)
+def test_group_by_raises_string_error(op):
+    df = cudf.DataFrame({"a": [1, 2, 3, 4, 5], "b": ["a", "b", "c", "d", "e"]})
+
+    with pytest.raises(TypeError):
+        df.groupby(df.a).agg(op)
+
+
+@pytest.mark.parametrize(
+    "op",
+    [
+        "cummax",
+        "cummin",
+        "cumprod",
+        "cumsum",
+        "mean",
+        "median",
+        "prod",
+        "sum",
+        list,
+    ],
+)
+def test_group_by_raises_category_error(op):
+    df = cudf.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5],
+            "b": cudf.Series(["a", "b", "c", "d", "e"], dtype="category"),
+        }
+    )
+
+    with pytest.raises(TypeError):
+        df.groupby(df.a).agg(op)
+
+
+def test_agg_duplicate_aggs_pandas_compat_raises():
+    agg = {"b": ["mean", "mean"]}
+    dfgb = cudf.DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]}).groupby(["a"])
+    with cudf.option_context("mode.pandas_compatible", True):
+        with pytest.raises(NotImplementedError):
+            dfgb.agg(agg)
+
+    with pytest.warns(UserWarning):
+        result = dfgb.agg(agg)
+    expected = cudf.DataFrame(
+        [4.5, 6.0],
+        index=cudf.Index([1, 2], name="a"),
+        columns=pd.MultiIndex.from_tuples([("b", "mean")]),
+    )
+    assert_groupby_results_equal(result, expected)
diff --git a/python/cudf/cudf/tests/groupby/test_apply.py b/python/cudf/cudf/tests/groupby/test_apply.py
index 1c340a34e2a..8458d3069d0 100644
--- a/python/cudf/cudf/tests/groupby/test_apply.py
+++ b/python/cudf/cudf/tests/groupby/test_apply.py
@@ -1,16 +1,25 @@
 # Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
+import textwrap
+from functools import partial
+
 import numpy as np
 import pandas as pd
 import pytest
 from numba import cuda
 
 import cudf
+from cudf import DataFrame
 from cudf.core._compat import (
     PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_GE_220,
     PANDAS_VERSION,
 )
-from cudf.testing import assert_groupby_results_equal
+from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
+from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES
+from cudf.core.udf.utils import UDFError, precompiled
+from cudf.testing import assert_eq, assert_groupby_results_equal
+from cudf.testing._utils import expect_warning_if
 
 
 @pytest.fixture(params=["cudf", "jit"])
@@ -139,3 +148,881 @@ def foo(key1, val1, com1, com2):
     expect["com2"] = np.zeros(20, dtype=np.int32)
 
     assert_groupby_results_equal(expect, got)
+
+
+@pytest.fixture
+def groupby_jit_data_small():
+    """
+    Return a small dataset for testing JIT GroupBy apply. The dataframe
+    contains 4 groups of size 1, 2, 3, 4 as well as an additional key
+    column that can be used to test subgroups within groups. This data
+    is useful for smoke testing basic numeric results.
+    """
+    rng = np.random.default_rng(42)
+    df = DataFrame(
+        {
+            "key1": [1] + [2] * 2 + [3] * 3 + [4] * 4,
+            "key2": [1, 2] * 5,
+            "val1": rng.integers(0, 10, 10),
+            "val2": rng.integers(0, 10, 10),
+        }
+    )
+    # randomly permute data
+    df = df.sample(frac=1, random_state=1, ignore_index=True)
+    return df
+
+
+@pytest.fixture
+def groupby_jit_data_large(groupby_jit_data_small):
+    """
+    Larger version of groupby_jit_data_small which contains enough data
+    to require more than one block per group. This data is useful for
+    testing whether JIT GroupBy algorithms scale to larger datasets without
+    manifesting numerical issues such as overflow.
+ """ + max_tpb = 1024 + factor = ( + max_tpb + 1 + ) # bigger than a block but not always an exact multiple + df = cudf.concat([groupby_jit_data_small] * factor) + + return df + + +@pytest.fixture +def groupby_jit_data_nans(groupby_jit_data_small): + """ + Returns a modified version of groupby_jit_data_small which contains + nan values. + """ + + df = groupby_jit_data_small.sort_values(["key1", "key2"]) + df["val1"] = df["val1"].astype("float64") + df.loc[df.index[::2], "val1"] = np.nan + df = df.sample(frac=1, random_state=1, ignore_index=True) + return df + + +@pytest.fixture +def groupby_jit_datasets( + groupby_jit_data_small, groupby_jit_data_large, groupby_jit_data_nans +): + return { + "small": groupby_jit_data_small, + "large": groupby_jit_data_large, + "nans": groupby_jit_data_nans, + } + + +def run_groupby_apply_jit_test(data, func, keys, *args): + expect_groupby_obj = data.to_pandas().groupby(keys) + got_groupby_obj = data.groupby(keys) + + # compare cuDF jit to pandas + cudf_jit_result = got_groupby_obj.apply( + func, *args, engine="jit", include_groups=False + ) + pandas_result = expect_groupby_obj.apply(func, *args, include_groups=False) + assert_groupby_results_equal(cudf_jit_result, pandas_result) + + +def groupby_apply_jit_reductions_test_inner(func, data, dtype): + # ideally we'd just have: + # lambda group: getattr(group, func)() + # but the current kernel caching mechanism relies on pickle which + # does not play nice with local functions. What's below uses + # exec as a workaround to write the test functions dynamically + + funcstr = textwrap.dedent( + f""" + def func(df): + return df['val1'].{func}() + """ + ) + lcl = {} + exec(funcstr, lcl) + func = lcl["func"] + + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + + run_groupby_apply_jit_test(data, func, ["key1"]) + + +# test unary reductions +@pytest.mark.parametrize( + "dtype", + SUPPORTED_GROUPBY_NUMPY_TYPES, + ids=[str(t) for t in SUPPORTED_GROUPBY_NUMPY_TYPES], +) +@pytest.mark.parametrize( + "func", ["min", "max", "sum", "mean", "var", "std", "idxmin", "idxmax"] +) +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", +) +def test_groupby_apply_jit_unary_reductions( + request, func, dtype, dataset, groupby_jit_datasets +): + request.applymarker( + pytest.mark.xfail( + condition=( + ( + dataset == "nans" + and func in {"var", "std", "mean"} + and str(dtype) in {"int64", "float32", "float64"} + ) + or ( + dataset == "nans" + and func in {"idxmax", "idxmin", "sum"} + and dtype.kind == "f" + ) + ), + reason=("https://github.com/rapidsai/cudf/issues/14860"), + ) + ) + warn_condition = ( + dataset == "nans" + and func in {"idxmax", "idxmin"} + and dtype.kind == "f" + ) + dataset = groupby_jit_datasets[dataset].copy(deep=True) + with expect_warning_if(warn_condition, FutureWarning): + groupby_apply_jit_reductions_test_inner(func, dataset, dtype) + + +# test unary reductions for special values +def groupby_apply_jit_reductions_special_vals_inner( + func, data, dtype, special_val +): + funcstr = textwrap.dedent( + f""" + def func(df): + return df['val1'].{func}() + """ + ) + lcl = {} + exec(funcstr, lcl) + func = lcl["func"] + + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + data["val1"] = special_val + data["val2"] = special_val + + run_groupby_apply_jit_test(data, func, ["key1"]) + + +# test 
unary index reductions for special values +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def groupby_apply_jit_idx_reductions_special_vals_inner( + func, data, dtype, special_val +): + funcstr = textwrap.dedent( + f""" + def func(df): + return df['val1'].{func}() + """ + ) + lcl = {} + exec(funcstr, lcl) + func = lcl["func"] + + data["val1"] = data["val1"].astype(dtype) + data["val2"] = data["val2"].astype(dtype) + data["val1"] = special_val + data["val2"] = special_val + + run_groupby_apply_jit_test(data, func, ["key1"]) + + +@pytest.mark.parametrize("dtype", ["float64", "float32"]) +@pytest.mark.parametrize("func", ["min", "max", "sum", "mean", "var", "std"]) +@pytest.mark.parametrize("special_val", [np.nan, np.inf, -np.inf]) +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", +) +def test_groupby_apply_jit_reductions_special_vals( + func, dtype, dataset, groupby_jit_datasets, special_val +): + dataset = groupby_jit_datasets[dataset].copy(deep=True) + with expect_warning_if( + func in {"var", "std"} and not np.isnan(special_val), RuntimeWarning + ): + groupby_apply_jit_reductions_special_vals_inner( + func, dataset, dtype, special_val + ) + + +@pytest.mark.parametrize("func", ["idxmax", "idxmin"]) +@pytest.mark.parametrize( + "special_val", + [ + pytest.param( + np.nan, + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/13832" + ), + ), + np.inf, + -np.inf, + ], +) +@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="include_groups keyword new in pandas 2.2", +) +def test_groupby_apply_jit_idx_reductions_special_vals( + func, dataset, groupby_jit_datasets, special_val +): + dataset = groupby_jit_datasets[dataset].copy(deep=True) + groupby_apply_jit_idx_reductions_special_vals_inner( + func, dataset, "float64", special_val + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_sum_integer_overflow(): + max = np.iinfo("int32").max + + data = DataFrame( + { + "a": [0, 0, 0], + "b": [max, max, max], + } + ) + + def func(group): + return group["b"].sum() + + run_groupby_apply_jit_test(data, func, ["a"]) + + +@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) +@pytest.mark.parametrize( + "dataset", + [ + pytest.param( + "small", + marks=[ + pytest.mark.filterwarnings( + "ignore:Degrees of Freedom <= 0 for slice" + ), + pytest.mark.filterwarnings( + "ignore:divide by zero encountered in divide" + ), + ], + ), + "large", + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_correlation(dataset, groupby_jit_datasets, dtype): + dataset = groupby_jit_datasets[dataset].copy(deep=True) + + dataset["val1"] = dataset["val1"].astype(dtype) + dataset["val2"] = dataset["val2"].astype(dtype) + + keys = ["key1"] + + def func(group): + return group["val1"].corr(group["val2"]) + + if np.dtype(dtype).kind == "f": + # Correlation of floating types is not yet supported: + # https://github.com/rapidsai/cudf/issues/13839 + m = ( + f"Series.corr\\(Series\\) is not " + f"supported for \\({dtype}, {dtype}\\)" + ) + with 
pytest.raises(UDFError, match=m): + run_groupby_apply_jit_test(dataset, func, keys) + return + with expect_warning_if(dtype in {"int32", "int64"}, RuntimeWarning): + run_groupby_apply_jit_test(dataset, func, keys) + + +@pytest.mark.parametrize("dtype", ["int32", "int64"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_correlation_zero_variance(dtype): + # pearson correlation is undefined when the variance of either + # variable is zero. This test ensures that the jit implementation + # returns the same result as pandas in this case. + data = DataFrame( + {"a": [0, 0, 0, 0, 0], "b": [1, 1, 1, 1, 1], "c": [2, 2, 2, 2, 2]} + ) + + def func(group): + return group["b"].corr(group["c"]) + + with expect_warning_if(dtype in {"int32", "int64"}, RuntimeWarning): + run_groupby_apply_jit_test(data, func, ["a"]) + + +@pytest.mark.parametrize("op", unary_ops) +def test_groupby_apply_jit_invalid_unary_ops_error(groupby_jit_data_small, op): + keys = ["key1"] + + def func(group): + return op(group["val1"]) + + with pytest.raises( + UDFError, + match=f"{op.__name__}\\(Series\\) is not supported by JIT GroupBy", + ): + run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) + + +@pytest.mark.parametrize("op", arith_ops + comparison_ops) +def test_groupby_apply_jit_invalid_binary_ops_error( + groupby_jit_data_small, op +): + keys = ["key1"] + + def func(group): + return op(group["val1"], group["val2"]) + + with pytest.raises( + UDFError, + match=f"{op.__name__}\\(Series, Series\\) is not supported", + ): + run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) + + +def test_groupby_apply_jit_no_df_ops(groupby_jit_data_small): + # DataFrame level operations are not yet supported. 
+ def func(group): + return group.sum() + + with pytest.raises( + UDFError, + match="JIT GroupBy.apply\\(\\) does not support DataFrame.sum\\(\\)", + ): + run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1"]) + + +@pytest.mark.parametrize("dtype", ["uint8", "str"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_unsupported_dtype(dtype): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df["b"] = df["b"].astype(dtype) + + # a UDAF that doesn't actually use the input column + # with the unsupported dtype should still succeed + def func(group): + return group["c"].sum() + + run_groupby_apply_jit_test(df, func, ["a"]) + + # however a UDAF that does use the unsupported dtype + # should fail + def func(group): + return group["b"].sum() + + with pytest.raises(UDFError, match="Only columns of the following dtypes"): + run_groupby_apply_jit_test(df, func, ["a"]) + + +@pytest.mark.parametrize( + "func", + [ + lambda df: df["val1"].max() + df["val2"].min(), + lambda df: df["val1"].sum() + df["val2"].var(), + lambda df: df["val1"].mean() + df["val2"].std(), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_basic(func, groupby_jit_data_small): + run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1", "key2"]) + + +def f1(df, k): + return df["val1"].max() + df["val2"].min() + k + + +def f2(df, k, L): + return df["val1"].sum() - df["val2"].var() + (k / L) + + +def f3(df, k, L, m): + return ((k * df["val1"].mean()) + (L * df["val2"].std())) / m + + +@pytest.mark.parametrize( + "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_args(func, args, groupby_jit_data_small): + run_groupby_apply_jit_test( + groupby_jit_data_small, func, ["key1", "key2"], *args + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_jit_block_divergence(): + # https://github.com/rapidsai/cudf/issues/12686 + df = cudf.DataFrame( + { + "a": [0, 0, 0, 1, 1, 1], + "b": [1, 1, 1, 2, 3, 4], + } + ) + + def diverging_block(grp_df): + if grp_df["b"].mean() > 1: + return grp_df["b"].mean() + return 0 + + run_groupby_apply_jit_test(df, diverging_block, ["a"]) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_caching(): + # Make sure similar functions that differ + # by simple things like constants actually + # recompile + + # begin with a clear cache + precompiled.clear() + assert precompiled.currsize == 0 + + data = cudf.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 4, 5, 6]}) + + def f(group): + return group["b"].mean() * 2 + + # a single run should result in a cache size of 1 + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 1 + + # a second run with f should not increase the count + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 1 + + # changing a constant value inside the UDF should miss + def f(group): + return group["b"].mean() * 3 + + run_groupby_apply_jit_test(data, f, ["a"]) + assert precompiled.currsize == 2 + + # changing the dtypes of the columns 
should miss + data["b"] = data["b"].astype("float64") + run_groupby_apply_jit_test(data, f, ["a"]) + + assert precompiled.currsize == 3 + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_no_bytecode_fallback(): + # tests that a function which contains no bytecode + # attribute, but would still be executable using + # the iterative groupby apply approach, still works. + + gdf = cudf.DataFrame({"a": [0, 1, 1], "b": [1, 2, 3]}) + pdf = gdf.to_pandas() + + def f(group): + return group.sum() + + part = partial(f) + + expect = pdf.groupby("a").apply(part, include_groups=False) + got = gdf.groupby("a").apply(part, engine="auto", include_groups=False) + assert_groupby_results_equal(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_return_col_from_df(): + # tests a UDF that consists of purely colwise + # ops, such as `lambda group: group.x + group.y` + # which returns a column + df = cudf.DataFrame( + { + "id": range(10), + "x": range(10), + "y": range(10), + } + ) + pdf = df.to_pandas() + + def func(df): + return df.x + df.y + + got = df.groupby("id").apply(func, include_groups=False) + expect = pdf.groupby("id").apply(func, include_groups=False) + # pandas seems to erroneously add an extra MI level of ids + # TODO: Figure out how pandas groupby.apply determines the columns + expect = pd.DataFrame(expect.droplevel(1), columns=got.columns) + assert_groupby_results_equal(expect, got) + + +@pytest.mark.parametrize("func", [lambda group: group.sum()]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_return_df(func): + # tests a UDF that reduces over a dataframe + # and produces a series with the original column names + # as its index, such as lambda group: group.sum() + group.min() + df = cudf.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, 4]}) + pdf = df.to_pandas() + + expect = pdf.groupby("a").apply(func, include_groups=False) + got = df.groupby("a").apply(func, include_groups=False) + assert_groupby_results_equal(expect, got) + + +@pytest.mark.parametrize("as_index", [True, False]) +def test_groupby_apply_return_reindexed_series(as_index): + def gdf_func(df): + return cudf.Series([df["a"].sum(), df["b"].min(), df["c"].max()]) + + def pdf_func(df): + return pd.Series([df["a"].sum(), df["b"].min(), df["c"].max()]) + + df = cudf.DataFrame( + { + "key": [0, 0, 1, 1, 2, 2], + "a": [1, 2, 3, 4, 5, 6], + "b": [7, 8, 9, 10, 11, 12], + "c": [13, 14, 15, 16, 17, 18], + } + ) + pdf = df.to_pandas() + + kwargs = {} + if PANDAS_GE_220: + kwargs["include_groups"] = False + + expect = pdf.groupby("key", as_index=as_index).apply(pdf_func, **kwargs) + got = df.groupby("key", as_index=as_index).apply(gdf_func, **kwargs) + assert_groupby_results_equal(expect, got) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", +) +def test_groupby_apply_noempty_group(): + pdf = pd.DataFrame( + {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]} + ) + gdf = cudf.from_pandas(pdf) + + expect = ( + pdf.groupby("a", group_keys=False) + .apply(lambda x: x.iloc[[0, 1]], include_groups=False) + .reset_index(drop=True) + ) + got = ( + gdf.groupby("a") + .apply(lambda x: x.iloc[[0, 1]], include_groups=False) + .reset_index(drop=True) + ) 
+ assert_groupby_results_equal(expect, got) + + +def create_test_groupby_apply_return_scalars_params(): + def f0(x): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / 10 + return full + + def f1(x, k): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = ticker / k + return full + + def f2(x, k, L): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) + return full + + def f3(x, k, L, m): + x = x[~x["B"].isna()] + ticker = x.shape[0] + full = L * (ticker / k) % m + return full + + return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] + + +@pytest.mark.parametrize( + "func,args", create_test_groupby_apply_return_scalars_params() +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_groupby_apply_return_scalars(func, args): + pdf = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5], + "B": [ + 0.01, + np.nan, + 0.03, + 0.04, + np.nan, + 0.06, + 0.07, + 0.08, + 0.09, + 1.0, + ], + } + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby("A").apply(func, *args, include_groups=False) + actual = gdf.groupby("A").apply(func, *args, include_groups=False) + + assert_groupby_results_equal(expected, actual) + + +def create_test_groupby_apply_return_series_dataframe_params(): + def f0(x): + return x - x.max() + + def f1(x): + return x.min() - x.max() + + def f2(x): + return x.min() + + def f3(x, k): + return x - x.max() + k + + def f4(x, k, L): + return x.min() - x.max() + (k / L) + + def f5(x, k, L, m): + return m * x.min() + (k / L) + + return [ + (f0, ()), + (f1, ()), + (f2, ()), + (f3, (42,)), + (f4, (42, 119)), + (f5, (41, 119, 212.1)), + ] + + +@pytest.mark.parametrize( + "func,args", create_test_groupby_apply_return_series_dataframe_params() +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Include groups missing on old versions of pandas", +) +def test_groupby_apply_return_series_dataframe(func, args): + pdf = pd.DataFrame( + {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]} + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby(["key"], group_keys=False).apply( + func, *args, include_groups=False + ) + actual = gdf.groupby(["key"]).apply(func, *args, include_groups=False) + + assert_groupby_results_equal(expected, actual) + + +@pytest.mark.parametrize( + "pdf", + [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], +) +def test_groupby_apply_no_keys(pdf): + gdf = cudf.from_pandas(pdf) + if isinstance(pdf, pd.DataFrame): + kwargs = {"check_column_type": False} + else: + kwargs = {} + assert_groupby_results_equal( + pdf.groupby([], group_keys=False).apply(lambda x: x.max()), + gdf.groupby([]).apply(lambda x: x.max()), + check_index_type=False, # Int64 v/s Float64 + **kwargs, + ) + + +@pytest.mark.parametrize( + "data", + [ + {"Speed": [380.0, 370.0, 24.0, 26.0], "Score": [50, 30, 90, 80]}, + { + "Speed": [380.0, 370.0, 24.0, 26.0], + "Score": [50, 30, 90, 80], + "Other": [10, 20, 30, 40], + }, + ], +) +@pytest.mark.parametrize("group", ["Score", "Speed"]) +def test_groupby_describe(data, group): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + got = gdf.groupby(group).describe() + expect = pdf.groupby(group).describe() + + assert_groupby_results_equal(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [], "b": []}, + {"a": [2, 1, 2, 1, 1, 3], "b": [None, 1, 2, None, 2, None]}, + {"a": [None], "b": [None]}, + {"a": [2, 1, 1], "b": 
[None, 1, 0], "c": [None, 0, 1]}, + ], +) +@pytest.mark.parametrize("agg", ["first", "last", ["first", "last"]]) +def test_groupby_first(data, agg): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + expect = pdf.groupby("a").agg(agg) + got = gdf.groupby("a").agg(agg) + assert_groupby_results_equal(expect, got, check_dtype=False) + + +def test_groupby_apply_series(): + def foo(x): + return x.sum() + + pdf = pd.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "val": np.arange(10, dtype="float64"), + } + ) + gdf = cudf.from_pandas(pdf) + + got = gdf.groupby("x").y.apply(foo) + expect = pdf.groupby("x").y.apply(foo) + + assert_groupby_results_equal(expect, got) + + +@pytest.mark.parametrize( + "func,args", + [ + (lambda x, k: x + k, (42,)), + (lambda x, k, L: x + k - L, (42, 191)), + (lambda x, k, L, m: (x + k) / (L * m), (42, 191, 99.9)), + ], +) +def test_groupby_apply_series_args(func, args): + pdf = pd.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "y": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "val": np.arange(10, dtype="float64"), + } + ) + gdf = cudf.from_pandas(pdf) + + got = gdf.groupby("x").y.apply(func, *args) + expect = pdf.groupby("x", group_keys=False).y.apply(func, *args) + + assert_groupby_results_equal(expect, got) + + +@pytest.mark.parametrize("group_keys", [None, True, False]) +@pytest.mark.parametrize("by", ["A", ["A", "B"]]) +def test_groupby_group_keys(group_keys, by): + gdf = cudf.DataFrame( + { + "A": "a a a a b b".split(), + "B": [1, 1, 2, 2, 3, 3], + "C": [4, 6, 5, 9, 8, 7], + } + ) + pdf = gdf.to_pandas() + + g_group = gdf.groupby(by, group_keys=group_keys) + p_group = pdf.groupby(by, group_keys=group_keys) + + actual = g_group[["B", "C"]].apply(lambda x: x / x.sum()) + expected = p_group[["B", "C"]].apply(lambda x: x / x.sum()) + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "dtype", + ["int32", "int64", "float64", "datetime64[ns]", "timedelta64[ns]", "bool"], +) +@pytest.mark.parametrize( + "apply_op", + ["sum", "min", "max", "idxmax"], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_group_by_empty_apply(request, dtype, apply_op): + request.applymarker( + pytest.mark.xfail( + condition=(dtype == "datetime64[ns]" and apply_op == "sum"), + reason=("sum isn't supported for datetime64[ns]"), + ) + ) + + gdf = cudf.DataFrame({"a": [], "b": [], "c": []}, dtype=dtype) + pdf = gdf.to_pandas() + + gg = gdf.groupby("a")["c"] + pg = pdf.groupby("a")["c"] + + assert_eq( + gg.apply(apply_op), + pg.apply(apply_op), + check_dtype=True, + check_index_type=True, + ) diff --git a/python/cudf/cudf/tests/groupby/test_attributes.py b/python/cudf/cudf/tests/groupby/test_attributes.py index 16e6741229d..eb95d915e03 100644 --- a/python/cudf/cudf/tests/groupby/test_attributes.py +++ b/python/cudf/cudf/tests/groupby/test_attributes.py @@ -4,6 +4,10 @@ import pytest import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) from cudf.testing import assert_eq @@ -124,3 +128,48 @@ def test_grouping(grouper): ): assert pdf_group[0] == gdf_group[0] assert_eq(pdf_group[1], gdf_group[1]) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] +) +def test_groupby_dtypes(groups): + df = cudf.DataFrame( + {"a": [1, 2, 3, 3], "b": 
["x", "y", "z", "a"], "c": [10, 11, 12, 12]} + ) + pdf = df.to_pandas() + with pytest.warns(FutureWarning): + expected = pdf.groupby(groups).dtypes + with pytest.warns(FutureWarning): + actual = df.groupby(groups).dtypes + + assert_eq(expected, actual) + + +def test_ngroups(): + pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) + gdf = cudf.DataFrame.from_pandas(pdf) + + pgb = pdf.groupby("a") + ggb = gdf.groupby("a") + assert pgb.ngroups == ggb.ngroups + assert len(pgb) == len(ggb) + + +def test_ndim(): + pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) + gdf = cudf.DataFrame.from_pandas(pdf) + + pgb = pdf.groupby("a") + ggb = gdf.groupby("a") + assert pgb.ndim == ggb.ndim + + pser = pd.Series(range(3)) + gser = cudf.Series.from_pandas(pser) + pgb = pser.groupby([0, 0, 1]) + ggb = gser.groupby(cudf.Series([0, 0, 1])) + assert pgb.ndim == ggb.ndim diff --git a/python/cudf/cudf/tests/groupby/test_cummulative.py b/python/cudf/cudf/tests/groupby/test_cummulative.py index 1eab8a1b317..0149fe7a008 100644 --- a/python/cudf/cudf/tests/groupby/test_cummulative.py +++ b/python/cudf/cudf/tests/groupby/test_cummulative.py @@ -4,7 +4,7 @@ import pytest import cudf -from cudf.testing import assert_groupby_results_equal +from cudf.testing import assert_eq, assert_groupby_results_equal @pytest.mark.parametrize("index", [None, [1, 2, 3, 4]]) @@ -93,3 +93,14 @@ def test_groupby_scan_null_keys(with_nan, dropna, duplicate_index): expect = df.groupby("key", dropna=dropna).cumsum() got = cdf.groupby("key", dropna=dropna).cumsum() assert_groupby_results_equal(expect, got) + + +@pytest.mark.parametrize("op", ["cumsum", "cumprod", "cummin", "cummax"]) +def test_scan_int_null_pandas_compatible(op): + data = {"a": [1, 2, None, 3], "b": ["x"] * 4} + df_pd = pd.DataFrame(data) + df_cudf = cudf.DataFrame(data) + expected = getattr(df_pd.groupby("b")["a"], op)() + with cudf.option_context("mode.pandas_compatible", True): + result = getattr(df_cudf.groupby("b")["a"], op)() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/groupby/test_diff.py b/python/cudf/cudf/tests/groupby/test_diff.py index a9e60e0f4b5..43d46f9feec 100644 --- a/python/cudf/cudf/tests/groupby/test_diff.py +++ b/python/cudf/cudf/tests/groupby/test_diff.py @@ -192,3 +192,15 @@ def test_groupby_fillna_multi_value_df(): got = gdf.groupby(key_col).fillna(value=fill_values) assert_groupby_results_equal(expect[value_cols], got[value_cols]) + + +def test_groupby_select_then_diff(): + pdf = pd.DataFrame( + {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5], "c": [3, 4, 5, 6, 7]} + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby("a")["c"].diff(1) + actual = gdf.groupby("a")["c"].diff(1) + + assert_groupby_results_equal(expected, actual) diff --git a/python/cudf/cudf/tests/groupby/test_ffill.py b/python/cudf/cudf/tests/groupby/test_ffill.py new file mode 100644 index 00000000000..60fdd265b38 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_ffill.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd + +import cudf +from cudf.testing import assert_groupby_results_equal + + +def test_groupby_select_then_ffill(): + pdf = pd.DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": [1, None, None, 2, None], + "c": [3, None, None, 4, None], + } + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby("a")["c"].ffill() + actual = gdf.groupby("a")["c"].ffill() + + assert_groupby_results_equal(expected, actual) diff --git a/python/cudf/cudf/tests/groupby/test_get_group.py b/python/cudf/cudf/tests/groupby/test_get_group.py index 43b6183fca5..6fb62c63b09 100644 --- a/python/cudf/cudf/tests/groupby/test_get_group.py +++ b/python/cudf/cudf/tests/groupby/test_get_group.py @@ -1,6 +1,16 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + import cudf -from cudf.testing import assert_eq +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq, assert_groupby_results_equal +from cudf.testing._utils import expect_warning_if def test_rank_return_type_compatible_mode(): @@ -10,3 +20,88 @@ def test_rank_return_type_compatible_mode(): expect = pdf.groupby("b").get_group(0) result = df.groupby("b").get_group(0) assert_eq(expect, result) + + +@pytest.mark.parametrize( + "pdf, group, name, obj", + [ + ( + pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), + "X", + "A", + None, + ), + ( + pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), + "X", + "B", + None, + ), + ( + pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), + "X", + "A", + pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}), + ), + ( + pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), + "Y", + 1, + pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}), + ), + ( + pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), + "Y", + 3, + pd.DataFrame({"a": [1, 2, 0, 11]}), + ), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warnings only given on newer versions.", +) +def test_groupby_get_group(pdf, group, name, obj): + gdf = cudf.from_pandas(pdf) + + if isinstance(obj, pd.DataFrame): + gobj = cudf.from_pandas(obj) + else: + gobj = obj + + pgb = pdf.groupby(group) + ggb = gdf.groupby(group) + with expect_warning_if(obj is not None): + expected = pgb.get_group(name=name, obj=obj) + with expect_warning_if(obj is not None): + actual = ggb.get_group(name=name, obj=gobj) + + assert_groupby_results_equal(expected, actual) + + expected = pdf.iloc[pgb.indices.get(name)] + actual = gdf.iloc[ggb.indices.get(name)] + + assert_eq(expected, actual) + + +@pytest.mark.skipif( + not PANDAS_GE_220, reason="pandas behavior applicable in >=2.2" +) +def test_get_group_list_like(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.groupby(["a"]).get_group((1,)) + expected = df.to_pandas().groupby(["a"]).get_group((1,)) + assert_eq(result, expected) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group((1, 2)) + + with pytest.raises(KeyError): + df.groupby(["a"]).get_group([1]) + + +def test_get_group_list_like_len_2(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [3, 2, 1]}) + result = df.groupby(["a", "b"]).get_group((1, 4)) + expected = df.to_pandas().groupby(["a", "b"]).get_group((1, 4)) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/groupby/test_head_tail.py b/python/cudf/cudf/tests/groupby/test_head_tail.py new file mode 100644 index 
00000000000..6457efb043e --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_head_tail.py @@ -0,0 +1,106 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import itertools +import operator + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture(params=[-3, -1, 0, 1, 2]) +def n(request): + return request.param + + +@pytest.fixture( + params=[False, True], ids=["no-preserve-order", "preserve-order"] +) +def preserve_order(request): + return request.param + + +@pytest.fixture +def df(): + return cudf.DataFrame( + { + "a": [1, 0, 1, 2, 2, 1, 3, 2, 3, 3, 3], + "b": [0, 1, 2, 4, 3, 5, 6, 7, 9, 8, 10], + } + ) + + +@pytest.fixture(params=[True, False], ids=["head", "tail"]) +def take_head(request): + return request.param + + +@pytest.fixture +def expected(df, n, take_head, preserve_order): + if n == 0: + # We'll get an empty dataframe in this case + return df._empty_like(keep_index=True) + else: + if preserve_order: + # Should match pandas here + g = df.to_pandas().groupby("a") + if take_head: + return g.head(n=n) + else: + return g.tail(n=n) + else: + # We groupby "a" which is the first column. This + # possibly relies on an implementation detail that for + # integer group keys, cudf produces groups in sorted + # (ascending) order. + keyfunc = operator.itemgetter(0) + if take_head or n == 0: + # Head does group[:n] as does tail for n == 0 + slicefunc = operator.itemgetter(slice(None, n)) + else: + # Tail does group[-n:] except when n == 0 + slicefunc = operator.itemgetter( + slice(-n, None) if n else slice(0) + ) + values_to_sort = np.hstack( + [df.values_host, np.arange(len(df)).reshape(-1, 1)] + ) + expect_a, expect_b, index = zip( + *itertools.chain.from_iterable( + slicefunc(list(group)) + for _, group in itertools.groupby( + sorted(values_to_sort.tolist(), key=keyfunc), + key=keyfunc, + ) + ), + strict=True, + ) + return cudf.DataFrame({"a": expect_a, "b": expect_b}, index=index) + + +def test_head_tail(df, n, take_head, expected, preserve_order): + if take_head: + actual = df.groupby("a").head(n=n, preserve_order=preserve_order) + else: + actual = df.groupby("a").tail(n=n, preserve_order=preserve_order) + assert_eq(actual, expected) + + +def test_head_tail_empty(): + # GH #13397 + + values = [1, 2, 3] + pdf = pd.DataFrame({}, index=values) + df = cudf.DataFrame({}, index=values) + + expected = pdf.groupby(pd.Series(values)).head() + got = df.groupby(cudf.Series(values)).head() + assert_eq(expected, got, check_column_type=False) + + expected = pdf.groupby(pd.Series(values)).tail() + got = df.groupby(cudf.Series(values)).tail() + + assert_eq(expected, got, check_column_type=False) diff --git a/python/cudf/cudf/tests/groupby/test_ngroup.py b/python/cudf/cudf/tests/groupby/test_ngroup.py new file mode 100644 index 00000000000..4125eb01abd --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_ngroup.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "by",
+    [
+        lambda: "a",
+        lambda: "b",
+        lambda: ["a", "b"],
+        lambda: "c",
+        lambda: pd.Series([1, 2, 1, 2, 1, 2]),
+        lambda: pd.Series(["x", "y", "y", "x", "z", "x"]),
+    ],
+)
+def test_groupby_ngroup(by, ascending):
+    df_ngroup = cudf.DataFrame(
+        {
+            "a": [2, 2, 1, 1, 2, 3],
+            "b": [1, 2, 1, 2, 1, 2],
+            "c": ["a", "a", "b", "c", "d", "c"],
+        },
+        index=[1, 3, 5, 7, 4, 2],
+    )
+    df_ngroup.index.name = "foo"
+    by = by()  # materialize "by"; "ascending" is a shared conftest fixture
+    expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending)
+    actual = df_ngroup.groupby(by).ngroup(ascending=ascending)
+    assert_eq(expected, actual, check_dtype=False)
diff --git a/python/cudf/cudf/tests/groupby/test_nth.py b/python/cudf/cudf/tests/groupby/test_nth.py
index 1fb9d32f535..fd93cc2e208 100644
--- a/python/cudf/cudf/tests/groupby/test_nth.py
+++ b/python/cudf/cudf/tests/groupby/test_nth.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -23,3 +24,37 @@ def test_groupby_nth(n, by):
 
     got = gdf.groupby(by).nth(n)
     assert_groupby_results_equal(expect, got, check_dtype=False)
+
+
+def test_groupby_consecutive_operations():
+    df = cudf.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"])
+    pdf = df.to_pandas()
+
+    gg = df.groupby("A")
+    pg = pdf.groupby("A")
+
+    actual = gg.nth(-1)
+    expected = pg.nth(-1)
+
+    assert_groupby_results_equal(actual, expected, check_dtype=False)
+
+    actual = gg.nth(0)
+    expected = pg.nth(0)
+
+    assert_groupby_results_equal(actual, expected, check_dtype=False)
+
+    actual = gg.cumsum()
+    expected = pg.cumsum()
+
+    assert_groupby_results_equal(actual, expected, check_dtype=False)
+
+    actual = gg.cumcount()
+    expected = pg.cumcount()
+
+    assert_groupby_results_equal(actual, expected, check_dtype=False)
+
+    # Repeat cumsum to verify the reused GroupBy object still agrees.
+    actual = gg.cumsum()
+    expected = pg.cumsum()
+
+    assert_groupby_results_equal(actual, expected, check_dtype=False)
diff --git a/python/cudf/cudf/tests/groupby/test_pct_change.py b/python/cudf/cudf/tests/groupby/test_pct_change.py
new file mode 100644
index 00000000000..a2335afc1ba
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/test_pct_change.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
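+# Tests for GroupBy.pct_change. The `no_default` sentinel marks
+# "fill_method not passed"; explicit "ffill"/"bfill" triggers a FutureWarning.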
+ + +import pytest + +import cudf +from cudf.api.extensions import no_default +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "data, gkey", + [ + ( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + }, + ["id"], + ), + ( + { + "id": [0, 0, 0, 0, 1, 1, 1], + "a": [1, 3, 4, 2.0, -3.0, 9.0, 10.0], + "b": [10.0, 23, -4.0, 2, -3.0, None, 19.0], + }, + ["id", "a"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val1": [None, None, None, None, None, None], + }, + ["id"], + ), + ], +) +@pytest.mark.parametrize("periods", [-2, 0, 5]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", no_default, None]) +def test_groupby_pct_change(data, gkey, periods, fill_method): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + with expect_warning_if(fill_method not in (no_default, None)): + actual = gdf.groupby(gkey).pct_change( + periods=periods, fill_method=fill_method + ) + with expect_warning_if( + ( + fill_method not in (no_default, None) + or (fill_method is not None and pdf.isna().any().any()) + ) + ): + expected = pdf.groupby(gkey).pct_change( + periods=periods, fill_method=fill_method + ) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("periods", [-5, 5]) +def test_groupby_pct_change_multiindex_dataframe(periods): + gdf = cudf.DataFrame( + { + "a": [1, 1, 2, 2], + "b": [1, 1, 2, 3], + "c": [2, 3, 4, 5], + "d": [6, 8, 9, 1], + } + ).set_index(["a", "b"]) + + actual = gdf.groupby(level=["a", "b"]).pct_change(periods) + expected = gdf.to_pandas().groupby(level=["a", "b"]).pct_change(periods) + + assert_eq(expected, actual) + + +def test_groupby_pct_change_empty_columns(): + gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) + pdf = gdf.to_pandas() + + actual = gdf.groupby("id").pct_change() + expected = pdf.groupby("id").pct_change() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/groupby/test_reductions.py b/python/cudf/cudf/tests/groupby/test_reductions.py index adbc5af309f..f54c0a79337 100644 --- a/python/cudf/cudf/tests/groupby/test_reductions.py +++ b/python/cudf/cudf/tests/groupby/test_reductions.py @@ -5,7 +5,11 @@ import pytest import cudf -from cudf.testing import assert_groupby_results_equal +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq, assert_groupby_results_equal from cudf.testing._utils import assert_exceptions_equal @@ -676,3 +680,308 @@ def test_groupby_no_keys(pdf): check_index_type=False, # Int64 v/s Float64 **kwargs, ) + + +@pytest.mark.parametrize("label", [None, "left", "right"]) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +def test_groupby_freq_week(label, closed): + pdf = pd.DataFrame( + { + "Publish date": [ + pd.Timestamp("2000-01-03"), + pd.Timestamp("2000-01-01"), + pd.Timestamp("2000-01-09"), + pd.Timestamp("2000-01-02"), + pd.Timestamp("2000-01-07"), + pd.Timestamp("2000-01-16"), + ], + "ID": [0, 1, 2, 3, 4, 5], + "Price": [10, 20, 30, 40, 50, 60], + } + ) + gdf = cudf.from_pandas(pdf) + expect = pdf.groupby( + pd.Grouper(key="Publish date", freq="1W", label=label, closed=closed) + ).mean() + got = 
gdf.groupby( + cudf.Grouper(key="Publish date", freq="1W", label=label, closed=closed) + ).mean() + assert_eq( + expect, + got, + check_like=True, + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize("label", [None, "left", "right"]) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +def test_groupby_freq_day(label, closed): + pdf = pd.DataFrame( + { + "Publish date": [ + pd.Timestamp("2000-01-03"), + pd.Timestamp("2000-01-01"), + pd.Timestamp("2000-01-09"), + pd.Timestamp("2000-01-02"), + pd.Timestamp("2000-01-07"), + pd.Timestamp("2000-01-16"), + ], + "ID": [0, 1, 2, 3, 4, 5], + "Price": [10, 20, 30, 40, 50, 60], + } + ) + gdf = cudf.from_pandas(pdf) + expect = pdf.groupby( + pd.Grouper(key="Publish date", freq="3D", label=label, closed=closed) + ).mean() + got = gdf.groupby( + cudf.Grouper(key="Publish date", freq="3D", label=label, closed=closed) + ).mean() + assert_eq( + expect, + got, + check_like=True, + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize("label", [None, "left", "right"]) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +def test_groupby_freq_min(label, closed): + pdf = pd.DataFrame( + { + "Publish date": [ + pd.Timestamp("2000-01-01 12:01:00"), + pd.Timestamp("2000-01-01 12:05:00"), + pd.Timestamp("2000-01-01 15:30:00"), + pd.Timestamp("2000-01-02 00:00:00"), + pd.Timestamp("2000-01-01 23:47:00"), + pd.Timestamp("2000-01-02 00:05:00"), + ], + "ID": [0, 1, 2, 3, 4, 5], + "Price": [10, 20, 30, 40, 50, 60], + } + ) + gdf = cudf.from_pandas(pdf) + expect = pdf.groupby( + pd.Grouper(key="Publish date", freq="1h", label=label, closed=closed) + ).mean() + got = gdf.groupby( + cudf.Grouper(key="Publish date", freq="1h", label=label, closed=closed) + ).mean() + assert_eq( + expect, + got, + check_like=True, + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize("label", [None, "left", "right"]) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +def test_groupby_freq_s(label, closed): + pdf = pd.DataFrame( + { + "Publish date": [ + pd.Timestamp("2000-01-01 00:00:02"), + pd.Timestamp("2000-01-01 00:00:07"), + pd.Timestamp("2000-01-01 00:00:02"), + pd.Timestamp("2000-01-02 00:00:15"), + pd.Timestamp("2000-01-01 00:00:05"), + pd.Timestamp("2000-01-02 00:00:09"), + ], + "ID": [0, 1, 2, 3, 4, 5], + "Price": [10, 20, 30, 40, 50, 60], + } + ) + gdf = cudf.from_pandas(pdf) + expect = pdf.groupby( + pd.Grouper(key="Publish date", freq="3s", label=label, closed=closed) + ).mean() + got = gdf.groupby( + cudf.Grouper(key="Publish date", freq="3s", label=label, closed=closed) + ).mean() + assert_eq( + expect, + got, + check_like=True, + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize("index_names", ["a", "b", "c", ["b", "c"]]) +def test_groupby_by_index_names(index_names): + gdf = cudf.DataFrame( + {"a": [1, 2, 3, 4], "b": ["a", "b", "a", "a"], "c": [1, 1, 2, 1]} + ).set_index(index_names) + pdf = gdf.to_pandas() + + assert_groupby_results_equal( + pdf.groupby(index_names).min(), gdf.groupby(index_names).min() + ) + + +@pytest.mark.parametrize( + "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] +) +def test_group_by_pandas_compat(groups): + with cudf.option_context("mode.pandas_compatible", True): + df = cudf.DataFrame( + { + "a": [1, 3, 2, 3, 3], + "b": ["x", "a", "y", "z", "a"], + "c": [10, 13, 11, 12, 12], + } + ) + pdf = df.to_pandas() + + assert_eq(pdf.groupby(groups).max(), df.groupby(groups).max()) + + +@pytest.mark.parametrize( + 
"groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] +) +@pytest.mark.parametrize("sort", [True, False]) +def test_group_by_pandas_sort_order(groups, sort): + with cudf.option_context("mode.pandas_compatible", True): + df = cudf.DataFrame( + { + "a": [10, 1, 10, 3, 2, 1, 3, 3], + "b": [5, 6, 7, 1, 2, 3, 4, 9], + "c": [20, 20, 10, 11, 13, 11, 12, 12], + } + ) + pdf = df.to_pandas() + + assert_eq( + pdf.groupby(groups, sort=sort).sum(), + df.groupby(groups, sort=sort).sum(), + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_group_by_empty_reduction( + all_supported_types_as_str, groupby_reduction_methods, request +): + request.applymarker( + pytest.mark.xfail( + condition=all_supported_types_as_str == "category" + and groupby_reduction_methods + in {"min", "max", "idxmin", "idxmax", "first", "last"}, + reason=f"cuDF doesn't support {groupby_reduction_methods} on {all_supported_types_as_str}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=all_supported_types_as_str == "str" + and groupby_reduction_methods in {"idxmin", "idxmax"}, + reason=f"cuDF doesn't support {groupby_reduction_methods} on {all_supported_types_as_str}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition="int" in all_supported_types_as_str + and groupby_reduction_methods == "mean", + reason=f"{all_supported_types_as_str} returns incorrect result type with {groupby_reduction_methods}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition="timedelta" in all_supported_types_as_str + and groupby_reduction_methods == "prod", + raises=RuntimeError, + reason=f"{all_supported_types_as_str} raises libcudf error with {groupby_reduction_methods}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition="datetime" in all_supported_types_as_str + and groupby_reduction_methods in {"mean", "prod", "sum"}, + raises=RuntimeError, + reason=f"{all_supported_types_as_str} raises libcudf error with {groupby_reduction_methods}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=all_supported_types_as_str in {"str", "category"} + and groupby_reduction_methods in {"sum", "prod", "mean"}, + raises=TypeError, + reason=f"{all_supported_types_as_str} raises TypeError with {groupby_reduction_methods}", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=all_supported_types_as_str == "bool" + and groupby_reduction_methods in {"sum", "prod", "mean"}, + reason=f"{all_supported_types_as_str} returns incorrect result type with {groupby_reduction_methods}", + ) + ) + gdf = cudf.DataFrame( + {"a": [], "b": [], "c": []}, dtype=all_supported_types_as_str + ) + pdf = gdf.to_pandas() + + gg = gdf.groupby("a")["c"] + pg = pdf.groupby("a", observed=True)["c"] + + assert_eq( + getattr(gg, groupby_reduction_methods)(), + getattr(pg, groupby_reduction_methods)(), + check_dtype=True, + ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning only given on newer versions.", +) +def test_categorical_grouping_pandas_compatibility(): + gdf = cudf.DataFrame( + { + "key": cudf.Series([2, 1, 3, 1, 1], dtype="category"), + "a": [0, 1, 3, 2, 3], + } + ) + pdf = gdf.to_pandas() + + with cudf.option_context("mode.pandas_compatible", True): + actual = gdf.groupby("key", sort=False).sum() + with pytest.warns(FutureWarning): + # observed param deprecation. 
+ expected = pdf.groupby("key", sort=False).sum() + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "by,data", + [ + ("a", {"a": [1, 2, 3]}), + (["a", "id"], {"id": [0, 0, 1], "a": [1, 2, 3]}), + ("a", {"a": [1, 2, 3], "b": ["A", "B", "C"]}), + ("id", {"id": [0, 0, 1], "a": [1, 2, 3], "b": ["A", "B", "C"]}), + (["b", "id"], {"id": [0, 0, 1], "b": ["A", "B", "C"]}), + ("b", {"b": ["A", "B", "C"]}), + ], +) +def test_group_by_reduce_numeric_only(by, data, groupby_reduction_methods): + # Test that simple groupby reductions support numeric_only=True + if groupby_reduction_methods == "count": + pytest.skip( + f"{groupby_reduction_methods} doesn't support numeric_only" + ) + df = cudf.DataFrame(data) + expected = getattr( + df.to_pandas().groupby(by, sort=True), groupby_reduction_methods + )(numeric_only=True) + result = getattr(df.groupby(by, sort=True), groupby_reduction_methods)( + numeric_only=True + ) + assert_eq(expected, result) diff --git a/python/cudf/cudf/tests/groupby/test_sample.py b/python/cudf/cudf/tests/groupby/test_sample.py new file mode 100644 index 00000000000..66104e81396 --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_sample.py @@ -0,0 +1,101 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import collections +import itertools +import string + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture(params=["default", "rangeindex", "intindex", "strindex"]) +def index(request): + n = 12 + if request.param == "rangeindex": + return cudf.RangeIndex(2, n + 2) + elif request.param == "intindex": + return cudf.Index( + [2, 3, 4, 1, 0, 5, 6, 8, 7, 9, 10, 13], dtype="int32" + ) + elif request.param == "strindex": + return cudf.Index(list(string.ascii_lowercase[:n])) + elif request.param == "default": + return None + + +@pytest.fixture( + params=[ + ["a", "a", "b", "b", "c", "c", "c", "d", "d", "d", "d", "d"], + [1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4], + ], + ids=["str-group", "int-group"], +) +def df(index, request): + return cudf.DataFrame( + {"a": request.param, "b": request.param, "v": request.param}, + index=index, + ) + + +@pytest.fixture(params=["a", ["a", "b"]], ids=["single-col", "two-col"]) +def by(request): + return request.param + + +def expected(df, *, n=None, frac=None): + value_counts = collections.Counter(df.a.values_host) + if n is not None: + values = list( + itertools.chain.from_iterable( + itertools.repeat(v, n) for v in value_counts.keys() + ) + ) + elif frac is not None: + values = list( + itertools.chain.from_iterable( + itertools.repeat(v, round(count * frac)) + for v, count in value_counts.items() + ) + ) + else: + raise ValueError("Must provide either n or frac") + values = cudf.Series(sorted(values), dtype=df.a.dtype) + return cudf.DataFrame({"a": values, "b": values, "v": values}) + + +@pytest.mark.parametrize("n", [None, 0, 1, 2]) +def test_constant_n_no_replace(df, by, n): + result = df.groupby(by).sample(n=n).sort_values("a") + n = 1 if n is None else n + assert_eq(expected(df, n=n), result.reset_index(drop=True)) + + +def test_constant_n_no_replace_too_large_raises(df): + with pytest.raises(ValueError): + df.groupby("a").sample(n=3) + + +@pytest.mark.parametrize("n", [1, 2, 3]) +def test_constant_n_replace(df, by, n): + result = df.groupby(by).sample(n=n, replace=True).sort_values("a") + assert_eq(expected(df, n=n), result.reset_index(drop=True)) + + +def test_invalid_arguments(df): + with pytest.raises(ValueError): + df.groupby("a").sample(n=1, frac=0.1) + + +def test_not_implemented_arguments(df): + 
with pytest.raises(NotImplementedError): + # These are valid weights, but we don't implement this yet. + df.groupby("a").sample(n=1, weights=[1 / len(df)] * len(df)) + + +@pytest.mark.parametrize("frac", [0, 1 / 3, 1 / 2, 2 / 3, 1]) +@pytest.mark.parametrize("replace", [False, True]) +def test_fraction_rounding(df, by, frac, replace): + result = df.groupby(by).sample(frac=frac, replace=replace).sort_values("a") + assert_eq(expected(df, frac=frac), result.reset_index(drop=True)) diff --git a/python/cudf/cudf/tests/groupby/test_shift.py b/python/cudf/cudf/tests/groupby/test_shift.py index edb48826138..47a5a2b16e8 100644 --- a/python/cudf/cudf/tests/groupby/test_shift.py +++ b/python/cudf/cudf/tests/groupby/test_shift.py @@ -6,7 +6,7 @@ import pytest import cudf -from cudf.testing import assert_groupby_results_equal +from cudf.testing import assert_eq, assert_groupby_results_equal from cudf.testing.dataset_generator import rand_dataframe @@ -202,3 +202,25 @@ def test_groupby_shift_row_zero_shift(fill_value): assert_groupby_results_equal( expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]] ) + + +def test_groupby_select_then_shift(): + pdf = pd.DataFrame( + {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5], "c": [3, 4, 5, 6, 7]} + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.groupby("a")["c"].shift(1) + actual = gdf.groupby("a")["c"].shift(1) + + assert_groupby_results_equal(expected, actual) + + +def test_groupby_shift_series_multiindex(): + idx = cudf.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["f", "s"] + ) + ser = cudf.Series(range(4), index=idx) + result = ser.groupby(level=0).shift(1) + expected = ser.to_pandas().groupby(level=0).shift(1) + assert_eq(expected, result) diff --git a/python/cudf/cudf/tests/groupby/test_size.py b/python/cudf/cudf/tests/groupby/test_size.py new file mode 100644 index 00000000000..3b83f1e68bc --- /dev/null +++ b/python/cudf/cudf/tests/groupby/test_size.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import pandas as pd + +import cudf +from cudf.testing import assert_groupby_results_equal + + +def test_size_as_index_false(): + df = pd.DataFrame({"a": [1, 2, 1], "b": [1, 2, 3]}, columns=["a", "b"]) + expected = df.groupby("a", as_index=False).size() + result = cudf.from_pandas(df).groupby("a", as_index=False).size() + assert_groupby_results_equal(result, expected, as_index=False, by="a") + + +def test_size_series_with_name(): + ser = pd.Series(range(3), name="foo") + expected = ser.groupby(ser).size() + result = cudf.from_pandas(ser).groupby(ser).size() + assert_groupby_results_equal(result, expected) diff --git a/python/cudf/cudf/tests/groupby/test_transform.py b/python/cudf/cudf/tests/groupby/test_transform.py index f7138036ddf..4c007393a64 100644 --- a/python/cudf/cudf/tests/groupby/test_transform.py +++ b/python/cudf/cudf/tests/groupby/test_transform.py @@ -1,10 +1,11 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
 import itertools
 
+import pandas as pd
 import pytest
 
 import cudf
-from cudf.testing import assert_eq
+from cudf.testing import assert_eq, assert_groupby_results_equal
 
 
@@ -41,3 +42,37 @@ def test_transform_invalid():
     df = cudf.DataFrame({"key": [1, 1], "values": [4, 5]})
     with pytest.raises(TypeError):
         df.groupby("key").transform({"values": "cumprod"})
+
+
+@pytest.mark.parametrize(
+    "by",
+    [
+        "a",
+        ["a", "b"],
+        pd.Series([2, 1, 1, 2, 2]),
+        pd.Series(["b", "a", "a", "b", "b"]),
+    ],
+)
+@pytest.mark.parametrize("agg", ["sum", lambda df: df.mean()])
+def test_groupby_transform_aggregation(by, agg):
+    gdf = cudf.DataFrame(
+        {"a": [2, 2, 1, 2, 1], "b": [1, 1, 1, 2, 2], "c": [1, 2, 3, 4, 5]}
+    )
+    pdf = gdf.to_pandas()
+
+    expected = pdf.groupby(by).transform(agg)
+    actual = gdf.groupby(by).transform(agg)
+
+    assert_groupby_results_equal(expected, actual)
+
+
+@pytest.mark.parametrize("by", ["a", ["a", "b"], pd.Series([1, 2, 1, 3])])
+def test_groupby_transform_maintain_index(by):
+    # test that we maintain the index after a groupby transform
+    gdf = cudf.DataFrame(
+        {"a": [1, 1, 1, 2], "b": [1, 2, 1, 2]}, index=[3, 2, 1, 0]
+    )
+    pdf = gdf.to_pandas()
+    assert_groupby_results_equal(
+        pdf.groupby(by).transform("max"), gdf.groupby(by).transform("max")
+    )
diff --git a/python/cudf/cudf/tests/groupby/test_value_counts.py b/python/cudf/cudf/tests/groupby/test_value_counts.py
new file mode 100644
index 00000000000..4534d150287
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/test_value_counts.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
+
+
+import numpy as np
+import pytest
+
+import cudf
+from cudf.testing import assert_groupby_results_equal
+
+
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("sort", [True, False])
+def test_group_by_value_counts(normalize, sort, ascending, dropna, as_index):
+    # From Issue#12789
+    df = cudf.DataFrame(
+        {
+            "gender": ["male", "male", "female", "male", "female", "male"],
+            "education": ["low", "medium", np.nan, "low", "high", "low"],
+            "country": ["US", "FR", "US", "FR", "FR", "FR"],
+        }
+    )
+    pdf = df.to_pandas()
+
+    actual = df.groupby("gender", as_index=as_index).value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+    expected = pdf.groupby("gender", as_index=as_index).value_counts(
+        normalize=normalize, sort=sort, ascending=ascending, dropna=dropna
+    )
+
+    # Index types can differ here, so skip the index-type check.
+    assert_groupby_results_equal(
+        actual,
+        expected,
+        check_index_type=False,
+        as_index=as_index,
+        by=["gender", "education"],
+        sort=sort,
+    )
+
+
+def test_group_by_value_counts_subset():
+    # From Issue#12789
+    df = cudf.DataFrame(
+        {
+            "gender": ["male", "male", "female", "male", "female", "male"],
+            "education": ["low", "medium", "high", "low", "high", "low"],
+            "country": ["US", "FR", "US", "FR", "FR", "FR"],
+        }
+    )
+    pdf = df.to_pandas()
+
+    actual = df.groupby("gender").value_counts(["education"])
+    expected = pdf.groupby("gender").value_counts(["education"])
+
+    # TODO: Remove `check_names=False` once testing against `pandas>=2.0.0`
+    assert_groupby_results_equal(
+        actual, expected, check_names=False, check_index_type=False
+    )
+
+
+def test_group_by_value_counts_clash_with_subset():
+    df = cudf.DataFrame({"a": [1, 5, 3], "b": [2, 5, 2]})
+    with pytest.raises(ValueError):
+        df.groupby("a").value_counts(["a"])
+
+
+def 
test_group_by_value_counts_subset_not_exists(): + df = cudf.DataFrame({"a": [1, 5, 3], "b": [2, 5, 2]}) + with pytest.raises(ValueError): + df.groupby("a").value_counts(["c"]) + + +def test_group_by_value_counts_with_count_column(): + df = cudf.DataFrame({"a": [1, 5, 3], "count": [2, 5, 2]}) + with pytest.raises(ValueError): + df.groupby("a", as_index=False).value_counts() diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py deleted file mode 100644 index 5cab96d3db9..00000000000 --- a/python/cudf/cudf/tests/test_groupby.py +++ /dev/null @@ -1,1917 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. - -import collections -import itertools -import operator -import string -import textwrap -from functools import partial - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import DataFrame, Series -from cudf.api.extensions import no_default -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_VERSION, -) -from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops -from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES -from cudf.core.udf.utils import UDFError, precompiled -from cudf.testing import assert_eq -from cudf.testing._utils import ( - expect_warning_if, -) - -_now = np.datetime64("now") -_tomorrow = _now + np.timedelta64(1, "D") -_now = np.int64(_now.astype("datetime64[ns]")) -_tomorrow = np.int64(_tomorrow.astype("datetime64[ns]")) -_index_type_aggs = {"count", "idxmin", "idxmax", "cumcount"} - - -def assert_groupby_results_equal( - expect, got, sort=True, as_index=True, by=None, **kwargs -): - # Because we don't sort by index by default in groupby, - # sort expect and got by index before comparing. - if sort: - if as_index: - expect = expect.sort_index() - got = got.sort_index() - else: - assert by is not None - if isinstance(expect, (pd.DataFrame, cudf.DataFrame)): - expect = expect.sort_values(by=by).reset_index(drop=True) - else: - expect = expect.sort_values(by=by).reset_index(drop=True) - - if isinstance(got, cudf.DataFrame): - got = got.sort_values(by=by).reset_index(drop=True) - else: - got = got.sort_values(by=by).reset_index(drop=True) - - assert_eq(expect, got, **kwargs) - - -def make_frame( - dataframe_class, - nelem, - seed=0, - extra_levels=(), - extra_vals=(), - with_datetime=False, -): - rng = np.random.default_rng(seed=seed) - - df = dataframe_class() - - df["x"] = rng.integers(0, 5, nelem) - df["y"] = rng.integers(0, 3, nelem) - for lvl in extra_levels: - df[lvl] = rng.integers(0, 2, nelem) - - df["val"] = rng.random(nelem) - for val in extra_vals: - df[val] = rng.random(nelem) - - if with_datetime: - df["datetime"] = rng.integers( - _now, _tomorrow, nelem, dtype=np.int64 - ).astype("datetime64[ns]") - - return df - - -@pytest.fixture -def gdf(): - return DataFrame({"x": [1, 2, 3], "y": [0, 1, 1]}) - - -@pytest.fixture -def pdf(gdf): - return gdf.to_pandas() - - -@pytest.fixture(scope="module") -def groupby_jit_data_small(): - """ - Return a small dataset for testing JIT Groupby Apply. The dataframe - contains 4 groups of size 1, 2, 3, 4 as well as an additional key - column that can be used to test subgroups within groups. 
This data - is useful for smoke testing basic numeric results - """ - rng = np.random.default_rng(42) - df = DataFrame() - key1 = [1] + [2] * 2 + [3] * 3 + [4] * 4 - key2 = [1, 2] * 5 - df["key1"] = key1 - df["key2"] = key2 - - df["val1"] = rng.integers(0, 10, len(key1)) - df["val2"] = rng.integers(0, 10, len(key1)) - - # randomly permute data - df = df.sample(frac=1, ignore_index=True) - return df - - -@pytest.fixture(scope="module") -def groupby_jit_data_large(groupby_jit_data_small): - """ - Larger version of groupby_jit_data_small which contains enough data - to require more than one block per group. This data is useful for - testing if JIT GroupBy algorithms scale to larger dastasets without - manifesting numerical issues such as overflow. - """ - max_tpb = 1024 - factor = ( - max_tpb + 1 - ) # bigger than a block but not always an exact multiple - df = cudf.concat([groupby_jit_data_small] * factor) - - return df - - -@pytest.fixture(scope="module") -def groupby_jit_data_nans(groupby_jit_data_small): - """ - Returns a modified version of groupby_jit_data_small which contains - nan values. - """ - - df = groupby_jit_data_small.sort_values(["key1", "key2"]) - df["val1"] = df["val1"].astype("float64") - df["val1"][::2] = np.nan - df = df.sample(frac=1, ignore_index=True) - return df - - -@pytest.fixture(scope="module") -def groupby_jit_datasets( - groupby_jit_data_small, groupby_jit_data_large, groupby_jit_data_nans -): - return { - "small": groupby_jit_data_small, - "large": groupby_jit_data_large, - "nans": groupby_jit_data_nans, - } - - -def run_groupby_apply_jit_test(data, func, keys, *args): - expect_groupby_obj = data.to_pandas().groupby(keys) - got_groupby_obj = data.groupby(keys) - - # compare cuDF jit to pandas - cudf_jit_result = got_groupby_obj.apply( - func, *args, engine="jit", include_groups=False - ) - pandas_result = expect_groupby_obj.apply(func, *args, include_groups=False) - assert_groupby_results_equal(cudf_jit_result, pandas_result) - - -def groupby_apply_jit_reductions_test_inner(func, data, dtype): - # ideally we'd just have: - # lambda group: getattr(group, func)() - # but the current kernel caching mechanism relies on pickle which - # does not play nice with local functions. 
What's below uses - # exec as a workaround to write the test functions dynamically - - funcstr = textwrap.dedent( - f""" - def func(df): - return df['val1'].{func}() - """ - ) - lcl = {} - exec(funcstr, lcl) - func = lcl["func"] - - data["val1"] = data["val1"].astype(dtype) - data["val2"] = data["val2"].astype(dtype) - - run_groupby_apply_jit_test(data, func, ["key1"]) - - -# test unary reductions -@pytest.mark.parametrize( - "dtype", - SUPPORTED_GROUPBY_NUMPY_TYPES, - ids=[str(t) for t in SUPPORTED_GROUPBY_NUMPY_TYPES], -) -@pytest.mark.parametrize( - "func", ["min", "max", "sum", "mean", "var", "std", "idxmin", "idxmax"] -) -@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_jit_unary_reductions( - request, func, dtype, dataset, groupby_jit_datasets -): - request.applymarker( - pytest.mark.xfail( - condition=( - ( - dataset == "nans" - and func in {"var", "std", "mean"} - and str(dtype) in {"int64", "float32", "float64"} - ) - or ( - dataset == "nans" - and func in {"idxmax", "idxmin", "sum"} - and dtype.kind == "f" - ) - ), - reason=("https://github.com/rapidsai/cudf/issues/14860"), - ) - ) - warn_condition = ( - dataset == "nans" - and func in {"idxmax", "idxmin"} - and dtype.kind == "f" - ) - dataset = groupby_jit_datasets[dataset].copy(deep=True) - with expect_warning_if(warn_condition, FutureWarning): - groupby_apply_jit_reductions_test_inner(func, dataset, dtype) - - -# test unary reductions for special values -def groupby_apply_jit_reductions_special_vals_inner( - func, data, dtype, special_val -): - funcstr = textwrap.dedent( - f""" - def func(df): - return df['val1'].{func}() - """ - ) - lcl = {} - exec(funcstr, lcl) - func = lcl["func"] - - data["val1"] = data["val1"].astype(dtype) - data["val2"] = data["val2"].astype(dtype) - data["val1"] = special_val - data["val2"] = special_val - - run_groupby_apply_jit_test(data, func, ["key1"]) - - -# test unary index reductions for special values -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def groupby_apply_jit_idx_reductions_special_vals_inner( - func, data, dtype, special_val -): - funcstr = textwrap.dedent( - f""" - def func(df): - return df['val1'].{func}() - """ - ) - lcl = {} - exec(funcstr, lcl) - func = lcl["func"] - - data["val1"] = data["val1"].astype(dtype) - data["val2"] = data["val2"].astype(dtype) - data["val1"] = special_val - data["val2"] = special_val - - run_groupby_apply_jit_test(data, func, ["key1"]) - - -@pytest.mark.parametrize("dtype", ["float64", "float32"]) -@pytest.mark.parametrize("func", ["min", "max", "sum", "mean", "var", "std"]) -@pytest.mark.parametrize("special_val", [np.nan, np.inf, -np.inf]) -@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_jit_reductions_special_vals( - func, dtype, dataset, groupby_jit_datasets, special_val -): - dataset = groupby_jit_datasets[dataset].copy(deep=True) - with expect_warning_if( - func in {"var", "std"} and not np.isnan(special_val), RuntimeWarning - ): - groupby_apply_jit_reductions_special_vals_inner( - func, dataset, dtype, special_val - ) - - -@pytest.mark.parametrize("func", ["idxmax", "idxmin"]) -@pytest.mark.parametrize( - 
"special_val", - [ - pytest.param( - np.nan, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/13832" - ), - ), - np.inf, - -np.inf, - ], -) -@pytest.mark.parametrize("dataset", ["small", "large", "nans"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="include_groups keyword new in pandas 2.2", -) -def test_groupby_apply_jit_idx_reductions_special_vals( - func, dataset, groupby_jit_datasets, special_val -): - dataset = groupby_jit_datasets[dataset].copy(deep=True) - groupby_apply_jit_idx_reductions_special_vals_inner( - func, dataset, "float64", special_val - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_sum_integer_overflow(): - max = np.iinfo("int32").max - - data = DataFrame( - { - "a": [0, 0, 0], - "b": [max, max, max], - } - ) - - def func(group): - return group["b"].sum() - - run_groupby_apply_jit_test(data, func, ["a"]) - - -@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) -@pytest.mark.parametrize( - "dataset", - [ - pytest.param( - "small", - marks=[ - pytest.mark.filterwarnings( - "ignore:Degrees of Freedom <= 0 for slice" - ), - pytest.mark.filterwarnings( - "ignore:divide by zero encountered in divide" - ), - ], - ), - "large", - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_correlation(dataset, groupby_jit_datasets, dtype): - dataset = groupby_jit_datasets[dataset].copy(deep=True) - - dataset["val1"] = dataset["val1"].astype(dtype) - dataset["val2"] = dataset["val2"].astype(dtype) - - keys = ["key1"] - - def func(group): - return group["val1"].corr(group["val2"]) - - if np.dtype(dtype).kind == "f": - # Correlation of floating types is not yet supported: - # https://github.com/rapidsai/cudf/issues/13839 - m = ( - f"Series.corr\\(Series\\) is not " - f"supported for \\({dtype}, {dtype}\\)" - ) - with pytest.raises(UDFError, match=m): - run_groupby_apply_jit_test(dataset, func, keys) - return - with expect_warning_if(dtype in {"int32", "int64"}, RuntimeWarning): - run_groupby_apply_jit_test(dataset, func, keys) - - -@pytest.mark.parametrize("dtype", ["int32", "int64"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_correlation_zero_variance(dtype): - # pearson correlation is undefined when the variance of either - # variable is zero. This test ensures that the jit implementation - # returns the same result as pandas in this case. 
- data = DataFrame( - {"a": [0, 0, 0, 0, 0], "b": [1, 1, 1, 1, 1], "c": [2, 2, 2, 2, 2]} - ) - - def func(group): - return group["b"].corr(group["c"]) - - with expect_warning_if(dtype in {"int32", "int64"}, RuntimeWarning): - run_groupby_apply_jit_test(data, func, ["a"]) - - -@pytest.mark.parametrize("op", unary_ops) -def test_groupby_apply_jit_invalid_unary_ops_error(groupby_jit_data_small, op): - keys = ["key1"] - - def func(group): - return op(group["val1"]) - - with pytest.raises( - UDFError, - match=f"{op.__name__}\\(Series\\) is not supported by JIT GroupBy", - ): - run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) - - -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_groupby_apply_jit_invalid_binary_ops_error( - groupby_jit_data_small, op -): - keys = ["key1"] - - def func(group): - return op(group["val1"], group["val2"]) - - with pytest.raises( - UDFError, - match=f"{op.__name__}\\(Series, Series\\) is not supported", - ): - run_groupby_apply_jit_test(groupby_jit_data_small, func, keys) - - -def test_groupby_apply_jit_no_df_ops(groupby_jit_data_small): - # DataFrame level operations are not yet supported. - def func(group): - return group.sum() - - with pytest.raises( - UDFError, - match="JIT GroupBy.apply\\(\\) does not support DataFrame.sum\\(\\)", - ): - run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1"]) - - -@pytest.mark.parametrize("dtype", ["uint8", "str"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_unsupported_dtype(dtype): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - df["b"] = df["b"].astype(dtype) - - # a UDAF that doesn't actually use the input column - # with the unsupported dtype should still succeed - def func(group): - return group["c"].sum() - - run_groupby_apply_jit_test(df, func, ["a"]) - - # however a UDAF that does use the unsupported dtype - # should fail - def func(group): - return group["b"].sum() - - with pytest.raises(UDFError, match="Only columns of the following dtypes"): - run_groupby_apply_jit_test(df, func, ["a"]) - - -@pytest.mark.parametrize( - "func", - [ - lambda df: df["val1"].max() + df["val2"].min(), - lambda df: df["val1"].sum() + df["val2"].var(), - lambda df: df["val1"].mean() + df["val2"].std(), - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_basic(func, groupby_jit_data_small): - run_groupby_apply_jit_test(groupby_jit_data_small, func, ["key1", "key2"]) - - -def f1(df, k): - return df["val1"].max() + df["val2"].min() + k - - -def f2(df, k, L): - return df["val1"].sum() - df["val2"].var() + (k / L) - - -def f3(df, k, L, m): - return ((k * df["val1"].mean()) + (L * df["val2"].std())) / m - - -@pytest.mark.parametrize( - "func,args", [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_args(func, args, groupby_jit_data_small): - run_groupby_apply_jit_test( - groupby_jit_data_small, func, ["key1", "key2"], *args - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_jit_block_divergence(): - # https://github.com/rapidsai/cudf/issues/12686 - df = cudf.DataFrame( - { - "a": [0, 0, 0, 1, 1, 1], - "b": [1, 
1, 1, 2, 3, 4], - } - ) - - def diverging_block(grp_df): - if grp_df["b"].mean() > 1: - return grp_df["b"].mean() - return 0 - - run_groupby_apply_jit_test(df, diverging_block, ["a"]) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_caching(): - # Make sure similar functions that differ - # by simple things like constants actually - # recompile - - # begin with a clear cache - precompiled.clear() - assert precompiled.currsize == 0 - - data = cudf.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 4, 5, 6]}) - - def f(group): - return group["b"].mean() * 2 - - # a single run should result in a cache size of 1 - run_groupby_apply_jit_test(data, f, ["a"]) - assert precompiled.currsize == 1 - - # a second run with f should not increase the count - run_groupby_apply_jit_test(data, f, ["a"]) - assert precompiled.currsize == 1 - - # changing a constant value inside the UDF should miss - def f(group): - return group["b"].mean() * 3 - - run_groupby_apply_jit_test(data, f, ["a"]) - assert precompiled.currsize == 2 - - # changing the dtypes of the columns should miss - data["b"] = data["b"].astype("float64") - run_groupby_apply_jit_test(data, f, ["a"]) - - assert precompiled.currsize == 3 - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_no_bytecode_fallback(): - # tests that a function which contains no bytecode - # attribute, but would still be executable using - # the iterative groupby apply approach, still works. - - gdf = cudf.DataFrame({"a": [0, 1, 1], "b": [1, 2, 3]}) - pdf = gdf.to_pandas() - - def f(group): - return group.sum() - - part = partial(f) - - expect = pdf.groupby("a").apply(part, include_groups=False) - got = gdf.groupby("a").apply(part, engine="auto", include_groups=False) - assert_groupby_results_equal(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_return_col_from_df(): - # tests a UDF that consists of purely colwise - # ops, such as `lambda group: group.x + group.y` - # which returns a column - df = cudf.DataFrame( - { - "id": range(10), - "x": range(10), - "y": range(10), - } - ) - pdf = df.to_pandas() - - def func(df): - return df.x + df.y - - got = df.groupby("id").apply(func, include_groups=False) - expect = pdf.groupby("id").apply(func, include_groups=False) - # pandas seems to erroneously add an extra MI level of ids - # TODO: Figure out how pandas groupby.apply determines the columns - expect = pd.DataFrame(expect.droplevel(1), columns=got.columns) - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize("func", [lambda group: group.sum()]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_return_df(func): - # tests a UDF that reduces over a dataframe - # and produces a series with the original column names - # as its index, such as lambda group: group.sum() + group.min() - df = cudf.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, 4]}) - pdf = df.to_pandas() - - expect = pdf.groupby("a").apply(func, include_groups=False) - got = df.groupby("a").apply(func, include_groups=False) - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize("as_index", [True, False]) -def test_groupby_apply_return_reindexed_series(as_index): - def 
gdf_func(df): - return cudf.Series([df["a"].sum(), df["b"].min(), df["c"].max()]) - - def pdf_func(df): - return pd.Series([df["a"].sum(), df["b"].min(), df["c"].max()]) - - df = cudf.DataFrame( - { - "key": [0, 0, 1, 1, 2, 2], - "a": [1, 2, 3, 4, 5, 6], - "b": [7, 8, 9, 10, 11, 12], - "c": [13, 14, 15, 16, 17, 18], - } - ) - pdf = df.to_pandas() - - kwargs = {} - if PANDAS_GE_220: - kwargs["include_groups"] = False - - expect = pdf.groupby("key", as_index=as_index).apply(pdf_func, **kwargs) - got = df.groupby("key", as_index=as_index).apply(gdf_func, **kwargs) - assert_groupby_results_equal(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_noempty_group(): - pdf = pd.DataFrame( - {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]} - ) - gdf = cudf.from_pandas(pdf) - - expect = ( - pdf.groupby("a", group_keys=False) - .apply(lambda x: x.iloc[[0, 1]], include_groups=False) - .reset_index(drop=True) - ) - got = ( - gdf.groupby("a") - .apply(lambda x: x.iloc[[0, 1]], include_groups=False) - .reset_index(drop=True) - ) - assert_groupby_results_equal(expect, got) - - -def create_test_groupby_apply_return_scalars_params(): - def f0(x): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = ticker / 10 - return full - - def f1(x, k): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = ticker / k - return full - - def f2(x, k, L): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = L * (ticker / k) - return full - - def f3(x, k, L, m): - x = x[~x["B"].isna()] - ticker = x.shape[0] - full = L * (ticker / k) % m - return full - - return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))] - - -@pytest.mark.parametrize( - "func,args", create_test_groupby_apply_return_scalars_params() -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_groupby_apply_return_scalars(func, args): - pdf = pd.DataFrame( - { - "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5], - "B": [ - 0.01, - np.nan, - 0.03, - 0.04, - np.nan, - 0.06, - 0.07, - 0.08, - 0.09, - 1.0, - ], - } - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("A").apply(func, *args, include_groups=False) - actual = gdf.groupby("A").apply(func, *args, include_groups=False) - - assert_groupby_results_equal(expected, actual) - - -def create_test_groupby_apply_return_series_dataframe_params(): - def f0(x): - return x - x.max() - - def f1(x): - return x.min() - x.max() - - def f2(x): - return x.min() - - def f3(x, k): - return x - x.max() + k - - def f4(x, k, L): - return x.min() - x.max() + (k / L) - - def f5(x, k, L, m): - return m * x.min() + (k / L) - - return [ - (f0, ()), - (f1, ()), - (f2, ()), - (f3, (42,)), - (f4, (42, 119)), - (f5, (41, 119, 212.1)), - ] - - -@pytest.mark.parametrize( - "func,args", create_test_groupby_apply_return_series_dataframe_params() -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Include groups missing on old versions of pandas", -) -def test_groupby_apply_return_series_dataframe(func, args): - pdf = pd.DataFrame( - {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]} - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby(["key"], group_keys=False).apply( - func, *args, include_groups=False - ) - actual = gdf.groupby(["key"]).apply(func, *args, include_groups=False) - - assert_groupby_results_equal(expected, actual) - - 
-@pytest.mark.parametrize( - "pdf", - [pd.DataFrame(), pd.DataFrame({"a": []}), pd.Series([], dtype="float64")], -) -def test_groupby_apply_no_keys(pdf): - gdf = cudf.from_pandas(pdf) - if isinstance(pdf, pd.DataFrame): - kwargs = {"check_column_type": False} - else: - kwargs = {} - assert_groupby_results_equal( - pdf.groupby([], group_keys=False).apply(lambda x: x.max()), - gdf.groupby([]).apply(lambda x: x.max()), - check_index_type=False, # Int64 v/s Float64 - **kwargs, - ) - - -@pytest.mark.parametrize( - "data", - [ - {"Speed": [380.0, 370.0, 24.0, 26.0], "Score": [50, 30, 90, 80]}, - { - "Speed": [380.0, 370.0, 24.0, 26.0], - "Score": [50, 30, 90, 80], - "Other": [10, 20, 30, 40], - }, - ], -) -@pytest.mark.parametrize("group", ["Score", "Speed"]) -def test_groupby_describe(data, group): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - got = gdf.groupby(group).describe() - expect = pdf.groupby(group).describe() - - assert_groupby_results_equal(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [], "b": []}, - {"a": [2, 1, 2, 1, 1, 3], "b": [None, 1, 2, None, 2, None]}, - {"a": [None], "b": [None]}, - {"a": [2, 1, 1], "b": [None, 1, 0], "c": [None, 0, 1]}, - ], -) -@pytest.mark.parametrize("agg", ["first", "last", ["first", "last"]]) -def test_groupby_first(data, agg): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - expect = pdf.groupby("a").agg(agg) - got = gdf.groupby("a").agg(agg) - assert_groupby_results_equal(expect, got, check_dtype=False) - - -def test_groupby_apply_series(): - def foo(x): - return x.sum() - - got = make_frame(DataFrame, 100).groupby("x").y.apply(foo) - expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(foo) - - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize( - "func,args", - [ - (lambda x, k: x + k, (42,)), - (lambda x, k, L: x + k - L, (42, 191)), - (lambda x, k, L, m: (x + k) / (L * m), (42, 191, 99.9)), - ], -) -def test_groupby_apply_series_args(func, args): - got = make_frame(DataFrame, 100).groupby("x").y.apply(func, *args) - expect = ( - make_frame(pd.DataFrame, 100) - .groupby("x", group_keys=False) - .y.apply(func, *args) - ) - - assert_groupby_results_equal(expect, got) - - -@pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("closed", [None, "left", "right"]) -def test_groupby_freq_week(label, closed): - pdf = pd.DataFrame( - { - "Publish date": [ - pd.Timestamp("2000-01-03"), - pd.Timestamp("2000-01-01"), - pd.Timestamp("2000-01-09"), - pd.Timestamp("2000-01-02"), - pd.Timestamp("2000-01-07"), - pd.Timestamp("2000-01-16"), - ], - "ID": [0, 1, 2, 3, 4, 5], - "Price": [10, 20, 30, 40, 50, 60], - } - ) - gdf = cudf.from_pandas(pdf) - expect = pdf.groupby( - pd.Grouper(key="Publish date", freq="1W", label=label, closed=closed) - ).mean() - got = gdf.groupby( - cudf.Grouper(key="Publish date", freq="1W", label=label, closed=closed) - ).mean() - assert_eq( - expect, - got, - check_like=True, - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("closed", [None, "left", "right"]) -def test_groupby_freq_day(label, closed): - pdf = pd.DataFrame( - { - "Publish date": [ - pd.Timestamp("2000-01-03"), - pd.Timestamp("2000-01-01"), - pd.Timestamp("2000-01-09"), - pd.Timestamp("2000-01-02"), - pd.Timestamp("2000-01-07"), - pd.Timestamp("2000-01-16"), - ], - "ID": [0, 1, 2, 3, 4, 5], - "Price": [10, 20, 30, 40, 50, 60], - } - ) - gdf = 
cudf.from_pandas(pdf) - expect = pdf.groupby( - pd.Grouper(key="Publish date", freq="3D", label=label, closed=closed) - ).mean() - got = gdf.groupby( - cudf.Grouper(key="Publish date", freq="3D", label=label, closed=closed) - ).mean() - assert_eq( - expect, - got, - check_like=True, - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("closed", [None, "left", "right"]) -def test_groupby_freq_min(label, closed): - pdf = pd.DataFrame( - { - "Publish date": [ - pd.Timestamp("2000-01-01 12:01:00"), - pd.Timestamp("2000-01-01 12:05:00"), - pd.Timestamp("2000-01-01 15:30:00"), - pd.Timestamp("2000-01-02 00:00:00"), - pd.Timestamp("2000-01-01 23:47:00"), - pd.Timestamp("2000-01-02 00:05:00"), - ], - "ID": [0, 1, 2, 3, 4, 5], - "Price": [10, 20, 30, 40, 50, 60], - } - ) - gdf = cudf.from_pandas(pdf) - expect = pdf.groupby( - pd.Grouper(key="Publish date", freq="1h", label=label, closed=closed) - ).mean() - got = gdf.groupby( - cudf.Grouper(key="Publish date", freq="1h", label=label, closed=closed) - ).mean() - assert_eq( - expect, - got, - check_like=True, - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("closed", [None, "left", "right"]) -def test_groupby_freq_s(label, closed): - pdf = pd.DataFrame( - { - "Publish date": [ - pd.Timestamp("2000-01-01 00:00:02"), - pd.Timestamp("2000-01-01 00:00:07"), - pd.Timestamp("2000-01-01 00:00:02"), - pd.Timestamp("2000-01-02 00:00:15"), - pd.Timestamp("2000-01-01 00:00:05"), - pd.Timestamp("2000-01-02 00:00:09"), - ], - "ID": [0, 1, 2, 3, 4, 5], - "Price": [10, 20, 30, 40, 50, 60], - } - ) - gdf = cudf.from_pandas(pdf) - expect = pdf.groupby( - pd.Grouper(key="Publish date", freq="3s", label=label, closed=closed) - ).mean() - got = gdf.groupby( - cudf.Grouper(key="Publish date", freq="3s", label=label, closed=closed) - ).mean() - assert_eq( - expect, - got, - check_like=True, - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "pdf, group, name, obj", - [ - ( - pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), - "X", - "A", - None, - ), - ( - pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), - "X", - "B", - None, - ), - ( - pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), - "X", - "A", - pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}), - ), - ( - pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), - "Y", - 1, - pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}), - ), - ( - pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}), - "Y", - 3, - pd.DataFrame({"a": [1, 2, 0, 11]}), - ), - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warnings only given on newer versions.", -) -def test_groupby_get_group(pdf, group, name, obj): - gdf = cudf.from_pandas(pdf) - - if isinstance(obj, pd.DataFrame): - gobj = cudf.from_pandas(obj) - else: - gobj = obj - - pgb = pdf.groupby(group) - ggb = gdf.groupby(group) - with expect_warning_if(obj is not None): - expected = pgb.get_group(name=name, obj=obj) - with expect_warning_if(obj is not None): - actual = ggb.get_group(name=name, obj=gobj) - - assert_groupby_results_equal(expected, actual) - - expected = pdf.iloc[pgb.indices.get(name)] - actual = gdf.iloc[ggb.indices.get(name)] - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "by", - [ - "a", - ["a", "b"], - pd.Series([2, 1, 1, 2, 2]), - pd.Series(["b", "a", "a", "b", 
"b"]), - ], -) -@pytest.mark.parametrize("agg", ["sum", "mean", lambda df: df.mean()]) -def test_groupby_transform_aggregation(by, agg): - gdf = cudf.DataFrame( - {"a": [2, 2, 1, 2, 1], "b": [1, 1, 1, 2, 2], "c": [1, 2, 3, 4, 5]} - ) - pdf = gdf.to_pandas() - - expected = pdf.groupby(by).transform(agg) - actual = gdf.groupby(by).transform(agg) - - assert_groupby_results_equal(expected, actual) - - -def test_groupby_select_then_ffill(): - pdf = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [1, None, None, 2, None], - "c": [3, None, None, 4, None], - } - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("a")["c"].ffill() - actual = gdf.groupby("a")["c"].ffill() - - assert_groupby_results_equal(expected, actual) - - -def test_groupby_select_then_shift(): - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5], "c": [3, 4, 5, 6, 7]} - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("a")["c"].shift(1) - actual = gdf.groupby("a")["c"].shift(1) - - assert_groupby_results_equal(expected, actual) - - -def test_groupby_select_then_diff(): - pdf = pd.DataFrame( - {"a": [1, 1, 1, 2, 2], "b": [1, 2, 3, 4, 5], "c": [3, 4, 5, 6, 7]} - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.groupby("a")["c"].diff(1) - actual = gdf.groupby("a")["c"].diff(1) - - assert_groupby_results_equal(expected, actual) - - -# TODO: Add a test including datetime64[ms] column in input data - - -@pytest.mark.parametrize("by", ["a", ["a", "b"], pd.Series([1, 2, 1, 3])]) -def test_groupby_transform_maintain_index(by): - # test that we maintain the index after a groupby transform - gdf = cudf.DataFrame( - {"a": [1, 1, 1, 2], "b": [1, 2, 1, 2]}, index=[3, 2, 1, 0] - ) - pdf = gdf.to_pandas() - assert_groupby_results_equal( - pdf.groupby(by).transform("max"), gdf.groupby(by).transform("max") - ) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "data, gkey", - [ - ( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], - "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], - "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], - }, - ["id"], - ), - ( - { - "id": [0, 0, 0, 0, 1, 1, 1], - "a": [1, 3, 4, 2.0, -3.0, 9.0, 10.0], - "b": [10.0, 23, -4.0, 2, -3.0, None, 19.0], - }, - ["id", "a"], - ), - ( - { - "id": ["a", "a", "b", "b", "c", "c"], - "val1": [None, None, None, None, None, None], - }, - ["id"], - ), - ], -) -@pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5]) -@pytest.mark.parametrize("fill_method", ["ffill", "bfill", no_default, None]) -def test_groupby_pct_change(data, gkey, periods, fill_method): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - with expect_warning_if(fill_method not in (no_default, None)): - actual = gdf.groupby(gkey).pct_change( - periods=periods, fill_method=fill_method - ) - with expect_warning_if( - ( - fill_method not in (no_default, None) - or (fill_method is not None and pdf.isna().any().any()) - ) - ): - expected = pdf.groupby(gkey).pct_change( - periods=periods, fill_method=fill_method - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("periods", [-5, 5]) -def test_groupby_pct_change_multiindex_dataframe(periods): - gdf = cudf.DataFrame( - { - "a": [1, 1, 2, 2], - "b": [1, 1, 2, 3], - "c": [2, 3, 4, 5], - "d": [6, 8, 9, 1], - } - ).set_index(["a", "b"]) - - actual = gdf.groupby(level=["a", "b"]).pct_change(periods) - expected = gdf.to_pandas().groupby(level=["a", "b"]).pct_change(periods) - - 
assert_eq(expected, actual) - - -def test_groupby_pct_change_empty_columns(): - gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) - pdf = gdf.to_pandas() - - actual = gdf.groupby("id").pct_change() - expected = pdf.groupby("id").pct_change() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("group_keys", [None, True, False]) -@pytest.mark.parametrize("by", ["A", ["A", "B"]]) -def test_groupby_group_keys(group_keys, by): - gdf = cudf.DataFrame( - { - "A": "a a a a b b".split(), - "B": [1, 1, 2, 2, 3, 3], - "C": [4, 6, 5, 9, 8, 7], - } - ) - pdf = gdf.to_pandas() - - g_group = gdf.groupby(by, group_keys=group_keys) - p_group = pdf.groupby(by, group_keys=group_keys) - - actual = g_group[["B", "C"]].apply(lambda x: x / x.sum()) - expected = p_group[["B", "C"]].apply(lambda x: x / x.sum()) - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "by", - [ - lambda: "a", - lambda: "b", - lambda: ["a", "b"], - lambda: "c", - lambda: pd.Series([1, 2, 1, 2, 1, 2]), - lambda: pd.Series(["x", "y", "y", "x", "z", "x"]), - ], -) -@pytest.mark.parametrize("ascending", [True, False]) -def test_groupby_ngroup(by, ascending): - df_ngroup = cudf.DataFrame( - { - "a": [2, 2, 1, 1, 2, 3], - "b": [1, 2, 1, 2, 1, 2], - "c": ["a", "a", "b", "c", "d", "c"], - }, - index=[1, 3, 5, 7, 4, 2], - ) - df_ngroup.index.name = "foo" - by = by() - expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending) - actual = df_ngroup.groupby(by).ngroup(ascending=ascending) - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] -) -def test_groupby_dtypes(groups): - df = cudf.DataFrame( - {"a": [1, 2, 3, 3], "b": ["x", "y", "z", "a"], "c": [10, 11, 12, 12]} - ) - pdf = df.to_pandas() - with pytest.warns(FutureWarning): - expected = pdf.groupby(groups).dtypes - with pytest.warns(FutureWarning): - actual = df.groupby(groups).dtypes - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("index_names", ["a", "b", "c", ["b", "c"]]) -def test_groupby_by_index_names(index_names): - gdf = cudf.DataFrame( - {"a": [1, 2, 3, 4], "b": ["a", "b", "a", "a"], "c": [1, 1, 2, 1]} - ).set_index(index_names) - pdf = gdf.to_pandas() - - assert_groupby_results_equal( - pdf.groupby(index_names).min(), gdf.groupby(index_names).min() - ) - - -@pytest.mark.parametrize( - "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] -) -def test_group_by_pandas_compat(groups): - with cudf.option_context("mode.pandas_compatible", True): - df = cudf.DataFrame( - { - "a": [1, 3, 2, 3, 3], - "b": ["x", "a", "y", "z", "a"], - "c": [10, 13, 11, 12, 12], - } - ) - pdf = df.to_pandas() - - assert_eq(pdf.groupby(groups).max(), df.groupby(groups).max()) - - -class TestSample: - @pytest.fixture(params=["default", "rangeindex", "intindex", "strindex"]) - def index(self, request): - n = 12 - if request.param == "rangeindex": - return cudf.RangeIndex(2, n + 2) - elif request.param == "intindex": - return cudf.Index( - [2, 3, 4, 1, 0, 5, 6, 8, 7, 9, 10, 13], dtype="int32" - ) - elif request.param == "strindex": - return cudf.Index(list(string.ascii_lowercase[:n])) - elif request.param == "default": - return None - - @pytest.fixture( - params=[ - ["a", "a", "b", "b", "c", "c", "c", "d", "d", "d", "d", "d"], - [1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4], - ], - ids=["str-group", "int-group"], - ) - def df(self, index, request): - 
return cudf.DataFrame( - {"a": request.param, "b": request.param, "v": request.param}, - index=index, - ) - - @pytest.fixture(params=["a", ["a", "b"]], ids=["single-col", "two-col"]) - def by(self, request): - return request.param - - def expected(self, df, *, n=None, frac=None): - value_counts = collections.Counter(df.a.values_host) - if n is not None: - values = list( - itertools.chain.from_iterable( - itertools.repeat(v, n) for v in value_counts.keys() - ) - ) - elif frac is not None: - values = list( - itertools.chain.from_iterable( - itertools.repeat(v, round(count * frac)) - for v, count in value_counts.items() - ) - ) - else: - raise ValueError("Must provide either n or frac") - values = cudf.Series(sorted(values), dtype=df.a.dtype) - return cudf.DataFrame({"a": values, "b": values, "v": values}) - - @pytest.mark.parametrize("n", [None, 0, 1, 2]) - def test_constant_n_no_replace(self, df, by, n): - result = df.groupby(by).sample(n=n).sort_values("a") - n = 1 if n is None else n - assert_eq(self.expected(df, n=n), result.reset_index(drop=True)) - - def test_constant_n_no_replace_too_large_raises(self, df): - with pytest.raises(ValueError): - df.groupby("a").sample(n=3) - - @pytest.mark.parametrize("n", [1, 2, 3]) - def test_constant_n_replace(self, df, by, n): - result = df.groupby(by).sample(n=n, replace=True).sort_values("a") - assert_eq(self.expected(df, n=n), result.reset_index(drop=True)) - - def test_invalid_arguments(self, df): - with pytest.raises(ValueError): - df.groupby("a").sample(n=1, frac=0.1) - - def test_not_implemented_arguments(self, df): - with pytest.raises(NotImplementedError): - # These are valid weights, but we don't implement this yet. - df.groupby("a").sample(n=1, weights=[1 / len(df)] * len(df)) - - @pytest.mark.parametrize("frac", [0, 1 / 3, 1 / 2, 2 / 3, 1]) - @pytest.mark.parametrize("replace", [False, True]) - def test_fraction_rounding(self, df, by, frac, replace): - result = ( - df.groupby(by).sample(frac=frac, replace=replace).sort_values("a") - ) - assert_eq(self.expected(df, frac=frac), result.reset_index(drop=True)) - - -class TestHeadTail: - @pytest.fixture(params=[-3, -2, -1, 0, 1, 2, 3], ids=lambda n: f"{n=}") - def n(self, request): - return request.param - - @pytest.fixture( - params=[False, True], ids=["no-preserve-order", "preserve-order"] - ) - def preserve_order(self, request): - return request.param - - @pytest.fixture - def df(self): - return cudf.DataFrame( - { - "a": [1, 0, 1, 2, 2, 1, 3, 2, 3, 3, 3], - "b": [0, 1, 2, 4, 3, 5, 6, 7, 9, 8, 10], - } - ) - - @pytest.fixture(params=[True, False], ids=["head", "tail"]) - def take_head(self, request): - return request.param - - @pytest.fixture - def expected(self, df, n, take_head, preserve_order): - if n == 0: - # We'll get an empty dataframe in this case - return df._empty_like(keep_index=True) - else: - if preserve_order: - # Should match pandas here - g = df.to_pandas().groupby("a") - if take_head: - return g.head(n=n) - else: - return g.tail(n=n) - else: - # We groupby "a" which is the first column. This - # possibly relies on an implementation detail that for - # integer group keys, cudf produces groups in sorted - # (ascending) order. 
- keyfunc = operator.itemgetter(0) - if take_head or n == 0: - # Head does group[:n] as does tail for n == 0 - slicefunc = operator.itemgetter(slice(None, n)) - else: - # Tail does group[-n:] except when n == 0 - slicefunc = operator.itemgetter( - slice(-n, None) if n else slice(0) - ) - values_to_sort = np.hstack( - [df.values_host, np.arange(len(df)).reshape(-1, 1)] - ) - expect_a, expect_b, index = zip( - *itertools.chain.from_iterable( - slicefunc(list(group)) - for _, group in itertools.groupby( - sorted(values_to_sort.tolist(), key=keyfunc), - key=keyfunc, - ) - ), - strict=True, - ) - return cudf.DataFrame( - {"a": expect_a, "b": expect_b}, index=index - ) - - def test_head_tail(self, df, n, take_head, expected, preserve_order): - if take_head: - actual = df.groupby("a").head(n=n, preserve_order=preserve_order) - else: - actual = df.groupby("a").tail(n=n, preserve_order=preserve_order) - assert_eq(actual, expected) - - -def test_head_tail_empty(): - # GH #13397 - - values = [1, 2, 3] - pdf = pd.DataFrame({}, index=values) - df = cudf.DataFrame({}, index=values) - - expected = pdf.groupby(pd.Series(values)).head() - got = df.groupby(cudf.Series(values)).head() - assert_eq(expected, got, check_column_type=False) - - expected = pdf.groupby(pd.Series(values)).tail() - got = df.groupby(cudf.Series(values)).tail() - - assert_eq(expected, got, check_column_type=False) - - -@pytest.mark.parametrize( - "groups", ["a", "b", "c", ["a", "c"], ["a", "b", "c"]] -) -@pytest.mark.parametrize("sort", [True, False]) -def test_group_by_pandas_sort_order(groups, sort): - with cudf.option_context("mode.pandas_compatible", True): - df = cudf.DataFrame( - { - "a": [10, 1, 10, 3, 2, 1, 3, 3], - "b": [5, 6, 7, 1, 2, 3, 4, 9], - "c": [20, 20, 10, 11, 13, 11, 12, 12], - } - ) - pdf = df.to_pandas() - - assert_eq( - pdf.groupby(groups, sort=sort).sum(), - df.groupby(groups, sort=sort).sum(), - ) - - -@pytest.mark.parametrize( - "dtype", - ["int32", "int64", "float64", "datetime64[ns]", "timedelta64[ns]", "bool"], -) -@pytest.mark.parametrize( - "reduce_op", - [ - "min", - "max", - "idxmin", - "idxmax", - "first", - "last", - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_group_by_empty_reduction(dtype, reduce_op): - gdf = cudf.DataFrame({"a": [], "b": [], "c": []}, dtype=dtype) - pdf = gdf.to_pandas() - - gg = gdf.groupby("a")["c"] - pg = pdf.groupby("a")["c"] - - assert_eq( - getattr(gg, reduce_op)(), getattr(pg, reduce_op)(), check_dtype=True - ) - - -@pytest.mark.parametrize( - "dtype", - ["int32", "int64", "float64", "datetime64[ns]", "timedelta64[ns]", "bool"], -) -@pytest.mark.parametrize( - "apply_op", - ["sum", "min", "max", "idxmax"], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_group_by_empty_apply(request, dtype, apply_op): - request.applymarker( - pytest.mark.xfail( - condition=(dtype == "datetime64[ns]" and apply_op == "sum"), - reason=("sum isn't supported for datetime64[ns]"), - ) - ) - - gdf = cudf.DataFrame({"a": [], "b": [], "c": []}, dtype=dtype) - pdf = gdf.to_pandas() - - gg = gdf.groupby("a")["c"] - pg = pdf.groupby("a")["c"] - - assert_eq( - gg.apply(apply_op), - pg.apply(apply_op), - check_dtype=True, - check_index_type=True, - ) - - -def test_groupby_consecutive_operations(): - df = cudf.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) - pdf = df.to_pandas() - - gg = df.groupby("A") - pg = 
pdf.groupby("A") - - actual = gg.nth(-1) - expected = pg.nth(-1) - - assert_groupby_results_equal(actual, expected, check_dtype=False) - - actual = gg.nth(0) - expected = pg.nth(0) - - assert_groupby_results_equal(actual, expected, check_dtype=False) - - actual = gg.cumsum() - expected = pg.cumsum() - - assert_groupby_results_equal(actual, expected, check_dtype=False) - - actual = gg.cumcount() - expected = pg.cumcount() - - assert_groupby_results_equal(actual, expected, check_dtype=False) - - actual = gg.cumsum() - expected = pg.cumsum() - - assert_groupby_results_equal(actual, expected, check_dtype=False) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warning only given on newer versions.", -) -def test_categorical_grouping_pandas_compatibility(): - gdf = cudf.DataFrame( - { - "key": cudf.Series([2, 1, 3, 1, 1], dtype="category"), - "a": [0, 1, 3, 2, 3], - } - ) - pdf = gdf.to_pandas() - - with cudf.option_context("mode.pandas_compatible", True): - actual = gdf.groupby("key", sort=False).sum() - with pytest.warns(FutureWarning): - # observed param deprecation. - expected = pdf.groupby("key", sort=False).sum() - assert_eq(actual, expected) - - -@pytest.mark.parametrize("normalize", [True, False]) -@pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("dropna", [True, False]) -@pytest.mark.parametrize("as_index", [True, False]) -def test_group_by_value_counts(normalize, sort, ascending, dropna, as_index): - # From Issue#12789 - df = cudf.DataFrame( - { - "gender": ["male", "male", "female", "male", "female", "male"], - "education": ["low", "medium", np.nan, "low", "high", "low"], - "country": ["US", "FR", "US", "FR", "FR", "FR"], - } - ) - pdf = df.to_pandas() - - actual = df.groupby("gender", as_index=as_index).value_counts( - normalize=normalize, sort=sort, ascending=ascending, dropna=dropna - ) - expected = pdf.groupby("gender", as_index=as_index).value_counts( - normalize=normalize, sort=sort, ascending=ascending, dropna=dropna - ) - - # TODO: Remove `check_names=False` once testing against `pandas>=2.0.0` - assert_groupby_results_equal( - actual, - expected, - check_index_type=False, - as_index=as_index, - by=["gender", "education"], - sort=sort, - ) - - -def test_group_by_value_counts_subset(): - # From Issue#12789 - df = cudf.DataFrame( - { - "gender": ["male", "male", "female", "male", "female", "male"], - "education": ["low", "medium", "high", "low", "high", "low"], - "country": ["US", "FR", "US", "FR", "FR", "FR"], - } - ) - pdf = df.to_pandas() - - actual = df.groupby("gender").value_counts(["education"]) - expected = pdf.groupby("gender").value_counts(["education"]) - - # TODO: Remove `check_names=False` once testing against `pandas>=2.0.0` - assert_groupby_results_equal( - actual, expected, check_names=False, check_index_type=False - ) - - -def test_group_by_value_counts_clash_with_subset(): - df = cudf.DataFrame({"a": [1, 5, 3], "b": [2, 5, 2]}) - with pytest.raises(ValueError): - df.groupby("a").value_counts(["a"]) - - -def test_group_by_value_counts_subset_not_exists(): - df = cudf.DataFrame({"a": [1, 5, 3], "b": [2, 5, 2]}) - with pytest.raises(ValueError): - df.groupby("a").value_counts(["c"]) - - -def test_group_by_value_counts_with_count_column(): - df = cudf.DataFrame({"a": [1, 5, 3], "count": [2, 5, 2]}) - with pytest.raises(ValueError): - df.groupby("a", as_index=False).value_counts() - - -def test_groupby_internal_groups_empty(gdf): - # test that we 
don't segfault when calling the internal - # .groups() method with an empty list: - gb = gdf.groupby("y") - _, _, grouped_vals = gb._groups([]) - assert grouped_vals == [] - - -def test_groupby_shift_series_multiindex(): - idx = cudf.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["f", "s"] - ) - ser = Series(range(4), index=idx) - result = ser.groupby(level=0).shift(1) - expected = ser.to_pandas().groupby(level=0).shift(1) - assert_eq(expected, result) - - -@pytest.mark.parametrize( - "func", ["min", "max", "sum", "mean", "idxmin", "idxmax"] -) -@pytest.mark.parametrize( - "by,data", - [ - ("a", {"a": [1, 2, 3]}), - (["a", "id"], {"id": [0, 0, 1], "a": [1, 2, 3]}), - ("a", {"a": [1, 2, 3], "b": ["A", "B", "C"]}), - ("id", {"id": [0, 0, 1], "a": [1, 2, 3], "b": ["A", "B", "C"]}), - (["b", "id"], {"id": [0, 0, 1], "b": ["A", "B", "C"]}), - ("b", {"b": ["A", "B", "C"]}), - ], -) -def test_group_by_reduce_numeric_only(by, data, func): - # Test that simple groupby reductions support numeric_only=True - df = cudf.DataFrame(data) - expected = getattr(df.to_pandas().groupby(by, sort=True), func)( - numeric_only=True - ) - result = getattr(df.groupby(by, sort=True), func)(numeric_only=True) - assert_eq(expected, result) - - -@pytest.mark.parametrize( - "op", ["cummax", "cummin", "cumprod", "cumsum", "mean", "median"] -) -def test_group_by_raises_string_error(op): - df = cudf.DataFrame({"a": [1, 2, 3, 4, 5], "b": ["a", "b", "c", "d", "e"]}) - - with pytest.raises(TypeError): - df.groupby(df.a).agg(op) - - -@pytest.mark.parametrize( - "op", - [ - "cummax", - "cummin", - "cumprod", - "cumsum", - "mean", - "median", - "prod", - "sum", - list, - ], -) -def test_group_by_raises_category_error(op): - df = cudf.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "b": cudf.Series(["a", "b", "c", "d", "e"], dtype="category"), - } - ) - - with pytest.raises(TypeError): - df.groupby(df.a).agg(op) - - -def test_ngroups(): - pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) - gdf = cudf.DataFrame.from_pandas(pdf) - - pgb = pdf.groupby("a") - ggb = gdf.groupby("a") - assert pgb.ngroups == ggb.ngroups - assert len(pgb) == len(ggb) - - -def test_ndim(): - pdf = pd.DataFrame({"a": [1, 1, 3], "b": range(3)}) - gdf = cudf.DataFrame.from_pandas(pdf) - - pgb = pdf.groupby("a") - ggb = gdf.groupby("a") - assert pgb.ndim == ggb.ndim - - pser = pd.Series(range(3)) - gser = cudf.Series.from_pandas(pser) - pgb = pser.groupby([0, 0, 1]) - ggb = gser.groupby(cudf.Series([0, 0, 1])) - assert pgb.ndim == ggb.ndim - - -@pytest.mark.skipif( - not PANDAS_GE_220, reason="pandas behavior applicable in >=2.2" -) -def test_get_group_list_like(): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - result = df.groupby(["a"]).get_group((1,)) - expected = df.to_pandas().groupby(["a"]).get_group((1,)) - assert_eq(result, expected) - - with pytest.raises(KeyError): - df.groupby(["a"]).get_group((1, 2)) - - with pytest.raises(KeyError): - df.groupby(["a"]).get_group([1]) - - -def test_get_group_list_like_len_2(): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [3, 2, 1]}) - result = df.groupby(["a", "b"]).get_group((1, 4)) - expected = df.to_pandas().groupby(["a", "b"]).get_group((1, 4)) - assert_eq(result, expected) - - -def test_size_as_index_false(): - df = pd.DataFrame({"a": [1, 2, 1], "b": [1, 2, 3]}, columns=["a", "b"]) - expected = df.groupby("a", as_index=False).size() - result = cudf.from_pandas(df).groupby("a", as_index=False).size() - assert_groupby_results_equal(result, expected, 
as_index=False, by="a")
-
-
-def test_size_series_with_name():
-    ser = pd.Series(range(3), name="foo")
-    expected = ser.groupby(ser).size()
-    result = cudf.from_pandas(ser).groupby(ser).size()
-    assert_groupby_results_equal(result, expected)
-
-
-@pytest.mark.parametrize("op", ["cumsum", "cumprod", "cummin", "cummax"])
-def test_scan_int_null_pandas_compatible(op):
-    data = {"a": [1, 2, None, 3], "b": ["x"] * 4}
-    df_pd = pd.DataFrame(data)
-    df_cudf = cudf.DataFrame(data)
-    expected = getattr(df_pd.groupby("b")["a"], op)()
-    with cudf.option_context("mode.pandas_compatible", True):
-        result = getattr(df_cudf.groupby("b")["a"], op)()
-    assert_eq(result, expected)
-
-
-def test_agg_duplicate_aggs_pandas_compat_raises():
-    agg = {"b": ["mean", "mean"]}
-    dfgb = cudf.DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]}).groupby(["a"])
-    with cudf.option_context("mode.pandas_compatible", True):
-        with pytest.raises(NotImplementedError):
-            dfgb.agg(agg)
-
-    with pytest.warns(UserWarning):
-        result = dfgb.agg(agg)
-    expected = cudf.DataFrame(
-        [4.5, 6.0],
-        index=cudf.Index([1, 2], name="a"),
-        columns=pd.MultiIndex.from_tuples([("b", "mean")]),
-    )
-    assert_groupby_results_equal(result, expected)

From df567254b81fc8d8c6d8c4f636d1c288717b77c2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Fri, 15 Aug 2025 12:00:31 -0700
Subject: [PATCH 136/366] Add new xfails for xarray release (#19705)

We have two new failures in the pandas test suite that I started seeing
yesterday. I strongly suspect that these are due to [the new xarray
release](https://pypi.org/project/xarray/). Adding to the xfail list for
now to unblock our CI.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/19705
---
 python/cudf/cudf/pandas/scripts/conftest-patch.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py
index 3869116496e..9a1051ec158 100644
--- a/python/cudf/cudf/pandas/scripts/conftest-patch.py
+++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py
@@ -6867,6 +6867,8 @@ def pytest_unconfigure(config):
     "tests/generic/test_to_xarray.py::TestDataFrameToXArray::test_to_xarray_index_types[uint32]",
     "tests/generic/test_to_xarray.py::TestDataFrameToXArray::test_to_xarray_index_types[uint64]",
     "tests/generic/test_to_xarray.py::TestDataFrameToXArray::test_to_xarray_index_types[uint8]",
+    "tests/generic/test_to_xarray.py::TestSeriesToXArray::test_to_xarray_index_types[string-python]",
+    "tests/generic/test_to_xarray.py::TestSeriesToXArray::test_to_xarray_index_types[string-pyarrow]",
     "tests/groupby/aggregate/test_aggregate.py::test_agg_grouping_is_list_tuple",
     "tests/groupby/aggregate/test_aggregate.py::test_agg_multiple_with_as_index_false_subset_to_a_single_column",
     "tests/groupby/aggregate/test_aggregate.py::test_agg_str_with_kwarg_axis_1_raises[count]",

From ff1a803ecdf4ceefca29428e8a421e471387e915 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 15 Aug 2025 14:44:34 -0700
Subject: [PATCH 137/366] Preserve decimal precision in
 `cudf::interop::column_metadata` (#19587)

Towards https://github.com/rapidsai/cudf/issues/18863

To support decimal types in cudf_polars, we'll need to preserve/assign a
resulting precision when exporting `to_polars` based on the input decimal
precision from Polars.
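A minimal sketch of the intended round trip, assuming the new
`ColumnMetadata.precision` field introduced below (the values are
illustrative and mirror the pylibcudf tests added in this PR):

    import decimal

    import pyarrow as pa
    import pylibcudf as plc

    # A decimal column whose logical Arrow type is decimal128(3, 2).
    arr = pa.array([decimal.Decimal("1.23"), None], type=pa.decimal128(3, 2))
    col = plc.Column.from_arrow(arr)

    # Without metadata the exported schema falls back to the maximum
    # precision of the physical type (38 for decimal128); passing
    # ColumnMetadata(precision=3) records the original precision instead.
    result = plc.interop.to_arrow(
        col, metadata=plc.interop.ColumnMetadata(precision=3)
    )
    assert result.type == pa.decimal128(3, 2)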
This PR allows us to write out the intended precision to the Arrow schema
when utilizing `column_metadata` to interop between cuDF and Polars via
Arrow.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/19587
---
 cpp/include/cudf/interop.hpp                  |  4 +-
 cpp/src/interop/to_arrow_schema.cpp           | 22 +++++---
 .../pylibcudf/pylibcudf/_interop_helpers.pyx  |  5 +-
 .../pylibcudf/pylibcudf/libcudf/interop.pxd   |  3 ++
 python/pylibcudf/tests/test_interop.py        | 52 +++++++++++++++++++
 5 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 76c7fa1b614..2929b5cc601 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -113,12 +113,12 @@ DLManagedTensor* to_dlpack(
 /**
  * @brief Detailed metadata information for arrow array.
  *
- * As of now this contains only name in the hierarchy of children of cudf column,
- * but in future this can be updated as per requirement.
+ * This contains attributes of the column or type not natively supported by cudf.
  */
 struct column_metadata {
   std::string name;      ///< Name of the column
   std::string timezone;  ///< Timezone of the column
+  std::optional<int32_t> precision;  ///< Resulting decimal precision of the column
   std::vector<column_metadata> children_meta;  ///< Metadata of children of the column
 
   /**
diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp
index 2fe562dda94..bd6f7622a31 100644
--- a/cpp/src/interop/to_arrow_schema.cpp
+++ b/cpp/src/interop/to_arrow_schema.cpp
@@ -79,37 +79,43 @@ struct dispatch_to_arrow_type {
 template <typename DeviceType>
 int decimals_to_arrow(column_view input, int32_t precision, ArrowSchema* out)
 {
+  CUDF_EXPECTS(precision >= 1 and precision <= 38,
+               "Precision must be between 1 and 38 inclusive",
+               cudf::data_type_error);
   return ArrowSchemaSetTypeDecimal(
     out, id_to_arrow_type(input.type().id()), precision, -input.type().scale());
 }
 
 template <>
 int dispatch_to_arrow_type::operator()<numeric::decimal32>(column_view input,
-                                                           column_metadata const&,
+                                                           column_metadata const& metadata,
                                                            ArrowSchema* out)
 {
-  using DeviceType = int32_t;
-  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>(), out);
+  using DeviceType  = int32_t;
+  int32_t precision = metadata.precision.value_or(cudf::detail::max_precision<DeviceType>());
+  return decimals_to_arrow<DeviceType>(input, precision, out);
 }
 
 template <>
 int dispatch_to_arrow_type::operator()<numeric::decimal64>(column_view input,
-                                                           column_metadata const&,
+                                                           column_metadata const& metadata,
                                                            ArrowSchema* out)
 {
   using DeviceType = int64_t;
   // Arrow decimal 64 maxes at precision of 18, cudf::detail::max_precision<DeviceType>() produces 19.
   // decimal32 has precision 1 - 9, decimal64 has precision 10 - 18, decimal128 is 19 - 38
-  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>() - 1, out);
+  int32_t precision = metadata.precision.value_or(cudf::detail::max_precision<DeviceType>() - 1);
+  return decimals_to_arrow<DeviceType>(input, precision, out);
 }
 
 template <>
 int dispatch_to_arrow_type::operator()<numeric::decimal128>(column_view input,
-                                                            column_metadata const&,
+                                                            column_metadata const& metadata,
                                                             ArrowSchema* out)
 {
-  using DeviceType = __int128_t;
-  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>(), out);
+  using DeviceType  = __int128_t;
+  int32_t precision = metadata.precision.value_or(cudf::detail::max_precision<DeviceType>());
+  return decimals_to_arrow<DeviceType>(input, precision, out);
 }
 
 template <>
diff --git a/python/pylibcudf/pylibcudf/_interop_helpers.pyx b/python/pylibcudf/pylibcudf/_interop_helpers.pyx
index 122858d98b1..3e474426898 100644
--- a/python/pylibcudf/pylibcudf/_interop_helpers.pyx
+++ b/python/pylibcudf/pylibcudf/_interop_helpers.pyx
@@ -1,5 +1,5 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
-
+from libc.stdint cimport int32_t
 from cpython.pycapsule cimport PyCapsule_GetPointer
 
 from pylibcudf.libcudf.interop cimport (
@@ -39,6 +39,7 @@ class ColumnMetadata:
     """
     name: str = ""
     timezone: str = ""
+    precision: int | None = None
     children_meta: list[ColumnMetadata] = field(default_factory=list)
 
 
@@ -78,6 +79,8 @@ cdef column_metadata _metadata_to_libcudf(metadata):
     cdef column_metadata c_metadata
     c_metadata.name = metadata.name.encode()
    c_metadata.timezone = metadata.timezone.encode()
+    if metadata.precision is not None:
+        c_metadata.precision = metadata.precision
     for child_meta in metadata.children_meta:
         c_metadata.children_meta.push_back(_metadata_to_libcudf(child_meta))
     return c_metadata
diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
index 257bdcea739..301ae41d667 100644
--- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd
@@ -1,6 +1,8 @@
 # Copyright (c) 2020-2025, NVIDIA CORPORATION.
+from libc.stdint cimport int32_t
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
+from libcpp.optional cimport optional
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
@@ -49,6 +51,7 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
         column_metadata(string name_) except +libcudf_exception_handler
         string name
         string timezone
+        optional[int32_t] precision
         vector[column_metadata] children_meta
 
 
diff --git a/python/pylibcudf/tests/test_interop.py b/python/pylibcudf/tests/test_interop.py
index 35f9b73ebf3..171d70c2496 100644
--- a/python/pylibcudf/tests/test_interop.py
+++ b/python/pylibcudf/tests/test_interop.py
@@ -1,10 +1,13 @@
 # Copyright (c) 2024-2025, NVIDIA CORPORATION.
+import decimal
+
 import cupy as cp
 import nanoarrow
 import nanoarrow.device
 import numpy as np
 import pyarrow as pa
+import pyarrow.compute as pc
 import pytest
 from packaging.version import parse
 from utils import assert_column_eq, assert_table_eq
 
@@ -97,6 +100,55 @@ def test_decimal_other(data_type):
     assert arrow_type == pa.decimal128(precision, 0)
 
 
+@pytest.mark.parametrize(
+    "plc_type",
+    [plc.TypeId.DECIMAL128, plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32],
+)
+def test_decimal_respect_metadata_precision(plc_type, request):
+    request.node.add_marker(
+        pytest.mark.xfail(
+            parse(pa.__version__) < parse("19.0.0")
+            and plc_type in {plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32},
+            reason=(
+                "pyarrow does not interpret Arrow schema decimal type string correctly"
+            ),
+        )
+    )
+    precision, scale = 3, 2
+    expected = pa.array(
+        [decimal.Decimal("1.23"), None], type=pa.decimal128(precision, scale)
+    )
+    plc_column = plc.unary.cast(
+        plc.Column.from_arrow(expected), plc.DataType(plc_type, scale=-scale)
+    )
+    result = plc.interop.to_arrow(
+        plc_column, metadata=plc.interop.ColumnMetadata(precision=precision)
+    )
+    if parse(pa.__version__) >= parse("19.0.0"):
+        if plc_type == plc.TypeId.DECIMAL64:
+            expected = pc.cast(expected, pa.decimal64(precision, scale))
+        elif plc_type == plc.TypeId.DECIMAL32:
+            expected = pc.cast(expected, pa.decimal32(precision, scale))
+    assert result.equals(expected)
+
+
+@pytest.mark.parametrize("precision", [0, 39])
+def test_decimal_precision_metadata_out_of_range(precision):
+    scale = 2
+    expected = pa.array(
+        [decimal.Decimal("1.23"), None], type=pa.decimal128(3, scale)
+    )
+    plc_column = plc.unary.cast(
+        plc.Column.from_arrow(expected),
+        plc.DataType(plc.TypeId.DECIMAL128, scale=-scale),
+    )
+    with pytest.raises(TypeError):
+        plc.interop.to_arrow(
+            plc_column,
+            metadata=plc.interop.ColumnMetadata(precision=precision),
+        )
+
+
 def test_round_trip_dlpack_plc_table():
     expected = pa.table({"a": [1, 2, 3], "b": [5, 6, 7]})
     plc_table = plc.Table.from_arrow(expected)

From 7271bf5a9623e8f02ce9d44305f04ed4a2a47313 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 15 Aug 2025 14:53:15 -0700
Subject: [PATCH 138/366] Move test_stats/reductions/quantile and misc to new
 cudf classic testing directory (#19675)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19675
---
 python/cudf/cudf/tests/conftest.py            |   16 +
 .../tests/dataframe/methods/test_cov_corr.py  |   15 +
 .../dataframe/methods/test_reductions.py      |  183 +++
 .../tests/series/methods/test_cov_corr.py     |  159 +++
 .../tests/series/methods/test_pct_change.py   |   49 +
 .../tests/series/methods/test_reductions.py   | 1041 +++++++++++++++++
 .../cudf/tests/series/methods/test_unique.py  |   48 +
 .../cudf/cudf/tests/series/test_reductions.py |   17 -
 python/cudf/cudf/tests/test_datetime.py       |   56 -
 python/cudf/cudf/tests/test_quantiles.py      |  104 --
 python/cudf/cudf/tests/test_reductions.py     |  512 --------
 python/cudf/cudf/tests/test_stats.py          |  664 -----------
 python/cudf/cudf/tests/test_string.py         |  124 --
 python/cudf/cudf/tests/test_timedelta.py      |   79 +--
 14 files changed, 1512 insertions(+), 1555 deletions(-)
 create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_cov_corr.py
 create mode
100644 python/cudf/cudf/tests/series/methods/test_cov_corr.py create mode 100644 python/cudf/cudf/tests/series/methods/test_pct_change.py create mode 100644 python/cudf/cudf/tests/series/methods/test_reductions.py delete mode 100644 python/cudf/cudf/tests/series/test_reductions.py delete mode 100644 python/cudf/cudf/tests/test_quantiles.py delete mode 100644 python/cudf/cudf/tests/test_reductions.py delete mode 100644 python/cudf/cudf/tests/test_stats.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 0e193b57cd1..f05ec3dd247 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -276,6 +276,16 @@ def reduction_methods(request): return request.param +@pytest.fixture(params=["linear", "lower", "higher", "midpoint", "nearest"]) +def quantile_interpolation(request): + return request.param + + +@pytest.fixture(params=["spearman", "pearson"]) +def corr_method(request): + return request.param + + signed_integer_types = ["int8", "int16", "int32", "int64"] unsigned_integer_types = ["uint8", "uint16", "uint32", "uint64"] float_types = ["float32", "float64"] @@ -486,6 +496,12 @@ def dropna(request): return request.param +@pytest.fixture(params=[True, False]) +def skipna(request): + """Param for `skipna` argument""" + return request.param + + @pytest.fixture(params=[True, False, None]) def nan_as_null(request): """Param for `nan_as_null` argument""" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_cov_corr.py b/python/cudf/cudf/tests/dataframe/methods/test_cov_corr.py new file mode 100644 index 00000000000..5e172baf92a --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_cov_corr.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np + +import cudf +from cudf.testing import assert_eq + + +def test_df_corr(corr_method): + gdf = cudf.DataFrame(np.random.default_rng(seed=0).normal(-100, 100, 10)) + pdf = gdf.to_pandas() + got = gdf.corr(corr_method) + expected = pdf.corr(corr_method) + assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_reductions.py b/python/cudf/cudf/tests/dataframe/methods/test_reductions.py new file mode 100644 index 00000000000..9b4134e5b3b --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_reductions.py @@ -0,0 +1,183 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
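+#
+# DataFrame-level reduction tests (kurtosis/skew, quantile, any/all)
+# gathered here from the deleted test_reductions.py, test_quantiles.py,
+# and test_stats.py modules.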
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if + + +@pytest.mark.parametrize("null_flag", [False, True]) +def test_kurtosis_df(null_flag, numeric_only): + data = cudf.DataFrame( + { + "a": np.arange(10, dtype="float64"), + "b": np.arange(10, dtype="int64"), + "c": np.arange(10, dtype="float64"), + "d": ["a"] * 10, + } + ) + if not numeric_only: + data = data.select_dtypes(include="number") + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.kurtosis(numeric_only=numeric_only) + got = got if np.isscalar(got) else got.to_numpy() + + expected = pdata.kurtosis(numeric_only=numeric_only) + np.testing.assert_array_almost_equal(got, expected) + + got = data.kurt(numeric_only=numeric_only) + got = got if np.isscalar(got) else got.to_numpy() + + expected = pdata.kurt(numeric_only=numeric_only) + np.testing.assert_array_almost_equal(got, expected) + + +@pytest.mark.parametrize("null_flag", [False, True]) +def test_skew_df(null_flag, numeric_only): + data = cudf.DataFrame( + { + "a": np.arange(10, dtype="float64"), + "b": np.arange(10, dtype="int64"), + "c": np.arange(10, dtype="float64"), + "d": ["a"] * 10, + } + ) + if not numeric_only: + data = data.select_dtypes(include="number") + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.skew(numeric_only=numeric_only) + expected = pdata.skew(numeric_only=numeric_only) + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) + + +def test_single_q(): + q = 0.5 + + pdf = pd.DataFrame({"a": [4, 24, 13, 8, 7]}) + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +def test_with_index(): + q = [0, 0.5, 1] + + pdf = pd.DataFrame({"a": [7, 4, 4, 9, 13]}, index=[0, 4, 3, 2, 7]) + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +def test_with_multiindex(): + q = [0, 0.5, 1] + + pdf = pd.DataFrame( + { + "index_1": [3, 1, 9, 7, 5], + "index_2": [2, 4, 3, 5, 1], + "a": [8, 4, 2, 3, 8], + } + ) + pdf.set_index(["index_1", "index_2"], inplace=True) + + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [10, 11, 12]}, + {"a": [1, 0, 3], "b": [10, 11, 12]}, + {"a": [1, 2, 3], "b": [10, 11, None]}, + { + "a": [], + }, + {}, + ], +) +@pytest.mark.parametrize("op", ["all", "any"]) +def test_any_all_axis_none(data, op): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = getattr(pdf, op)(axis=None) + actual = getattr(gdf, op)(axis=None) + + assert expected == actual + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warning not given on older versions of pandas", +) +def test_reductions_axis_none_warning(request, reduction_methods): + if reduction_methods == "quantile": + pytest.skip(f"pandas 
{reduction_methods} doesn't support axis=None") + df = cudf.DataFrame({"a": [1, 2, 3], "b": [10, 2, 3]}) + pdf = df.to_pandas() + with expect_warning_if( + reduction_methods in {"sum", "product", "std", "var"}, + FutureWarning, + ): + actual = getattr(df, reduction_methods)(axis=None) + with expect_warning_if( + reduction_methods in {"sum", "product", "std", "var"}, + FutureWarning, + ): + expected = getattr(pdf, reduction_methods)(axis=None) + assert_eq(expected, actual, check_dtype=False) + + +def test_dataframe_reduction_no_args(reduction_methods): + df = cudf.DataFrame({"a": range(10), "b": range(10)}) + pdf = df.to_pandas() + result = getattr(df, reduction_methods)() + expected = getattr(pdf, reduction_methods)() + assert_eq(result, expected) + + +def test_reduction_column_multiindex(): + idx = cudf.MultiIndex.from_tuples( + [("a", 1), ("a", 2)], names=["foo", "bar"] + ) + df = cudf.DataFrame(np.array([[1, 3], [2, 4]]), columns=idx) + result = df.mean() + expected = df.to_pandas().mean() + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "columns", [pd.RangeIndex(2), pd.Index([0, 1], dtype="int8")] +) +def test_dataframe_axis_0_preserve_column_type_in_index(columns): + pd_df = pd.DataFrame([[1, 2]], columns=columns) + cudf_df = cudf.DataFrame.from_pandas(pd_df) + result = cudf_df.sum(axis=0) + expected = pd_df.sum(axis=0) + assert_eq(result, expected, check_index_type=True) diff --git a/python/cudf/cudf/tests/series/methods/test_cov_corr.py b/python/cudf/cudf/tests/series/methods/test_cov_corr.py new file mode 100644 index 00000000000..332e9d68fe3 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_cov_corr.py @@ -0,0 +1,159 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if + + +@pytest.mark.parametrize( + "data1", + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + np.random.default_rng(seed=0).integers(-50, 50, 1000), + np.zeros(100), + np.repeat(np.nan, 100), + np.array([1.123, 2.343, np.nan, 0.0]), + pa.array([5, 10, 53, None, np.nan, None]), + pd.Series([1.1, 2.32, 43.4], index=[0, 4, 3]), + np.array([], dtype="float64"), + np.array([-3]), + ], +) +@pytest.mark.parametrize( + "data2", + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + np.random.default_rng(seed=0).integers(-50, 50, 1000), + np.zeros(100), + np.repeat(np.nan, 100), + np.array([1.123, 2.343, np.nan, 0.0]), + pd.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), + np.array([5]), + ], +) +def test_cov1d(data1, data2): + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) + + ps1 = gs1.to_pandas() + ps2 = gs2.to_pandas() + + got = gs1.cov(gs2) + ps1_align, ps2_align = ps1.align(ps2, join="inner") + with expect_warning_if( + (len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0) + or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0), + RuntimeWarning, + ): + expected = ps1.cov(ps2) + np.testing.assert_approx_equal(got, expected, significant=8) + + +@pytest.mark.parametrize( + "data1", + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + np.random.default_rng(seed=0).integers(-50, 50, 1000), + np.zeros(100), + np.repeat(np.nan, 100), + np.array([1.123, 2.343, np.nan, 0.0]), + pa.array([5, 10, 53, None, np.nan, None]), + pd.Series([1.1032, 2.32, 43.4], index=[0, 4, 3]), + np.array([], 
dtype="float64"), + np.array([-3]), + ], +) +@pytest.mark.parametrize( + "data2", + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + np.random.default_rng(seed=0).integers(-50, 50, 1000), + np.zeros(100), + np.repeat(np.nan, 100), + np.array([1.123, 2.343, np.nan, 0.0]), + pd.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), + np.array([5]), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Warnings missing on older pandas (scipy version seems unrelated?)", +) +def test_corr1d(data1, data2, corr_method): + if corr_method == "spearman": + # Pandas uses scipy.stats.spearmanr code-path + pytest.importorskip("scipy") + + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) + + ps1 = gs1.to_pandas() + ps2 = gs2.to_pandas() + + got = gs1.corr(gs2, corr_method) + + ps1_align, ps2_align = ps1.align(ps2, join="inner") + + is_singular = ( + len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0 + ) or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0) + is_identical = ( + len(ps1_align.dropna().unique()) == 1 and len(ps2_align.dropna()) > 0 + ) or ( + len(ps2_align.dropna().unique()) == 1 and len(ps1_align.dropna()) > 0 + ) + + # Pearson correlation leads to division by 0 when either sample size is 1. + # Spearman allows for size 1 samples, but will error if all data in a + # sample is identical since the covariance is zero and so the correlation + # coefficient is not defined. + cond = ((is_singular or is_identical) and corr_method == "pearson") or ( + is_identical and not is_singular and corr_method == "spearman" + ) + if corr_method == "spearman": + # SciPy has shuffled around the warning it throws a couple of times. + # It's not worth the effort of conditionally importing the appropriate + # warning based on the scipy version, just catching a base Warning is + # good enough validation. + expected_warning = Warning + elif corr_method == "pearson": + expected_warning = RuntimeWarning + + with expect_warning_if(cond, expected_warning): + expected = ps1.corr(ps2, corr_method) + np.testing.assert_approx_equal(got, expected, significant=8) + + +@pytest.mark.parametrize( + "data1", + [ + [1, 2, 3, 4], + [10, 1, 3, 5], + ], +) +@pytest.mark.parametrize( + "data2", + [ + [1, 2, 3, 4], + [10, 1, 3, 5], + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_cov_corr_datetime_timedelta(data1, data2, temporal_types_as_str): + gsr1 = cudf.Series(data1, dtype=temporal_types_as_str) + gsr2 = cudf.Series(data2, dtype=temporal_types_as_str) + psr1 = gsr1.to_pandas() + psr2 = gsr2.to_pandas() + + assert_eq(psr1.corr(psr2), gsr1.corr(gsr2)) + assert_eq(psr1.cov(psr2), gsr1.cov(gsr2)) diff --git a/python/cudf/cudf/tests/series/methods/test_pct_change.py b/python/cudf/cudf/tests/series/methods/test_pct_change.py new file mode 100644 index 00000000000..47b1a81151b --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_pct_change.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
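+#
+# Series.pct_change tests moved here from the deleted test_stats.py; the
+# expect_warning_if guards cover pandas' FutureWarning for explicit
+# fill_method values.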
+ + +import numpy as np +import pytest + +import cudf +from cudf.api.extensions import no_default +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing._utils import expect_warning_if + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "data", + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + np.random.default_rng(seed=0).integers(-50, 50, 1000), + np.zeros(100), + np.array([1.123, 2.343, np.nan, 0.0]), + np.array([-2, 3.75, 6, None, None, None, -8.5, None, 4.2]), + np.array([], dtype="float64"), + np.array([-3]), + ], +) +@pytest.mark.parametrize("periods", [-5, 0, 5]) +@pytest.mark.parametrize( + "fill_method", ["ffill", "bfill", "pad", "backfill", no_default, None] +) +def test_series_pct_change(data, periods, fill_method): + cs = cudf.Series(data) + ps = cs.to_pandas() + + if np.abs(periods) <= len(cs): + with expect_warning_if(fill_method not in (no_default, None)): + got = cs.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if( + ( + fill_method not in (no_default, None) + or (fill_method is not None and ps.isna().any()) + ) + ): + expected = ps.pct_change(periods=periods, fill_method=fill_method) + np.testing.assert_array_almost_equal( + got.to_numpy(na_value=np.nan), expected + ) diff --git a/python/cudf/cudf/tests/series/methods/test_reductions.py b/python/cudf/cudf/tests/series/methods/test_reductions.py new file mode 100644 index 00000000000..3b636476651 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_reductions.py @@ -0,0 +1,1041 @@ +# Copyright (c) 2019-2025, NVIDIA CORPORATION. +import re +from concurrent.futures import ThreadPoolExecutor +from decimal import Decimal + +import cupy as cp +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest +from packaging.version import parse + +import cudf +from cudf.core._compat import PANDAS_GE_230 +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if + + +@pytest.mark.parametrize("data", [[], [1, 2, 3]]) +def test_series_pandas_methods(data, reduction_methods): + arr = np.array(data) + sr = cudf.Series(arr) + psr = pd.Series(arr) + np.testing.assert_equal( + getattr(sr, reduction_methods)(), getattr(psr, reduction_methods)() + ) + + +def test_series_reductions( + request, reduction_methods, numeric_types_as_str, skipna +): + request.applymarker( + pytest.mark.xfail( + reduction_methods == "quantile", + raises=TypeError, + reason=f"{reduction_methods} doesn't support skipna", + ) + ) + request.applymarker( + pytest.mark.xfail( + reduction_methods == "product" + and skipna + and numeric_types_as_str not in {"float32", "float64"}, + reason=f"{reduction_methods} incorrect with {skipna=}", + ) + ) + rng = np.random.default_rng(seed=0) + arr = rng.random(100) + if np.dtype(numeric_types_as_str).kind in "iu": + arr *= 100 + mask = arr > 10 + else: + mask = arr > 0.5 + + arr = arr.astype(numeric_types_as_str) + if numeric_types_as_str in ("float32", "float64"): + arr[[2, 5, 14, 19, 50, 70]] = np.nan + sr = cudf.Series(arr) + sr[~mask] = None + psr = sr.to_pandas() + psr[~mask] = np.nan + + def call_test(sr, skipna): + fn = getattr(sr, reduction_methods) + if reduction_methods in ["std", "var"]: + return fn(ddof=1, skipna=skipna) + else: + return fn(skipna=skipna) + + expect = call_test(psr, skipna=skipna) + got = call_test(sr, skipna=skipna) + + 
np.testing.assert_approx_equal(expect, got, significant=4) + + +def test_series_reductions_concurrency(reduction_methods): + rng = np.random.default_rng(seed=0) + srs = [cudf.Series(rng.random(100))] + + def call_test(sr): + fn = getattr(sr, reduction_methods) + if reduction_methods in ["std", "var"]: + return fn(ddof=1) + else: + return fn() + + def f(sr): + return call_test(sr + 1) + + with ThreadPoolExecutor(10) as e: + list(e.map(f, srs * 50)) + + +@pytest.mark.parametrize("ddof", range(3)) +def test_series_std(ddof): + rng = np.random.default_rng(seed=0) + arr = rng.random(100) - 0.5 + sr = cudf.Series(arr) + pd = sr.to_pandas() + got = sr.std(ddof=ddof) + expect = pd.std(ddof=ddof) + np.testing.assert_approx_equal(expect, got) + + +def test_series_scale(): + rng = np.random.default_rng(seed=0) + arr = pd.Series(rng.integers(low=-10, high=10, size=100)) + sr = cudf.Series(arr) + + vmin = arr.min() + vmax = arr.max() + scaled = (arr - vmin) / (vmax - vmin) + assert scaled.min() == 0 + assert scaled.max() == 1 + assert_eq(sr.scale(), scaled) + + +def test_exact_quantiles(quantile_interpolation): + arr = np.asarray([6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7]) + quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] + + df = pd.DataFrame(arr) + gdf_series = cudf.Series(arr) + + q1 = gdf_series.quantile( + quant_values, interpolation=quantile_interpolation, exact=True + ) + + q2 = df.quantile(quant_values, interpolation=quantile_interpolation) + + np.testing.assert_allclose( + q1.to_pandas().values, np.array(q2.values).T.flatten(), rtol=1e-10 + ) + + +def test_exact_quantiles_int(quantile_interpolation): + arr = np.asarray([7, 0, 3, 4, 2, 1, -1, 1, 6]) + quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] + + df = pd.DataFrame(arr) + gdf_series = cudf.Series(arr) + + q1 = gdf_series.quantile( + quant_values, interpolation=quantile_interpolation, exact=True + ) + + q2 = df.quantile(quant_values, interpolation=quantile_interpolation) + + np.testing.assert_allclose( + q1.to_pandas().values, np.array(q2.values).T.flatten(), rtol=1e-10 + ) + + +def test_approx_quantiles(): + arr = np.asarray([6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7]) + quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] + + gdf_series = cudf.Series(arr) + pdf_series = pd.Series(arr) + + q1 = gdf_series.quantile(quant_values, exact=False) + q2 = pdf_series.quantile(quant_values) + + assert_eq(q1, q2) + + +def test_approx_quantiles_int(): + arr = np.asarray([1, 2, 3]) + quant_values = [0.5] + approx_results = [2] + + gdf_series = cudf.Series(arr) + + q1 = gdf_series.quantile(quant_values, exact=False) + + assert approx_results == q1.to_pandas().values + + +@pytest.mark.parametrize("data", [[], [1, 2, 3, 10, 326497]]) +@pytest.mark.parametrize( + "q", + [ + [], + 0.5, + 1, + 0.234, + [0.345], + [0.243, 0.5, 1], + np.array([0.5, 1]), + cp.array([0.5, 1]), + ], +) +def test_misc_quantiles(data, q): + pdf_series = pd.Series(data, dtype="float64" if len(data) == 0 else None) + gdf_series = cudf.from_pandas(pdf_series) + + expected = pdf_series.quantile(q.get() if isinstance(q, cp.ndarray) else q) + actual = gdf_series.quantile(q) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + {"data": np.random.default_rng(seed=0).normal(-100, 100, 1000)}, + {"data": np.random.default_rng(seed=0).integers(-50, 50, 1000)}, + {"data": (np.zeros(100))}, + {"data": np.repeat(np.nan, 100)}, + {"data": np.array([1.123, 2.343, np.nan, 0.0])}, + { + "data": [5, 10, 53, None, np.nan, None, 12, 43, -423], + "nan_as_null": False, + }, + 
{"data": [1.1032, 2.32, 43.4, 13, -312.0], "index": [0, 4, 3, 19, 6]}, + {"data": [], "dtype": "float64"}, + {"data": [-3]}, + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_kurtosis_series(data, null_flag, numeric_only): + gs = cudf.Series(**data) + ps = gs.to_pandas() + + if null_flag and len(gs) > 2: + gs.iloc[[0, 2]] = None + ps.iloc[[0, 2]] = None + + got = gs.kurtosis(numeric_only=numeric_only) + expected = ps.kurtosis(numeric_only=numeric_only) + + assert_eq(got, expected) + + got = gs.kurt(numeric_only=numeric_only) + expected = ps.kurt(numeric_only=numeric_only) + + assert_eq(got, expected) + + +@pytest.mark.parametrize("op", ["skew", "kurt"]) +def test_kurt_skew_error(op): + gs = cudf.Series(["ab", "cd"]) + ps = gs.to_pandas() + + assert_exceptions_equal( + getattr(gs, op), + getattr(ps, op), + lfunc_args_and_kwargs=([], {"numeric_only": True}), + rfunc_args_and_kwargs=([], {"numeric_only": True}), + ) + + +@pytest.mark.parametrize( + "data, index, dtype, nan_as_null", + [ + [ + np.random.default_rng(seed=0).normal(-100, 100, 1000), + None, + None, + None, + ], + [ + np.random.default_rng(seed=0).integers(-50, 50, 1000), + None, + None, + None, + ], + [np.zeros(100), None, None, None], + [np.repeat(np.nan, 100), None, None, None], + [np.array([1.123, 2.343, np.nan, 0.0]), None, None, None], + [[5, 10, 53, None, np.nan, None, 12, 43, -423], None, None, False], + [[1.1032, 2.32, 43.4, 13, -312.0], [0, 4, 3, 19, 6], None, None], + [[], None, "float64", None], + [[-3], None, None, None], + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_skew_series(data, index, dtype, nan_as_null, null_flag, numeric_only): + data = cudf.Series(data, index=index, dtype=dtype, nan_as_null=nan_as_null) + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.skew(numeric_only=numeric_only) + expected = pdata.skew(numeric_only=numeric_only) + + assert_eq(got, expected) + + +@pytest.mark.parametrize("num_na", [0, 50, 100]) +def test_series_median(numeric_types_as_str, num_na): + rng = np.random.default_rng(seed=0) + arr = rng.random(100) + dtype = np.dtype(numeric_types_as_str) + if dtype.kind in "iu": + arr *= 100 + mask = np.arange(100) >= num_na + + arr = arr.astype(dtype) + sr = cudf.Series(arr) + sr[~mask] = None + arr2 = arr[mask] + ps = pd.Series(arr2, dtype=dtype) + + actual = sr.median(skipna=True) + desired = ps.median(skipna=True) + + np.testing.assert_approx_equal(actual, desired) + + # only for float until integer null supported convert to pandas in cudf + # eg. 
pd.Int64Dtype + if dtype.kind == "f": + ps = sr.to_pandas() + actual = sr.median(skipna=False) + desired = ps.median(skipna=False) + np.testing.assert_approx_equal(actual, desired) + + +@pytest.mark.parametrize( + "data", + [ + [0.0, 1, 3, 6, np.nan, 7, 5.0, np.nan, 5, 2, 3, -100], + [np.nan] * 3, + [1, 5, 3], + [], + ], +) +@pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning") +def test_nans_stats(data, reduction_methods, skipna, request): + request.applymarker( + pytest.mark.xfail( + reduction_methods == "quantile", + raises=TypeError, + reason=f"{reduction_methods} doesn't support skipna", + ) + ) + + psr = pd.Series(data, dtype="float64" if len(data) == 0 else None) + gsr = cudf.from_pandas(psr) + + assert_eq( + getattr(psr, reduction_methods)(skipna=skipna), + getattr(gsr, reduction_methods)(skipna=skipna), + ) + + gsr = cudf.Series( + data, dtype="float64" if len(data) == 0 else None, nan_as_null=False + ) + # Since there is no concept of `nan_as_null` in pandas, + # nulls will be returned in the operations. So only + # testing for `skipna=True` when `nan_as_null=False` + assert_eq( + getattr(psr, reduction_methods)(skipna=True), + getattr(gsr, reduction_methods)(skipna=True), + ) + + +@pytest.mark.parametrize( + "data", + [ + [0.0, 1, 3, 6, np.nan, 7, 5.0, np.nan, 5, 2, 3, -100], + [np.nan] * 3, + [1, 5, 3], + ], +) +@pytest.mark.parametrize("min_count", [-10, -1, 0, 1, 5, 10]) +def test_min_count_ops(data, request, reduction_methods, skipna, min_count): + request.applymarker( + pytest.mark.xfail( + reduction_methods == "quantile", + raises=TypeError, + reason=f"{reduction_methods} doesn't support skipna", + ) + ) + request.applymarker( + pytest.mark.xfail( + reduction_methods + in { + "skew", + "kurtosis", + "median", + "var", + "std", + "any", + "all", + "max", + "min", + }, + raises=TypeError, + reason=f"{reduction_methods} doesn't support min_count", + ) + ) + psr = pd.Series(data) + gsr = cudf.Series(data, nan_as_null=False) + + assert_eq( + getattr(psr, reduction_methods)(skipna=skipna, min_count=min_count), + getattr(gsr, reduction_methods)(skipna=skipna, min_count=min_count), + ) + + +@pytest.mark.parametrize("q", [2, [1, 2, 3]]) +def test_quantile_range_error(q): + ps = pd.Series([1, 2, 3]) + gs = cudf.from_pandas(ps) + assert_exceptions_equal( + lfunc=ps.quantile, + rfunc=gs.quantile, + lfunc_args_and_kwargs=([q],), + rfunc_args_and_kwargs=([q],), + ) + + +def test_quantile_q_type(): + gs = cudf.Series([1, 2, 3]) + with pytest.raises( + TypeError, + match=re.escape( + "q must be a scalar or array-like, got " + ), + ): + gs.quantile(cudf.DataFrame()) + + +def test_quantile_type_int_float(quantile_interpolation): + data = [1, 3, 4] + psr = pd.Series(data) + gsr = cudf.Series(data) + + expected = psr.quantile(0.5, interpolation=quantile_interpolation) + actual = gsr.quantile(0.5, interpolation=quantile_interpolation) + + assert expected == actual + assert type(expected) is type(actual) + + +@pytest.mark.parametrize("val", [0.9, float("nan")]) +def test_ignore_nans(val): + data = [float("nan"), float("nan"), val] + psr = pd.Series(data) + gsr = cudf.Series(data, nan_as_null=False) + + expected = gsr.quantile(0.9) + result = psr.quantile(0.9) + assert_eq(result, expected) + + +def test_sum(numeric_types_as_str): + data = np.arange(10, dtype=numeric_types_as_str) + sr = cudf.Series(data) + + got = sr.sum() + expect = data.sum() + significant = 4 if numeric_types_as_str == "float32" else 6 + np.testing.assert_approx_equal(expect, got, significant=significant) + 
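[Editor's note: an illustrative aside, not part of the patch. The test_sum above loosens the check to four significant digits for float32 because single-precision accumulation drifts sooner than double precision does. A minimal standalone sketch of the same comparison, assuming only numpy and cudf are installed:

    import numpy as np
    import cudf

    data = np.arange(10, dtype="float32")
    got = cudf.Series(data).sum()
    # float32 sums are compared loosely; float64 tolerates ~6 significant digits
    np.testing.assert_approx_equal(data.sum(), got, significant=4)
]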
+ +@pytest.mark.parametrize( + "middle, expected", + [ + ("there", "HellothereWorld"), + (None, "HelloWorld"), + ], +) +def test_sum_string(middle, expected): + s = cudf.Series(["Hello", middle, "World"]) + + assert s.sum() == expected + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param( + cudf.Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), + cudf.Decimal128Dtype(20, 7), + ], +) +def test_sum_decimal(dtype): + data = [str(x) for x in np.array([1, 11, 111]) / 100] + + expected = pd.Series([Decimal(x) for x in data]).sum() + got = cudf.Series(data).astype(dtype).sum() + + assert_eq(expected, got) + + +def test_product(numeric_types_as_str): + data = np.arange(10, dtype=numeric_types_as_str) + sr = cudf.Series(data) + + got = sr.product() + expect = pd.Series(data).product() + significant = 4 if numeric_types_as_str == "float32" else 6 + np.testing.assert_approx_equal(expect, got, significant=significant) + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param( + cudf.Decimal64Dtype(6, 2), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(8, 4), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(10, 5), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal32Dtype(6, 2), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), + cudf.Decimal128Dtype(20, 5), + ], +) +def test_product_decimal(dtype): + data = [str(x) for x in np.array([1, 11, 111]) / 100] + + expected = pd.Series([Decimal(x) for x in data]).product() + got = cudf.Series(data).astype(dtype).product() + + assert_eq(expected, got) + + +def test_sum_of_squares(numeric_types_as_str): + accuracy_for_dtype = {"float64": 6, "float32": 5} + data = np.arange(10, dtype=numeric_types_as_str) + sr = cudf.Series(data) + df = cudf.DataFrame(sr) + + got = (sr**2).sum() + got_df = (df**2).sum() + expect = (data**2).sum() + + if "int" in numeric_types_as_str: + np.testing.assert_array_almost_equal(expect, got) + np.testing.assert_array_almost_equal(expect, got_df.iloc[0]) + else: + np.testing.assert_approx_equal( + expect, got, significant=accuracy_for_dtype[numeric_types_as_str] + ) + np.testing.assert_approx_equal( + expect, + got_df.iloc[0], + significant=accuracy_for_dtype[numeric_types_as_str], + ) + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param( + cudf.Decimal64Dtype(6, 2), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + 
reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(8, 4), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(10, 5), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + cudf.Decimal128Dtype(20, 7), + pytest.param( + cudf.Decimal32Dtype(6, 2), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), + ], +) +def test_sum_of_squares_decimal(dtype): + data = [str(x) for x in np.array([1, 11, 111]) / 100] + + expected = pd.Series([Decimal(x) for x in data]).pow(2).sum() + got = (cudf.Series(data).astype(dtype) ** 2).sum() + + assert_eq(expected, got) + + +def test_min(numeric_types_as_str): + data = np.arange(10, dtype=numeric_types_as_str) + sr = cudf.Series(data) + + got = sr.min() + expect = getattr(np, numeric_types_as_str)(data.min()) + + assert expect == got + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param( + cudf.Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), + cudf.Decimal128Dtype(20, 7), + ], +) +def test_min_decimal(dtype): + data = [str(x) for x in np.array([1, 11, 111]) / 100] + + expected = pd.Series([Decimal(x) for x in data]).min() + got = cudf.Series(data).astype(dtype).min() + + assert_eq(expected, got) + + +def test_max(numeric_types_as_str): + data = np.arange(10, dtype=numeric_types_as_str) + sr = cudf.Series(data) + + got = sr.max() + expect = getattr(np, numeric_types_as_str)(data.max()) + + assert expect == got + + +@pytest.mark.parametrize( + "dtype", + [ + pytest.param( + cudf.Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + cudf.Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + parse(pa.__version__) < parse("19.0.0"), + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), + cudf.Decimal128Dtype(20, 7), + ], +) +def test_max_decimal(dtype): + data = [str(x) for x in np.array([1, 11, 111]) / 100] + + expected = pd.Series([Decimal(x) for x in data]).max() + got = cudf.Series(data).astype(dtype).max() + + assert_eq(expected, got) + + +def 
test_sum_masked(): + data = np.array([1.1, 1.2, np.nan], dtype="float64") + sr = cudf.Series(data, nan_as_null=True) + + got = sr.sum() + expected = np.nansum(data) + np.testing.assert_approx_equal(expected, got) + + +def test_sum_boolean(): + s = cudf.Series(np.arange(100000)) + got = (s > 1).sum() + expect = 99998 + + assert expect == got + + +def test_date_minmax(): + rng = np.random.default_rng(seed=0) + np_data = rng.normal(size=10) + gdf_data = cudf.Series(np_data) + + np_casted = np_data.astype("datetime64[ms]") + gdf_casted = gdf_data.astype("datetime64[ms]") + + np_min = np_casted.min() + gdf_min = gdf_casted.min() + assert np_min == gdf_min + + np_max = np_casted.max() + gdf_max = gdf_casted.max() + assert np_max == gdf_max + + +@pytest.mark.parametrize( + "op", + ["sum", "product", "var", "kurt", "kurtosis", "skew"], +) +def test_datetime_unsupported_reductions(op): + gsr = cudf.Series([1, 2, 3, None], dtype="datetime64[ns]") + psr = gsr.to_pandas() + + assert_exceptions_equal( + lfunc=getattr(psr, op), + rfunc=getattr(gsr, op), + ) + + +@pytest.mark.parametrize("op", ["product", "var", "kurt", "kurtosis", "skew"]) +def test_timedelta_unsupported_reductions(op): + gsr = cudf.Series([1, 2, 3, None], dtype="timedelta64[ns]") + psr = gsr.to_pandas() + + assert_exceptions_equal( + lfunc=getattr(psr, op), + rfunc=getattr(gsr, op), + ) + + +def test_categorical_reductions(request, reduction_methods): + request.applymarker( + pytest.mark.xfail( + reduction_methods in ["quantile", "all", "any"], + reason=f"{reduction_methods} didn't fail", + ) + ) + + gsr = cudf.Series([1, 2, 3, None], dtype="category") + psr = gsr.to_pandas() + + assert_exceptions_equal( + getattr(psr, reduction_methods), getattr(gsr, reduction_methods) + ) + + +@pytest.mark.parametrize( + "data_non_overflow", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +def test_timedelta_reduction_ops( + data_non_overflow, timedelta_types_as_str, reduction_methods +): + if reduction_methods not in ["sum", "mean", "median", "quantile"]: + pytest.skip(f"{reduction_methods} not supported for timedelta") + gsr = cudf.Series(data_non_overflow, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + if len(psr) > 0 and psr.isnull().all() and reduction_methods == "median": + with pytest.warns(RuntimeWarning, match="Mean of empty slice"): + expected = getattr(psr, reduction_methods)() + else: + with expect_warning_if( + PANDAS_GE_230 + and reduction_methods == "quantile" + and len(data_non_overflow) == 0 + and timedelta_types_as_str != "timedelta64[ns]" + ): + expected = getattr(psr, reduction_methods)() + actual = getattr(gsr, reduction_methods)() + if pd.isna(expected) and pd.isna(actual): + pass + elif isinstance(expected, pd.Timedelta) and isinstance( + actual, pd.Timedelta + ): + assert ( + expected.round(gsr._column.time_unit).value + == actual.round(gsr._column.time_unit).value + ) + else: + assert_eq(expected, actual) + + +@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) +@pytest.mark.parametrize("ddof", [1, 2, 3]) +def test_timedelta_std(data, 
timedelta_types_as_str, ddof): + gsr = cudf.Series(data, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + expected = psr.std(ddof=ddof) + actual = gsr.std(ddof=ddof) + + if np.isnat(expected.to_numpy()) and np.isnat(actual.to_numpy()): + assert True + else: + np.testing.assert_allclose( + expected.to_numpy().astype("float64"), + actual.to_numpy().astype("float64"), + rtol=1e-5, + atol=0, + ) + + +@pytest.mark.parametrize("op", ["max", "min"]) +@pytest.mark.parametrize( + "data", + [ + [], + [1, 2, 3, 100], + [10, None, 100, None, None], + [None, None, None], + [1231], + ], +) +def test_timedelta_reductions(data, op, timedelta_types_as_str): + sr = cudf.Series(data, dtype=timedelta_types_as_str) + psr = sr.to_pandas() + + actual = getattr(sr, op)() + expected = getattr(psr, op)() + + if np.isnat(expected.to_numpy()) and np.isnat(actual): + assert True + else: + assert_eq(expected.to_numpy(), actual) + + +@pytest.mark.parametrize( + "data", + [ + ["a", "b", "c", "d", "e"], + ["a", "z", ".", '"', "aa", "zz"], + ["aa", "zz"], + ["z", "a", "zz", "aa"], + ["1", "2", "3", "4", "5"], + [""], + ["a"], + ["hello"], + ["small text", "this is a larger text......"], + ["👋🏻", "🔥", "🥇"], + ["This is 💯", "here is a calendar", "📅"], + ["", ".", ";", "[", "]"], + ["\t", ".", "\n", "\n\t", "\t\n"], + ], +) +@pytest.mark.parametrize("op", ["min", "max", "sum"]) +def test_str_reductions_supported(data, op): + psr = pd.Series(data) + sr = cudf.Series(data) + + assert_eq(getattr(psr, op)(), getattr(sr, op)()) + + +def test_str_mean(): + sr = cudf.Series(["a", "b", "c", "d", "e"]) + + with pytest.raises(TypeError): + sr.mean() + + +def test_string_product(): + psr = pd.Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) + + assert_exceptions_equal( + lfunc=psr.product, + rfunc=sr.product, + ) + + +def test_string_var(): + psr = pd.Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) + + assert_exceptions_equal(lfunc=psr.var, rfunc=sr.var) + + +def test_string_std(): + psr = pd.Series(["1", "2", "3", "4", "5"]) + sr = cudf.Series(["1", "2", "3", "4", "5"]) + + assert_exceptions_equal(lfunc=psr.std, rfunc=sr.std) + + +def test_string_reduction_error(): + s = cudf.Series([None, None], dtype="str") + ps = s.to_pandas(nullable=True) + assert_exceptions_equal( + s.any, + ps.any, + lfunc_args_and_kwargs=([], {"skipna": False}), + rfunc_args_and_kwargs=([], {"skipna": False}), + ) + + assert_exceptions_equal( + s.all, + ps.all, + lfunc_args_and_kwargs=([], {"skipna": False}), + rfunc_args_and_kwargs=([], {"skipna": False}), + ) + + +@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) +def test_datetime_stats(data, datetime_types_as_str, reduction_methods): + if reduction_methods not in ["mean", "quantile"]: + pytest.skip(f"{reduction_methods} not applicable for test") + gsr = cudf.Series(data, dtype=datetime_types_as_str) + psr = gsr.to_pandas() + + with expect_warning_if( + PANDAS_GE_230 + and reduction_methods == "quantile" + and len(data) == 0 + and datetime_types_as_str != "datetime64[ns]" + ): + expected = getattr(psr, reduction_methods)() + actual = getattr(gsr, reduction_methods)() + + if len(data) == 0: + assert np.isnat(expected.to_numpy()) and np.isnat(actual.to_numpy()) + else: + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + [], + [1, 2, 3, 100], + [10, None, 100, None, None], + [None, None, None], + [1231], + ], +) +def test_datetime_reductions(data, reduction_methods, datetime_types_as_str): + if 
reduction_methods not in ["max", "min", "std", "median"]: + pytest.skip(f"{reduction_methods} not applicable for test") + sr = cudf.Series(data, dtype=datetime_types_as_str) + psr = sr.to_pandas() + + actual = getattr(sr, reduction_methods)() + with expect_warning_if( + psr.size > 0 and psr.isnull().all() and reduction_methods == "median", + RuntimeWarning, + ): + expected = getattr(psr, reduction_methods)() + + if ( + expected is pd.NaT + and actual is pd.NaT + or (np.isnat(expected.to_numpy()) and np.isnat(actual)) + ): + assert True + else: + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/series/methods/test_unique.py b/python/cudf/cudf/tests/series/methods/test_unique.py index 2aa4b78b039..8346388ad78 100644 --- a/python/cudf/cudf/tests/series/methods/test_unique.py +++ b/python/cudf/cudf/tests/series/methods/test_unique.py @@ -36,3 +36,51 @@ def test_datetime_unique(data, nulls): pd.Series(expected).sort_values(ignore_index=True), got.sort_values(ignore_index=True).to_pandas(), ) + + +def test_series_unique(): + rng = np.random.default_rng(seed=0) + size = 100 + arr = rng.integers(low=-1, high=10, size=size) + mask = arr != -1 + sr = cudf.Series(arr) + sr[~mask] = None + assert set(arr[mask]) == set(sr.unique().dropna().to_numpy()) + assert len(set(arr[mask])) == sr.nunique() + + +def test_series_nunique(request, nan_as_null, dropna): + # We remove nulls as opposed to NaNs using the dropna parameter, + # so to test against pandas we replace NaN with another discrete value + request.applymarker( + pytest.mark.xfail( + nan_as_null is None, + reason=f"{nan_as_null=} returns wrong result", + ) + ) + cudf_series = cudf.Series([1, 2, 2, 3, 3], nan_as_null=nan_as_null) + pd_series = pd.Series([1, 2, 2, 3, 3]) + expect = pd_series.nunique(dropna=dropna) + got = cudf_series.nunique(dropna=dropna) + assert expect == got + + cudf_series = cudf.Series( + [1.0, 2.0, 3.0, np.nan, None], nan_as_null=nan_as_null + ) + if nan_as_null is True: + pd_series = pd.Series([1.0, 2.0, 3.0, np.nan, None]) + else: + pd_series = pd.Series([1.0, 2.0, 3.0, -1.0, None]) + + expect = pd_series.nunique(dropna=dropna) + got = cudf_series.nunique(dropna=dropna) + assert expect == got + + cudf_series = cudf.Series([1.0, np.nan, np.nan], nan_as_null=nan_as_null) + if nan_as_null is True: + pd_series = pd.Series([1.0, np.nan, np.nan]) + else: + pd_series = pd.Series([1.0, -1.0, -1.0]) + expect = pd_series.nunique(dropna=dropna) + got = cudf_series.nunique(dropna=dropna) + assert expect == got diff --git a/python/cudf/cudf/tests/series/test_reductions.py b/python/cudf/cudf/tests/series/test_reductions.py deleted file mode 100644 index 1c924444c1e..00000000000 --- a/python/cudf/cudf/tests/series/test_reductions.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
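[Editor's note: an illustrative aside on the nunique tests added above, not part of the patch. For float data pandas folds None into NaN, which is why the test substitutes a discrete value when nan_as_null is False: with dropna=False both nulls collapse to a single distinct value. A minimal pandas-only sketch:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, np.nan, None])  # None becomes NaN here
    assert s.nunique(dropna=True) == 3
    assert s.nunique(dropna=False) == 4  # NaN counts exactly once
]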
- -import numpy as np -import pandas as pd -import pytest - -from cudf import Series - - -@pytest.mark.parametrize("data", [[], [1, 2, 3]]) -def test_series_pandas_methods(data, reduction_methods): - arr = np.array(data) - sr = Series(arr) - psr = pd.Series(arr) - np.testing.assert_equal( - getattr(sr, reduction_methods)(), getattr(psr, reduction_methods)() - ) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 1bc6fe19d02..b1678889eff 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -12,14 +12,12 @@ from cudf import Series from cudf.core._compat import ( PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_230, PANDAS_VERSION, ) from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, assert_exceptions_equal, - expect_warning_if, ) @@ -384,60 +382,6 @@ def test_datetime_invalid_ops(): ) -@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -@pytest.mark.parametrize("stat", ["mean", "quantile"]) -def test_datetime_stats(data, dtype, stat): - gsr = cudf.Series(data, dtype=dtype) - psr = gsr.to_pandas() - - with expect_warning_if( - PANDAS_GE_230 - and stat == "quantile" - and len(data) == 0 - and dtype != "datetime64[ns]" - ): - expected = getattr(psr, stat)() - actual = getattr(gsr, stat)() - - if len(data) == 0: - assert np.isnat(expected.to_numpy()) and np.isnat(actual.to_numpy()) - else: - assert_eq(expected, actual) - - -@pytest.mark.parametrize("op", ["max", "min", "std", "median"]) -@pytest.mark.parametrize( - "data", - [ - [], - [1, 2, 3, 100], - [10, None, 100, None, None], - [None, None, None], - [1231], - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -def test_datetime_reductions(data, op, dtype): - sr = cudf.Series(data, dtype=dtype) - psr = sr.to_pandas() - - actual = getattr(sr, op)() - with expect_warning_if( - psr.size > 0 and psr.isnull().all() and op == "median", RuntimeWarning - ): - expected = getattr(psr, op)() - - if ( - expected is pd.NaT - and actual is pd.NaT - or (np.isnat(expected.to_numpy()) and np.isnat(actual)) - ): - assert True - else: - assert_eq(expected, actual) - - def test_datetime_binop_tz_timestamp(op): s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py deleted file mode 100644 index 8b2f5acb3e1..00000000000 --- a/python/cudf/cudf/tests/test_quantiles.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
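[Editor's note: an illustrative aside, not part of the patch. The datetime reduction tests removed from test_datetime.py above (and re-added in the new reductions module earlier in this diff) compare empty or all-null results with np.isnat on both sides because NaT, like NaN, is not equal to itself. A minimal sketch assuming only numpy and pandas:

    import numpy as np
    import pandas as pd

    s = pd.Series([], dtype="datetime64[ns]")
    result = s.min()  # reducing over no values yields NaT
    assert result is pd.NaT
    assert np.datetime64("NaT") != np.datetime64("NaT")  # hence isnat, not ==
    assert np.isnat(np.datetime64("NaT"))
]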
- -import re - -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal - - -def test_single_q(): - q = 0.5 - - pdf = pd.DataFrame({"a": [4, 24, 13, 8, 7]}) - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -def test_with_index(): - q = [0, 0.5, 1] - - pdf = pd.DataFrame({"a": [7, 4, 4, 9, 13]}, index=[0, 4, 3, 2, 7]) - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -def test_with_multiindex(): - q = [0, 0.5, 1] - - pdf = pd.DataFrame( - { - "index_1": [3, 1, 9, 7, 5], - "index_2": [2, 4, 3, 5, 1], - "a": [8, 4, 2, 3, 8], - } - ) - pdf.set_index(["index_1", "index_2"], inplace=True) - - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -@pytest.mark.parametrize("q", [2, [1, 2, 3]]) -def test_quantile_range_error(q): - ps = pd.Series([1, 2, 3]) - gs = cudf.from_pandas(ps) - assert_exceptions_equal( - lfunc=ps.quantile, - rfunc=gs.quantile, - lfunc_args_and_kwargs=([q],), - rfunc_args_and_kwargs=([q],), - ) - - -def test_quantile_q_type(): - gs = cudf.Series([1, 2, 3]) - with pytest.raises( - TypeError, - match=re.escape( - "q must be a scalar or array-like, got " - ), - ): - gs.quantile(cudf.DataFrame()) - - -@pytest.mark.parametrize( - "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] -) -def test_quantile_type_int_float(interpolation): - data = [1, 3, 4] - psr = pd.Series(data) - gsr = cudf.Series(data) - - expected = psr.quantile(0.5, interpolation=interpolation) - actual = gsr.quantile(0.5, interpolation=interpolation) - - assert expected == actual - assert type(expected) is type(actual) - - -@pytest.mark.parametrize("val", [0.9, float("nan")]) -def test_ignore_nans(val): - data = [float("nan"), float("nan"), val] - psr = pd.Series(data) - gsr = cudf.Series(data, nan_as_null=False) - - expected = gsr.quantile(0.9) - result = psr.quantile(0.9) - assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py deleted file mode 100644 index 144d79ca94a..00000000000 --- a/python/cudf/cudf/tests/test_reductions.py +++ /dev/null @@ -1,512 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
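[Editor's note: an illustrative aside, not part of the patch. Several of the quantile tests deleted here reappear in the new series reductions module earlier in this diff; the "nearest" interpolation they exercise returns an actual element of the data rather than interpolating between neighbors. A pandas-only sketch of that rule:

    import pandas as pd

    pdf = pd.DataFrame({"a": [4, 24, 13, 8, 7]})
    # the median of five values lands exactly on the middle element
    assert pdf.quantile(0.5, interpolation="nearest")["a"] == 8.0
]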
-from decimal import Decimal - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf import Series -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.core.column.column import as_column -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing import _utils as utils, assert_eq -from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if, gen_rand - - -@pytest.fixture(params=NUMERIC_TYPES) -def dtype(request): - return request.param - - -@pytest.fixture(params=[1, 20]) -def nelem(request): - return request.param - - -def test_sum(dtype, nelem): - dtype = cudf.dtype(dtype).type - data = gen_rand(dtype, nelem) - sr = Series(data) - - got = sr.sum() - expect = data.sum() - significant = 4 if dtype == np.float32 else 6 - np.testing.assert_approx_equal(expect, got, significant=significant) - - -def test_sum_string(): - s = Series(["Hello", "there", "World"]) - - got = s.sum() - expected = "HellothereWorld" - - assert got == expected - - s = Series(["Hello", None, "World"]) - - got = s.sum() - expected = "HelloWorld" - - assert got == expected - - -@pytest.mark.parametrize( - "dtype", - [ - pytest.param( - Decimal64Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(10, 6), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(16, 7), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal32Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal32 format string only supported in pyarrow >=19", - ), - ), - Decimal128Dtype(20, 7), - ], -) -def test_sum_decimal(dtype, nelem): - data = [str(x) for x in gen_rand("int64", nelem, seed=0) / 100] - - expected = pd.Series([Decimal(x) for x in data]).sum() - got = cudf.Series(data).astype(dtype).sum() - - assert_eq(expected, got) - - -def test_product(dtype, nelem): - rng = np.random.default_rng(seed=0) - dtype = cudf.dtype(dtype).type - if cudf.dtype(dtype).kind in {"u", "i"}: - data = np.ones(nelem, dtype=dtype) - # Set at most 30 items to [0..2) to keep the value within 2^32 - for _ in range(30): - data[rng.integers(low=0, high=nelem, size=1)] = rng.uniform() * 2 - else: - data = gen_rand(dtype, nelem) - - sr = Series(data) - - got = sr.product() - expect = pd.Series(data).product() - significant = 4 if dtype == np.float32 else 6 - np.testing.assert_approx_equal(expect, got, significant=significant) - - -@pytest.mark.parametrize( - "dtype", - [ - pytest.param( - Decimal64Dtype(6, 2), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(8, 4), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(10, 5), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal32Dtype(6, 2), - marks=pytest.mark.skipif( - 
pa._generated_version.version_tuple[0] < 19, - reason="decimal32 format string only supported in pyarrow >=19", - ), - ), - Decimal128Dtype(20, 5), - ], -) -def test_product_decimal(dtype): - data = [str(x) for x in gen_rand("int8", 3) / 10] - - expected = pd.Series([Decimal(x) for x in data]).product() - got = cudf.Series(data).astype(dtype).product() - - assert_eq(expected, got) - - -accuracy_for_dtype = {np.float64: 6, np.float32: 5} - - -def test_sum_of_squares(dtype, nelem): - dtype = cudf.dtype(dtype).type - data = gen_rand(dtype, nelem) - sr = Series(data) - df = cudf.DataFrame(sr) - - got = (sr**2).sum() - got_df = (df**2).sum() - expect = (data**2).sum() - - if cudf.dtype(dtype).kind in {"u", "i"}: - np.testing.assert_array_almost_equal(expect, got) - np.testing.assert_array_almost_equal(expect, got_df.iloc[0]) - else: - np.testing.assert_approx_equal( - expect, got, significant=accuracy_for_dtype[dtype] - ) - np.testing.assert_approx_equal( - expect, got_df.iloc[0], significant=accuracy_for_dtype[dtype] - ) - - -@pytest.mark.parametrize( - "dtype", - [ - pytest.param( - Decimal64Dtype(6, 2), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(8, 4), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(10, 5), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - Decimal128Dtype(20, 7), - pytest.param( - Decimal32Dtype(6, 2), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal32 format string only supported in pyarrow >=19", - ), - ), - ], -) -def test_sum_of_squares_decimal(dtype): - data = [str(x) for x in gen_rand("int8", 3) / 10] - - expected = pd.Series([Decimal(x) for x in data]).pow(2).sum() - got = (cudf.Series(data).astype(dtype) ** 2).sum() - - assert_eq(expected, got) - - -def test_min(dtype, nelem): - dtype = cudf.dtype(dtype).type - data = gen_rand(dtype, nelem) - sr = Series(data) - - got = sr.min() - expect = dtype(data.min()) - - assert expect == got - - -@pytest.mark.parametrize( - "dtype", - [ - pytest.param( - Decimal64Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(10, 6), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(16, 7), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal32Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal32 format string only supported in pyarrow >=19", - ), - ), - Decimal128Dtype(20, 7), - ], -) -def test_min_decimal(dtype, nelem): - data = [str(x) for x in gen_rand("int64", nelem) / 100] - - expected = pd.Series([Decimal(x) for x in data]).min() - got = cudf.Series(data).astype(dtype).min() - - assert_eq(expected, got) - - -def test_max(dtype, nelem): - dtype = cudf.dtype(dtype).type - data = gen_rand(dtype, nelem) - sr = Series(data) - - got = sr.max() - expect = 
dtype(data.max()) - - assert expect == got - - -@pytest.mark.parametrize( - "dtype", - [ - pytest.param( - Decimal64Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(10, 6), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal64Dtype(16, 7), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal64 format string only supported in pyarrow >=19", - ), - ), - pytest.param( - Decimal32Dtype(6, 3), - marks=pytest.mark.skipif( - pa._generated_version.version_tuple[0] < 19, - reason="decimal32 format string only supported in pyarrow >=19", - ), - ), - Decimal128Dtype(20, 7), - ], -) -def test_max_decimal(dtype, nelem): - data = [str(x) for x in gen_rand("int64", nelem) / 100] - - expected = pd.Series([Decimal(x) for x in data]).max() - got = cudf.Series(data).astype(dtype).max() - - assert_eq(expected, got) - - -def test_sum_masked(nelem): - dtype = np.float64 - data = gen_rand(dtype, nelem) - - mask = utils.random_bitmask(nelem) - bitmask = utils.expand_bits_to_bytes(mask)[:nelem] - - sr = Series._from_column(as_column(data).set_mask(mask)) - - got = sr.sum() - res_mask = np.asarray(bitmask, dtype=np.bool_)[: data.size] - expect = data[res_mask].sum() - - significant = 4 if dtype == np.float32 else 6 - np.testing.assert_approx_equal(expect, got, significant=significant) - - -def test_sum_boolean(): - s = Series(np.arange(100000)) - got = (s > 1).sum() - expect = 99998 - - assert expect == got - - -def test_date_minmax(): - rng = np.random.default_rng(seed=0) - np_data = rng.normal(size=10**3) - gdf_data = Series(np_data) - - np_casted = np_data.astype("datetime64[ms]") - gdf_casted = gdf_data.astype("datetime64[ms]") - - np_min = np_casted.min() - gdf_min = gdf_casted.min() - assert np_min == gdf_min - - np_max = np_casted.max() - gdf_max = gdf_casted.max() - assert np_max == gdf_max - - -@pytest.mark.parametrize( - "op", - ["sum", "product", "var", "kurt", "kurtosis", "skew"], -) -def test_datetime_unsupported_reductions(op): - gsr = cudf.Series([1, 2, 3, None], dtype="datetime64[ns]") - psr = gsr.to_pandas() - - utils.assert_exceptions_equal( - lfunc=getattr(psr, op), - rfunc=getattr(gsr, op), - ) - - -@pytest.mark.parametrize("op", ["product", "var", "kurt", "kurtosis", "skew"]) -def test_timedelta_unsupported_reductions(op): - gsr = cudf.Series([1, 2, 3, None], dtype="timedelta64[ns]") - psr = gsr.to_pandas() - - utils.assert_exceptions_equal( - lfunc=getattr(psr, op), - rfunc=getattr(gsr, op), - ) - - -@pytest.mark.parametrize("op", ["sum", "product", "std", "var"]) -def test_categorical_reductions(op): - gsr = cudf.Series([1, 2, 3, None], dtype="category") - psr = gsr.to_pandas() - - utils.assert_exceptions_equal(getattr(psr, op), getattr(gsr, op)) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, 3], "b": [10, 11, 12]}, - {"a": [1, 0, 3], "b": [10, 11, 12]}, - {"a": [1, 2, 3], "b": [10, 11, None]}, - { - "a": [], - }, - {}, - ], -) -@pytest.mark.parametrize("op", ["all", "any"]) -def test_any_all_axis_none(data, op): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - expected = getattr(pdf, op)(axis=None) - actual = getattr(gdf, op)(axis=None) - - assert expected == actual - - -@pytest.mark.parametrize( - "op", - [ - "sum", - "product", - "std", - "var", - "kurt", - 
"kurtosis", - "skew", - "min", - "max", - "mean", - "median", - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warning not given on older versions of pandas", -) -def test_reductions_axis_none_warning(op): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [10, 2, 3]}) - pdf = df.to_pandas() - with expect_warning_if( - op in {"sum", "product", "std", "var"}, - FutureWarning, - ): - actual = getattr(df, op)(axis=None) - with expect_warning_if( - op in {"sum", "product", "std", "var"}, - FutureWarning, - ): - expected = getattr(pdf, op)(axis=None) - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "op", - [ - "sum", - "product", - "std", - "var", - "kurt", - "kurtosis", - "skew", - "min", - "max", - "mean", - "median", - ], -) -def test_dataframe_reduction_no_args(op): - df = cudf.DataFrame({"a": range(10), "b": range(10)}) - pdf = df.to_pandas() - result = getattr(df, op)() - expected = getattr(pdf, op)() - assert_eq(result, expected) - - -def test_reduction_column_multiindex(): - idx = cudf.MultiIndex.from_tuples( - [("a", 1), ("a", 2)], names=["foo", "bar"] - ) - df = cudf.DataFrame(np.array([[1, 3], [2, 4]]), columns=idx) - result = df.mean() - expected = df.to_pandas().mean() - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "columns", [pd.RangeIndex(2), pd.Index([0, 1], dtype="int8")] -) -def test_dataframe_axis_0_preserve_column_type_in_index(columns): - pd_df = pd.DataFrame([[1, 2]], columns=columns) - cudf_df = cudf.DataFrame.from_pandas(pd_df) - result = cudf_df.sum(axis=0) - expected = pd_df.sum(axis=0) - assert_eq(result, expected, check_index_type=True) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py deleted file mode 100644 index cb8349e706a..00000000000 --- a/python/cudf/cudf/tests/test_stats.py +++ /dev/null @@ -1,664 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -from concurrent.futures import ThreadPoolExecutor - -import cupy as cp -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.api.extensions import no_default -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.datasets import randomdata -from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if - - -@pytest.fixture(params=[np.int32, np.uint32, np.float32, np.float64]) -def dtype(request): - return request.param - - -@pytest.fixture(params=["min", "max", "sum", "mean", "var", "std"]) -def method(request): - return request.param - - -@pytest.fixture(params=["linear", "lower", "higher", "midpoint", "nearest"]) -def int_method(request): - return request.param - - -@pytest.mark.parametrize("skipna", [True, False]) -def test_series_reductions(method, dtype, skipna): - rng = np.random.default_rng(seed=0) - arr = rng.random(100) - if np.dtype(dtype).kind in "iu": - arr *= 100 - mask = arr > 10 - else: - mask = arr > 0.5 - - arr = arr.astype(dtype) - if dtype in (np.float32, np.float64): - arr[[2, 5, 14, 19, 50, 70]] = np.nan - sr = cudf.Series(arr) - sr[~mask] = None - psr = sr.to_pandas() - psr[~mask] = np.nan - - def call_test(sr, skipna): - fn = getattr(sr, method) - if method in ["std", "var"]: - return fn(ddof=1, skipna=skipna) - else: - return fn(skipna=skipna) - - expect, got = call_test(psr, skipna=skipna), call_test(sr, skipna=skipna) - - np.testing.assert_approx_equal(expect, got) - - -def test_series_reductions_concurrency(method): - rng = np.random.default_rng(seed=0) - srs = [cudf.Series(rng.random(10000))] - - def call_test(sr): - fn = getattr(sr, method) - if method in ["std", "var"]: - return fn(ddof=1) - else: - return fn() - - def f(sr): - return call_test(sr + 1) - - with ThreadPoolExecutor(10) as e: - list(e.map(f, srs * 50)) - - -@pytest.mark.parametrize("ddof", range(3)) -def test_series_std(ddof): - rng = np.random.default_rng(seed=0) - arr = rng.random(100) - 0.5 - sr = cudf.Series(arr) - pd = sr.to_pandas() - got = sr.std(ddof=ddof) - expect = pd.std(ddof=ddof) - np.testing.assert_approx_equal(expect, got) - - -def test_series_unique(): - rng = np.random.default_rng(seed=0) - size = 100 - arr = rng.integers(low=-1, high=10, size=size) - mask = arr != -1 - sr = cudf.Series(arr) - sr[~mask] = None - assert set(arr[mask]) == set(sr.unique().dropna().to_numpy()) - assert len(set(arr[mask])) == sr.nunique() - - -@pytest.mark.parametrize("nan_as_null", [True, False]) -@pytest.mark.parametrize("dropna", [True, False]) -def test_series_nunique(nan_as_null, dropna): - # We remove nulls as opposed to NaNs using the dropna parameter, - # so to test against pandas we replace NaN with another discrete value - cudf_series = cudf.Series([1, 2, 2, 3, 3], nan_as_null=nan_as_null) - pd_series = pd.Series([1, 2, 2, 3, 3]) - expect = pd_series.nunique(dropna=dropna) - got = cudf_series.nunique(dropna=dropna) - assert expect == got - - cudf_series = cudf.Series( - [1.0, 2.0, 3.0, np.nan, None], nan_as_null=nan_as_null - ) - if nan_as_null is True: - pd_series = pd.Series([1.0, 2.0, 3.0, np.nan, None]) - else: - pd_series = pd.Series([1.0, 2.0, 3.0, -1.0, None]) - - expect = pd_series.nunique(dropna=dropna) - got = cudf_series.nunique(dropna=dropna) - assert expect == got - - cudf_series = cudf.Series([1.0, np.nan, np.nan], nan_as_null=nan_as_null) - if nan_as_null is True: - pd_series = pd.Series([1.0, np.nan, np.nan]) - else: - pd_series = 
pd.Series([1.0, -1.0, -1.0]) - expect = pd_series.nunique(dropna=dropna) - got = cudf_series.nunique(dropna=dropna) - assert expect == got - - -def test_series_scale(): - rng = np.random.default_rng(seed=0) - arr = pd.Series(rng.integers(low=-10, high=10, size=100)) - sr = cudf.Series(arr) - - vmin = arr.min() - vmax = arr.max() - scaled = (arr - vmin) / (vmax - vmin) - assert scaled.min() == 0 - assert scaled.max() == 1 - assert_eq(sr.scale(), scaled) - - -def test_exact_quantiles(int_method): - arr = np.asarray([6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7]) - quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] - - df = pd.DataFrame(arr) - gdf_series = cudf.Series(arr) - - q1 = gdf_series.quantile( - quant_values, interpolation=int_method, exact=True - ) - - q2 = df.quantile(quant_values, interpolation=int_method) - - np.testing.assert_allclose( - q1.to_pandas().values, np.array(q2.values).T.flatten(), rtol=1e-10 - ) - - -def test_exact_quantiles_int(int_method): - arr = np.asarray([7, 0, 3, 4, 2, 1, -1, 1, 6]) - quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] - - df = pd.DataFrame(arr) - gdf_series = cudf.Series(arr) - - q1 = gdf_series.quantile( - quant_values, interpolation=int_method, exact=True - ) - - q2 = df.quantile(quant_values, interpolation=int_method) - - np.testing.assert_allclose( - q1.to_pandas().values, np.array(q2.values).T.flatten(), rtol=1e-10 - ) - - -def test_approx_quantiles(): - arr = np.asarray([6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7]) - quant_values = [0.0, 0.25, 0.33, 0.5, 1.0] - - gdf_series = cudf.Series(arr) - pdf_series = pd.Series(arr) - - q1 = gdf_series.quantile(quant_values, exact=False) - q2 = pdf_series.quantile(quant_values) - - assert_eq(q1, q2) - - -def test_approx_quantiles_int(): - arr = np.asarray([1, 2, 3]) - quant_values = [0.5] - approx_results = [2] - - gdf_series = cudf.Series(arr) - - q1 = gdf_series.quantile(quant_values, exact=False) - - assert approx_results == q1.to_pandas().values - - -@pytest.mark.parametrize("data", [[], [1, 2, 3, 10, 326497]]) -@pytest.mark.parametrize( - "q", - [ - [], - 0.5, - 1, - 0.234, - [0.345], - [0.243, 0.5, 1], - np.array([0.5, 1]), - cp.array([0.5, 1]), - ], -) -def test_misc_quantiles(data, q): - pdf_series = pd.Series(data, dtype="float64" if len(data) == 0 else None) - gdf_series = cudf.from_pandas(pdf_series) - - expected = pdf_series.quantile(q.get() if isinstance(q, cp.ndarray) else q) - actual = gdf_series.quantile(q) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - {"data": np.random.default_rng(seed=0).normal(-100, 100, 1000)}, - {"data": np.random.default_rng(seed=0).integers(-50, 50, 1000)}, - {"data": (np.zeros(100))}, - {"data": np.repeat(np.nan, 100)}, - {"data": np.array([1.123, 2.343, np.nan, 0.0])}, - { - "data": [5, 10, 53, None, np.nan, None, 12, 43, -423], - "nan_as_null": False, - }, - {"data": [1.1032, 2.32, 43.4, 13, -312.0], "index": [0, 4, 3, 19, 6]}, - {"data": [], "dtype": "float64"}, - {"data": [-3]}, - ], -) -@pytest.mark.parametrize("null_flag", [False, True]) -@pytest.mark.parametrize("numeric_only", [False, True]) -def test_kurtosis_series(data, null_flag, numeric_only): - gs = cudf.Series(**data) - ps = gs.to_pandas() - - if null_flag and len(gs) > 2: - gs.iloc[[0, 2]] = None - ps.iloc[[0, 2]] = None - - got = gs.kurtosis(numeric_only=numeric_only) - expected = ps.kurtosis(numeric_only=numeric_only) - - assert_eq(got, expected) - - got = gs.kurt(numeric_only=numeric_only) - expected = ps.kurt(numeric_only=numeric_only) - - assert_eq(got, 
expected) - - -@pytest.mark.parametrize("op", ["skew", "kurt"]) -def test_kurt_skew_error(op): - gs = cudf.Series(["ab", "cd"]) - ps = gs.to_pandas() - - assert_exceptions_equal( - getattr(gs, op), - getattr(ps, op), - lfunc_args_and_kwargs=([], {"numeric_only": True}), - rfunc_args_and_kwargs=([], {"numeric_only": True}), - ) - - -@pytest.mark.parametrize( - "data, index, dtype, nan_as_null", - [ - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - None, - None, - None, - ], - [ - np.random.default_rng(seed=0).integers(-50, 50, 1000), - None, - None, - None, - ], - [np.zeros(100), None, None, None], - [np.repeat(np.nan, 100), None, None, None], - [np.array([1.123, 2.343, np.nan, 0.0]), None, None, None], - [[5, 10, 53, None, np.nan, None, 12, 43, -423], None, None, False], - [[1.1032, 2.32, 43.4, 13, -312.0], [0, 4, 3, 19, 6], None, None], - [[], None, "float64", None], - [[-3], None, None, None], - ], -) -@pytest.mark.parametrize("null_flag", [False, True]) -@pytest.mark.parametrize("numeric_only", [False, True]) -def test_skew_series(data, index, dtype, nan_as_null, null_flag, numeric_only): - data = cudf.Series(data, index=index, dtype=dtype, nan_as_null=nan_as_null) - pdata = data.to_pandas() - - if null_flag and len(data) > 2: - data.iloc[[0, 2]] = None - pdata.iloc[[0, 2]] = None - - got = data.skew(numeric_only=numeric_only) - expected = pdata.skew(numeric_only=numeric_only) - - assert_eq(got, expected) - - -@pytest.mark.parametrize("num_na", [0, 1, 50, 99, 100]) -def test_series_median(dtype, num_na): - rng = np.random.default_rng(seed=0) - arr = rng.random(100) - dtype = np.dtype(dtype) - if dtype.kind in "iu": - arr *= 100 - mask = np.arange(100) >= num_na - - arr = arr.astype(dtype) - sr = cudf.Series(arr) - sr[~mask] = None - arr2 = arr[mask] - ps = pd.Series(arr2, dtype=dtype) - - actual = sr.median(skipna=True) - desired = ps.median(skipna=True) - - np.testing.assert_approx_equal(actual, desired) - - # only for float until integer null supported convert to pandas in cudf - # eg. 
pd.Int64Dtype - if dtype.kind == "f": - ps = sr.to_pandas() - actual = sr.median(skipna=False) - desired = ps.median(skipna=False) - np.testing.assert_approx_equal(actual, desired) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "data", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.array([1.123, 2.343, np.nan, 0.0]), - np.array([-2, 3.75, 6, None, None, None, -8.5, None, 4.2]), - np.array([], dtype="float64"), - np.array([-3]), - ], -) -@pytest.mark.parametrize("periods", range(-5, 5)) -@pytest.mark.parametrize( - "fill_method", ["ffill", "bfill", "pad", "backfill", no_default, None] -) -def test_series_pct_change(data, periods, fill_method): - cs = cudf.Series(data) - ps = cs.to_pandas() - - if np.abs(periods) <= len(cs): - with expect_warning_if(fill_method not in (no_default, None)): - got = cs.pct_change(periods=periods, fill_method=fill_method) - with expect_warning_if( - ( - fill_method not in (no_default, None) - or (fill_method is not None and ps.isna().any()) - ) - ): - expected = ps.pct_change(periods=periods, fill_method=fill_method) - np.testing.assert_array_almost_equal( - got.to_numpy(na_value=np.nan), expected - ) - - -@pytest.mark.parametrize( - "data1", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.repeat(np.nan, 100), - np.array([1.123, 2.343, np.nan, 0.0]), - pa.array([5, 10, 53, None, np.nan, None]), - pd.Series([1.1, 2.32, 43.4], index=[0, 4, 3]), - np.array([], dtype="float64"), - np.array([-3]), - ], -) -@pytest.mark.parametrize( - "data2", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.repeat(np.nan, 100), - np.array([1.123, 2.343, np.nan, 0.0]), - pd.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), - np.array([5]), - ], -) -def test_cov1d(data1, data2): - gs1 = cudf.Series(data1) - gs2 = cudf.Series(data2) - - ps1 = gs1.to_pandas() - ps2 = gs2.to_pandas() - - got = gs1.cov(gs2) - ps1_align, ps2_align = ps1.align(ps2, join="inner") - with expect_warning_if( - (len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0) - or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0), - RuntimeWarning, - ): - expected = ps1.cov(ps2) - np.testing.assert_approx_equal(got, expected, significant=8) - - -@pytest.mark.parametrize( - "data1", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.repeat(np.nan, 100), - np.array([1.123, 2.343, np.nan, 0.0]), - pa.array([5, 10, 53, None, np.nan, None]), - pd.Series([1.1032, 2.32, 43.4], index=[0, 4, 3]), - np.array([], dtype="float64"), - np.array([-3]), - ], -) -@pytest.mark.parametrize( - "data2", - [ - np.random.default_rng(seed=0).normal(-100, 100, 1000), - np.random.default_rng(seed=0).integers(-50, 50, 1000), - np.zeros(100), - np.repeat(np.nan, 100), - np.array([1.123, 2.343, np.nan, 0.0]), - pd.Series([1.1, 2.32, 43.4], index=[0, 500, 4000]), - np.array([5]), - ], -) -@pytest.mark.parametrize("method", ["spearman", "pearson"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Warnings missing on older pandas (scipy version seems unrelated?)", -) -def test_corr1d(data1, data2, method): - if method == 
"spearman": - # Pandas uses scipy.stats.spearmanr code-path - pytest.importorskip("scipy") - - gs1 = cudf.Series(data1) - gs2 = cudf.Series(data2) - - ps1 = gs1.to_pandas() - ps2 = gs2.to_pandas() - - got = gs1.corr(gs2, method) - - ps1_align, ps2_align = ps1.align(ps2, join="inner") - - is_singular = ( - len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0 - ) or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0) - is_identical = ( - len(ps1_align.dropna().unique()) == 1 and len(ps2_align.dropna()) > 0 - ) or ( - len(ps2_align.dropna().unique()) == 1 and len(ps1_align.dropna()) > 0 - ) - - # Pearson correlation leads to division by 0 when either sample size is 1. - # Spearman allows for size 1 samples, but will error if all data in a - # sample is identical since the covariance is zero and so the correlation - # coefficient is not defined. - cond = ((is_singular or is_identical) and method == "pearson") or ( - is_identical and not is_singular and method == "spearman" - ) - if method == "spearman": - # SciPy has shuffled around the warning it throws a couple of times. - # It's not worth the effort of conditionally importing the appropriate - # warning based on the scipy version, just catching a base Warning is - # good enough validation. - expected_warning = Warning - elif method == "pearson": - expected_warning = RuntimeWarning - - with expect_warning_if(cond, expected_warning): - expected = ps1.corr(ps2, method) - np.testing.assert_approx_equal(got, expected, significant=8) - - -@pytest.mark.parametrize("method", ["spearman", "pearson"]) -def test_df_corr(method): - gdf = randomdata(100, {str(x): float for x in range(50)}) - pdf = gdf.to_pandas() - got = gdf.corr(method) - expected = pdf.corr(method) - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - [0.0, 1, 3, 6, np.nan, 7, 5.0, np.nan, 5, 2, 3, -100], - [np.nan] * 3, - [1, 5, 3], - [], - ], -) -@pytest.mark.parametrize( - "ops", - [ - "mean", - "min", - "max", - "sum", - "product", - "var", - "std", - "prod", - "kurtosis", - "skew", - "any", - "all", - "cummin", - "cummax", - "cumsum", - "cumprod", - ], -) -@pytest.mark.parametrize("skipna", [True, False]) -def test_nans_stats(data, ops, skipna): - psr = pd.Series(data, dtype="float64" if len(data) == 0 else None) - gsr = cudf.from_pandas(psr) - - assert_eq( - getattr(psr, ops)(skipna=skipna), getattr(gsr, ops)(skipna=skipna) - ) - - gsr = cudf.Series( - data, dtype="float64" if len(data) == 0 else None, nan_as_null=False - ) - # Since there is no concept of `nan_as_null` in pandas, - # nulls will be returned in the operations. 
So only - # testing for `skipna=True` when `nan_as_null=False` - assert_eq(getattr(psr, ops)(skipna=True), getattr(gsr, ops)(skipna=True)) - - -@pytest.mark.parametrize( - "data", - [ - [0.0, 1, 3, 6, np.nan, 7, 5.0, np.nan, 5, 2, 3, -100], - [np.nan] * 3, - [1, 5, 3], - ], -) -@pytest.mark.parametrize("ops", ["sum", "product", "prod"]) -@pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.parametrize("min_count", [-10, -1, 0, 1, 2, 3, 5, 10]) -def test_min_count_ops(data, ops, skipna, min_count): - psr = pd.Series(data) - gsr = cudf.Series(data, nan_as_null=False) - - assert_eq( - getattr(psr, ops)(skipna=skipna, min_count=min_count), - getattr(gsr, ops)(skipna=skipna, min_count=min_count), - ) - - -@pytest.mark.parametrize( - "data1", - [ - [1, 2, 3, 4], - [10, 1, 3, 5], - ], -) -@pytest.mark.parametrize( - "data2", - [ - [1, 2, 3, 4], - [10, 1, 3, 5], - ], -) -@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_cov_corr_datetime_timedelta(data1, data2, dtype): - gsr1 = cudf.Series(data1, dtype=dtype) - gsr2 = cudf.Series(data2, dtype=dtype) - psr1 = gsr1.to_pandas() - psr2 = gsr2.to_pandas() - - assert_eq(psr1.corr(psr2), gsr1.corr(gsr2)) - assert_eq(psr1.cov(psr2), gsr1.cov(gsr2)) - - -@pytest.mark.parametrize("null_flag", [False, True]) -@pytest.mark.parametrize("numeric_only", [False, True]) -def test_kurtosis_df(null_flag, numeric_only): - data = randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ) - if not numeric_only: - data = data.select_dtypes(include="number") - pdata = data.to_pandas() - - if null_flag and len(data) > 2: - data.iloc[[0, 2]] = None - pdata.iloc[[0, 2]] = None - - got = data.kurtosis(numeric_only=numeric_only) - got = got if np.isscalar(got) else got.to_numpy() - - expected = pdata.kurtosis(numeric_only=numeric_only) - np.testing.assert_array_almost_equal(got, expected) - - got = data.kurt(numeric_only=numeric_only) - got = got if np.isscalar(got) else got.to_numpy() - - expected = pdata.kurt(numeric_only=numeric_only) - np.testing.assert_array_almost_equal(got, expected) - - -@pytest.mark.parametrize("null_flag", [False, True]) -@pytest.mark.parametrize("numeric_only", [False, True]) -def test_skew_df(null_flag, numeric_only): - data = randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ) - if not numeric_only: - data = data.select_dtypes(include="number") - pdata = data.to_pandas() - - if null_flag and len(data) > 2: - data.iloc[[0, 2]] = None - pdata.iloc[[0, 2]] = None - - got = data.skew(numeric_only=numeric_only) - expected = pdata.skew(numeric_only=numeric_only) - got = got if np.isscalar(got) else got.to_numpy() - np.testing.assert_array_almost_equal(got, expected) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 4b9e47ed275..f04c636757f 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -633,112 +633,6 @@ def test_string_int_to_ipv4_dtype_fail(dtype): gsr._column.int2ip() -@pytest.mark.parametrize( - "data", - [ - ["a", "b", "c", "d", "e"], - ["a", "z", ".", '"', "aa", "zz"], - ["aa", "zz"], - ["z", "a", "zz", "aa"], - ["1", "2", "3", "4", "5"], - [""], - ["a"], - ["hello"], - ["small text", "this is a larger text......"], - ["👋🏻", "🔥", "🥇"], - ["This is 💯", "here is a calendar", "📅"], - ["", ".", ";", "[", "]"], - ["\t", ".", "\n", "\n\t", 
"\t\n"], - ], -) -def test_str_min(data): - psr = pd.Series(data) - sr = cudf.Series(data) - - assert_eq(psr.min(), sr.min()) - - -@pytest.mark.parametrize( - "data", - [ - ["a", "b", "c", "d", "e"], - ["a", "z", ".", '"', "aa", "zz"], - ["aa", "zz"], - ["z", "a", "zz", "aa"], - ["1", "2", "3", "4", "5"], - [""], - ["a"], - ["hello"], - ["small text", "this is a larger text......"], - ["👋🏻", "🔥", "🥇"], - ["This is 💯", "here is a calendar", "📅"], - ["", ".", ";", "[", "]"], - ["\t", ".", "\n", "\n\t", "\t\n"], - ], -) -def test_str_max(data): - psr = pd.Series(data) - sr = cudf.Series(data) - - assert_eq(psr.max(), sr.max()) - - -@pytest.mark.parametrize( - "data", - [ - ["a", "b", "c", "d", "e"], - ["a", "z", ".", '"', "aa", "zz"], - ["aa", "zz"], - ["z", "a", "zz", "aa"], - ["1", "2", "3", "4", "5"], - [""], - ["a"], - ["hello"], - ["small text", "this is a larger text......"], - ["👋🏻", "🔥", "🥇"], - ["This is 💯", "here is a calendar", "📅"], - ["", ".", ";", "[", "]"], - ["\t", ".", "\n", "\n\t", "\t\n"], - ], -) -def test_str_sum(data): - psr = pd.Series(data) - sr = cudf.Series(data) - - assert_eq(psr.sum(), sr.sum()) - - -def test_str_mean(): - sr = cudf.Series(["a", "b", "c", "d", "e"]) - - with pytest.raises(TypeError): - sr.mean() - - -def test_string_product(): - psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = cudf.Series(["1", "2", "3", "4", "5"]) - - assert_exceptions_equal( - lfunc=psr.product, - rfunc=sr.product, - ) - - -def test_string_var(): - psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = cudf.Series(["1", "2", "3", "4", "5"]) - - assert_exceptions_equal(lfunc=psr.var, rfunc=sr.var) - - -def test_string_std(): - psr = pd.Series(["1", "2", "3", "4", "5"]) - sr = cudf.Series(["1", "2", "3", "4", "5"]) - - assert_exceptions_equal(lfunc=psr.std, rfunc=sr.std) - - def test_string_slice_with_mask(): actual = cudf.Series(["hi", "hello", None]) expected = actual[0:3] @@ -748,21 +642,3 @@ def test_string_slice_with_mask(): assert_eq(actual._column.null_count, expected._column.null_count) assert_eq(actual, expected) - - -def test_string_reduction_error(): - s = cudf.Series([None, None], dtype="str") - ps = s.to_pandas(nullable=True) - assert_exceptions_equal( - s.any, - ps.any, - lfunc_args_and_kwargs=([], {"skipna": False}), - rfunc_args_and_kwargs=([], {"skipna": False}), - ) - - assert_exceptions_equal( - s.all, - ps.all, - lfunc_args_and_kwargs=([], {"skipna": False}), - rfunc_args_and_kwargs=([], {"skipna": False}), - ) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 28741b9f592..833035858be 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -9,9 +9,8 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_230 from cudf.testing import _utils as utils, assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if +from cudf.testing._utils import assert_exceptions_equal @pytest.fixture( @@ -335,38 +334,6 @@ def test_timedelta_series_mod_with_scalar_zero(reverse): assert_eq(expected, actual) -@pytest.mark.parametrize("reduction_op", ["sum", "mean", "median", "quantile"]) -def test_timedelta_reduction_ops( - data_non_overflow, timedelta_dtype, reduction_op -): - gsr = cudf.Series(data_non_overflow, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - if len(psr) > 0 and psr.isnull().all() and reduction_op == "median": - with pytest.warns(RuntimeWarning, match="Mean of empty slice"): - expected = getattr(psr, reduction_op)() - else: - 
with expect_warning_if( - PANDAS_GE_230 - and reduction_op == "quantile" - and len(data_non_overflow) == 0 - and timedelta_dtype != "timedelta64[ns]" - ): - expected = getattr(psr, reduction_op)() - actual = getattr(gsr, reduction_op)() - if pd.isna(expected) and pd.isna(actual): - pass - elif isinstance(expected, pd.Timedelta) and isinstance( - actual, pd.Timedelta - ): - assert ( - expected.round(gsr._column.time_unit).value - == actual.round(gsr._column.time_unit).value - ) - else: - assert_eq(expected, actual) - - @pytest.mark.parametrize("datetime_dtype", utils.DATETIME_TYPES) def test_timedelta_index_datetime_index_ops( data_non_overflow, datetime_dtype, timedelta_dtype @@ -618,50 +585,6 @@ def test_timedelta_invalid_ops(): ) -@pytest.mark.parametrize("data", [[1, 2, 3], [], [1, 20, 1000, None]]) -@pytest.mark.parametrize("ddof", [1, 2, 3]) -def test_timedelta_std(data, timedelta_dtype, ddof): - gsr = cudf.Series(data, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - expected = psr.std(ddof=ddof) - actual = gsr.std(ddof=ddof) - - if np.isnat(expected.to_numpy()) and np.isnat(actual.to_numpy()): - assert True - else: - np.testing.assert_allclose( - expected.to_numpy().astype("float64"), - actual.to_numpy().astype("float64"), - rtol=1e-5, - atol=0, - ) - - -@pytest.mark.parametrize("op", ["max", "min"]) -@pytest.mark.parametrize( - "data", - [ - [], - [1, 2, 3, 100], - [10, None, 100, None, None], - [None, None, None], - [1231], - ], -) -def test_timedelta_reductions(data, op, timedelta_dtype): - sr = cudf.Series(data, dtype=timedelta_dtype) - psr = sr.to_pandas() - - actual = getattr(sr, op)() - expected = getattr(psr, op)() - - if np.isnat(expected.to_numpy()) and np.isnat(actual): - assert True - else: - assert_eq(expected.to_numpy(), actual) - - @pytest.mark.parametrize("op", [operator.add, operator.sub]) def test_timdelta_binop_tz_timestamp(op): s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") From 265f9143a165fa539b9eb2787f1e964d423d1cf1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Aug 2025 14:58:08 -0700 Subject: [PATCH 139/366] Move some of test_dataframe.py to new cudf classic directory structure (#19687) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19687 --- python/cudf/cudf/tests/conftest.py | 6 + .../tests/dataframe/methods/test_assign.py | 36 + .../tests/dataframe/methods/test_describe.py | 234 ++ .../cudf/tests/dataframe/methods/test_drop.py | 278 ++ .../dataframe/methods/test_hash_values.py | 157 + .../tests/dataframe/methods/test_head_tail.py | 80 + .../dataframe/methods/test_isna_notnull.py | 56 + .../tests/dataframe/methods/test_items.py | 13 + .../methods/test_partition_by_hash.py | 116 + .../cudf/tests/dataframe/methods/test_pop.py | 46 + .../tests/dataframe/methods/test_reindex.py | 191 + .../tests/dataframe/methods/test_rename.py | 70 + .../dataframe/methods/test_reset_index.py | 144 + .../tests/dataframe/methods/test_round.py | 70 + .../dataframe/methods/test_select_dtypes.py | 170 + .../tests/dataframe/methods/test_set_index.py | 105 + .../dataframe/methods/test_sort_index.py | 147 + .../tests/dataframe/methods/test_swaplevel.py | 69 + .../tests/dataframe/methods/test_to_arrow.py | 116 +- .../tests/dataframe/methods/test_to_cupy.py | 136 + 
.../tests/dataframe/methods/test_transpose.py | 85 +- .../tests/dataframe/methods/test_truncate.py | 61 + .../cudf/tests/dataframe/test_attributes.py | 56 + .../cudf/tests/dataframe/test_constructors.py | 396 ++ .../general_functions/test_from_pandas.py | 21 + .../cudf/tests/series/methods/test_shift.py | 37 + .../tests/series/methods/test_to_frame.py | 14 + .../tests/series/methods/test_to_string.py | 20 + .../cudf/cudf/tests/series/test_attributes.py | 6 + .../cudf/tests/series/test_constructors.py | 109 + python/cudf/cudf/tests/test_dataframe.py | 3356 ++--------------- 31 files changed, 3267 insertions(+), 3134 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_assign.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_describe.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_drop.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_hash_values.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_head_tail.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_isna_notnull.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_items.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_partition_by_hash.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_pop.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_reindex.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_rename.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_reset_index.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_round.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_select_dtypes.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_set_index.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_sort_index.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_swaplevel.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_truncate.py create mode 100644 python/cudf/cudf/tests/dataframe/test_attributes.py create mode 100644 python/cudf/cudf/tests/general_functions/test_from_pandas.py create mode 100644 python/cudf/cudf/tests/series/methods/test_shift.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_frame.py create mode 100644 python/cudf/cudf/tests/series/methods/test_to_string.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index f05ec3dd247..88940a3ec47 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -514,6 +514,12 @@ def inplace(request): return request.param +@pytest.fixture(params=[True, False]) +def drop(request): + """Param for `drop` argument""" + return request.param + + @pytest.fixture(params=[True, False]) def ignore_index(request): """Param for `ignore_index` argument""" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_assign.py b/python/cudf/cudf/tests/dataframe/methods/test_assign.py new file mode 100644 index 00000000000..9cbd740a4ef --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_assign.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
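+# Tests for DataFrame.assign: new columns derived from existing ones and from
+# callables, compared against pandas.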
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_assign(): + gdf = cudf.DataFrame({"x": [1, 2, 3]}) + gdf2 = gdf.assign(y=gdf.x + 1) + assert list(gdf.columns) == ["x"] + assert list(gdf2.columns) == ["x", "y"] + + np.testing.assert_equal(gdf2.y.to_numpy(), [2, 3, 4]) + + +@pytest.mark.parametrize( + "mapping", + [ + {"y": 1, "z": lambda df: df["x"] + df["y"]}, + { + "x": lambda df: df["x"] * 2, + "y": lambda df: 2, + "z": lambda df: df["x"] / df["y"], + }, + ], +) +def test_assign_callable(mapping): + df = pd.DataFrame({"x": [1, 2, 3]}) + cdf = cudf.from_pandas(df) + expect = df.assign(**mapping) + actual = cdf.assign(**mapping) + assert_eq(expect, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_describe.py b/python/cudf/cudf/tests/dataframe/methods/test_describe.py new file mode 100644 index 00000000000..51ef9a7750e --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_describe.py @@ -0,0 +1,234 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_dataframe_describe_exclude(): + rng = np.random.default_rng(seed=12) + data_length = 10 + + df = cudf.DataFrame( + { + "x": rng.normal(10, 1, data_length).astype("int64"), + "y": rng.normal(10, 1, data_length), + } + ) + pdf = df.to_pandas() + + gdf_results = df.describe(exclude=["float"]) + pdf_results = pdf.describe(exclude=["float"]) + + assert_eq(gdf_results, pdf_results) + + +def test_dataframe_describe_include(): + rng = np.random.default_rng(seed=12) + data_length = 10 + + df = cudf.DataFrame( + { + "x": rng.normal(10, 1, data_length).astype("int64"), + "y": rng.normal(10, 1, data_length), + } + ) + pdf = df.to_pandas() + gdf_results = df.describe(include=["int"]) + pdf_results = pdf.describe(include=["int"]) + + assert_eq(gdf_results, pdf_results) + + +def test_dataframe_describe_default(): + rng = np.random.default_rng(seed=12) + data_length = 10 + + df = cudf.DataFrame( + { + "x": rng.normal(10, 1, data_length), + "y": rng.normal(10, 1, data_length), + } + ) + pdf = df.to_pandas() + gdf_results = df.describe() + pdf_results = pdf.describe() + + assert_eq(pdf_results, gdf_results) + + +def test_dataframe_describe_percentiles(): + rng = np.random.default_rng(seed=12) + data_length = 100 + sample_percentiles = [0.0, 0.1, 0.33, 0.84, 0.4, 0.99] + + df = cudf.DataFrame( + { + "x": rng.normal(10, 1, data_length), + "y": rng.normal(10, 1, data_length), + } + ) + pdf = df.to_pandas() + gdf_results = df.describe(percentiles=sample_percentiles) + pdf_results = pdf.describe(percentiles=sample_percentiles) + + assert_eq(pdf_results, gdf_results) + + +def test_dataframe_describe_include_all(): + rng = np.random.default_rng(seed=12) + data_length = 10 + + df = cudf.DataFrame( + { + "x": rng.normal(10, 1, data_length).astype("int64"), + "y": rng.normal(10, 1, data_length), + "animal": rng.choice(["dog", "cat", "bird"], data_length), + } + ) + + pdf = df.to_pandas() + gdf_results = df.describe(include="all") + pdf_results = pdf.describe(include="all") + + assert_eq(gdf_results[["x", "y"]], pdf_results[["x", "y"]]) + assert_eq(gdf_results.index, pdf_results.index) + assert_eq(gdf_results.columns, pdf_results.columns) + assert_eq( + gdf_results[["animal"]].fillna(-1).astype("str"), + pdf_results[["animal"]].fillna(-1).astype("str"), + ) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame( + { + "a": [1, 2, 3], + "b": [10, 22, 
33], + "c": [0.3234, 0.23432, 0.0], + "d": ["hello", "world", "hello"], + } + ), + pd.DataFrame( + { + "a": [1, 2, 3], + "b": ["hello", "world", "hello"], + "c": [0.3234, 0.23432, 0.0], + } + ), + pd.DataFrame( + { + "int_data": [1, 2, 3], + "str_data": ["hello", "world", "hello"], + "float_data": [0.3234, 0.23432, 0.0], + "timedelta_data": pd.Series( + [1, 2, 1], dtype="timedelta64[ns]" + ), + "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), + } + ), + pd.DataFrame( + { + "int_data": [1, 2, 3], + "str_data": ["hello", "world", "hello"], + "float_data": [0.3234, 0.23432, 0.0], + "timedelta_data": pd.Series( + [1, 2, 1], dtype="timedelta64[ns]" + ), + "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), + "category_data": pd.Series(["a", "a", "b"], dtype="category"), + } + ), + ], +) +@pytest.mark.parametrize( + "include", + [None, "all", ["object"], ["int"], ["object", "int", "category"]], +) +def test_describe_misc_include(pdf, include): + df = cudf.DataFrame.from_pandas(pdf) + + expected = pdf.describe(include=include) + actual = df.describe(include=include) + + for col in expected.columns: + if expected[col].dtype == np.dtype("object"): + expected[col] = expected[col].fillna(-1).astype("str") + actual[col] = actual[col].fillna(-1).astype("str") + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame( + { + "a": [1, 2, 3], + "b": [10, 22, 33], + "c": [0.3234, 0.23432, 0.0], + "d": ["hello", "world", "hello"], + } + ), + pd.DataFrame( + { + "a": [1, 2, 3], + "b": ["hello", "world", "hello"], + "c": [0.3234, 0.23432, 0.0], + } + ), + pd.DataFrame( + { + "int_data": [1, 2, 3], + "str_data": ["hello", "world", "hello"], + "float_data": [0.3234, 0.23432, 0.0], + "timedelta_data": pd.Series( + [1, 2, 1], dtype="timedelta64[ns]" + ), + "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), + } + ), + pd.DataFrame( + { + "int_data": [1, 2, 3], + "str_data": ["hello", "world", "hello"], + "float_data": [0.3234, 0.23432, 0.0], + "timedelta_data": pd.Series( + [1, 2, 1], dtype="timedelta64[ns]" + ), + "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), + "category_data": pd.Series(["a", "a", "b"], dtype="category"), + } + ), + ], +) +@pytest.mark.parametrize( + "exclude", [None, ["object"], ["int"], ["object", "int", "category"]] +) +def test_describe_misc_exclude(pdf, exclude): + df = cudf.DataFrame.from_pandas(pdf) + + expected = pdf.describe(exclude=exclude) + actual = df.describe(exclude=exclude) + + for col in expected.columns: + if expected[col].dtype == np.dtype("object"): + expected[col] = expected[col].fillna(-1).astype("str") + actual[col] = actual[col].fillna(-1).astype("str") + + assert_eq(expected, actual) + + +def test_empty_dataframe_describe(): + pdf = pd.DataFrame({"a": [], "b": []}) + gdf = cudf.from_pandas(pdf) + + expected = pdf.describe() + actual = gdf.describe() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_drop.py b/python/cudf/cudf/tests/dataframe/methods/test_drop.py new file mode 100644 index 00000000000..a0e6c1f4049 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_drop.py @@ -0,0 +1,278 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
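+# Tests for DataFrame.drop: dropping by columns=, labels= with an axis,
+# index=, and MultiIndex levels, plus error behavior, all compared to pandas.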
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame( + {"a": range(10), "b": range(10, 20), "c": range(1, 11)}, + index=pd.Index( + ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + name="custom_name", + ), + ), + pd.DataFrame( + {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} + ), + ], +) +@pytest.mark.parametrize( + "columns", + [["a"], ["b"], "a", "b", ["a", "b"]], +) +def test_dataframe_drop_columns(pdf, columns, inplace): + if inplace: + pdf = pdf.copy() + gdf = cudf.from_pandas(pdf) + + expected = pdf.drop(columns=columns, inplace=inplace) + actual = gdf.drop(columns=columns, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("obj", ["Index", "Series"]) +def test_drop_cudf_obj_columns(obj): + pdf = pd.DataFrame({"A": [1], "B": [1]}) + gdf = cudf.from_pandas(pdf) + + columns = ["B"] + expected = pdf.drop(labels=getattr(pd, obj)(columns), axis=1) + actual = gdf.drop(columns=getattr(cudf, obj)(columns), axis=1) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame( + {"a": range(10), "b": range(10, 20), "c": range(1, 11)}, + index=pd.Index(list(range(10)), name="custom_name"), + ), + pd.DataFrame( + {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} + ), + ], +) +@pytest.mark.parametrize( + "labels", + [ + [1], + [0], + 1, + 5, + [5, 9], + pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + pd.Index([0, 1, 8, 9], name="new name"), + ], +) +def test_dataframe_drop_labels_axis_0(pdf, labels, inplace): + pdf = pdf.copy() + gdf = cudf.from_pandas(pdf) + + expected = pdf.drop(labels=labels, axis=0, inplace=inplace) + actual = gdf.drop(labels=labels, axis=0, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame({"a": range(10), "b": range(10, 20), "c": range(1, 11)}), + pd.DataFrame( + {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} + ), + pd.DataFrame( + { + "a": range(10), + "b": range(10, 20), + }, + index=pd.Index(list(range(10)), dtype="uint64"), + ), + ], +) +@pytest.mark.parametrize( + "index", + [[1], [0], 1, 5, [5, 9], pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])], +) +@pytest.mark.parametrize("inplace", [True, False]) +def test_dataframe_drop_index(pdf, index, inplace): + pdf = pdf.copy() + gdf = cudf.from_pandas(pdf) + + expected = pdf.drop(index=index, inplace=inplace) + actual = gdf.drop(index=index, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "index,level", + [ + ("cow", 0), + ("lama", 0), + ("falcon", 0), + ("speed", 1), + ("weight", 1), + ("length", 1), + ("cow", None), + ( + "lama", + None, + ), + ( + "falcon", + None, + ), + ], +) +def test_dataframe_drop_multiindex(index, level, inplace): + pdf = pd.DataFrame( + {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5}, + index=pd.MultiIndex( + levels=[ + ["lama", "cow", "falcon"], + ["speed", "weight", "length"], + ], + codes=[ + [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], + [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], + ], + ), + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.drop(index=index, inplace=inplace, level=level) + actual = gdf.drop(index=index, inplace=inplace, level=level) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + 
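+# labels= with axis=1 should behave identically to the columns=-based drops above.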
+@pytest.mark.parametrize("data", [{"c": range(1, 11)}, {"d": ["a", "v"] * 5}]) +@pytest.mark.parametrize("labels", [["a"], ["b"], "a", "b", ["a", "b"]]) +def test_dataframe_drop_labels_axis_1(data, labels, inplace): + pdf = pd.DataFrame({"a": range(10), "b": range(10, 20), **data}) + gdf = cudf.from_pandas(pdf) + + expected = pdf.drop(labels=labels, axis=1, inplace=inplace) + actual = gdf.drop(labels=labels, axis=1, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + +def test_dataframe_drop_error(): + df = cudf.DataFrame({"a": [1], "b": [2], "c": [3]}) + pdf = df.to_pandas() + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([], {"columns": "d"}), + rfunc_args_and_kwargs=([], {"columns": "d"}), + ) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([], {"columns": ["a", "d", "b"]}), + rfunc_args_and_kwargs=([], {"columns": ["a", "d", "b"]}), + ) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), + rfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), + ) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([], {"axis": 1}), + rfunc_args_and_kwargs=([], {"axis": 1}), + ) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([[2, 0]],), + rfunc_args_and_kwargs=([[2, 0]],), + ) + + +def test_dataframe_drop_raises(): + df = cudf.DataFrame( + {"a": [1, 2, 3], "c": [10, 20, 30]}, index=["x", "y", "z"] + ) + pdf = df.to_pandas() + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=(["p"],), + rfunc_args_and_kwargs=(["p"],), + ) + + # label dtype mismatch + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([3],), + rfunc_args_and_kwargs=([3],), + ) + + expect = pdf.drop("p", errors="ignore") + actual = df.drop("p", errors="ignore") + + assert_eq(actual, expect) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([], {"columns": "p"}), + rfunc_args_and_kwargs=([], {"columns": "p"}), + ) + + expect = pdf.drop(columns="p", errors="ignore") + actual = df.drop(columns="p", errors="ignore") + + assert_eq(actual, expect) + + assert_exceptions_equal( + lfunc=pdf.drop, + rfunc=df.drop, + lfunc_args_and_kwargs=([], {"labels": "p", "axis": 1}), + rfunc_args_and_kwargs=([], {"labels": "p", "axis": 1}), + ) + + expect = pdf.drop(labels="p", axis=1, errors="ignore") + actual = df.drop(labels="p", axis=1, errors="ignore") + + assert_eq(actual, expect) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_hash_values.py b/python/cudf/cudf/tests/dataframe/methods/test_hash_values.py new file mode 100644 index 00000000000..ff884d0cb2e --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_hash_values.py @@ -0,0 +1,157 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
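+# Tests for DataFrame.hash_values across murmur3, md5, sha*, and xxhash
+# methods, including seed handling and regression values for the xxhash variants.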
+ +import contextlib + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq, assert_neq + + +@pytest.mark.parametrize( + "method", + [ + "murmur3", + "md5", + "sha1", + "sha224", + "sha256", + "sha384", + "sha512", + "xxhash32", + "xxhash64", + ], +) +@pytest.mark.parametrize("seed", [None, 42]) +def test_dataframe_hash_values(method, seed): + nrows = 10 + warning_expected = seed is not None and method not in { + "murmur3", + "xxhash32", + "xxhash64", + } + potential_warning = ( + pytest.warns(UserWarning, match="Provided seed value has no effect*") + if warning_expected + else contextlib.nullcontext() + ) + + gdf = cudf.DataFrame() + data = np.arange(nrows) + data[0] = data[-1] # make first and last the same + gdf["a"] = data + gdf["b"] = gdf.a + 100 + with potential_warning: + out = gdf.hash_values(method=method, seed=seed) + assert isinstance(out, cudf.Series) + assert len(out) == nrows + expected_dtypes = { + "murmur3": np.uint32, + "md5": object, + "sha1": object, + "sha224": object, + "sha256": object, + "sha384": object, + "sha512": object, + "xxhash32": np.uint32, + "xxhash64": np.uint64, + } + assert out.dtype == expected_dtypes[method] + + # Check single column + with potential_warning: + out_one = gdf[["a"]].hash_values(method=method, seed=seed) + # First matches last + assert out_one.iloc[0] == out_one.iloc[-1] + # Equivalent to the cudf.Series.hash_values() + with potential_warning: + assert_eq(gdf["a"].hash_values(method=method, seed=seed), out_one) + + +@pytest.mark.parametrize("method", ["murmur3", "xxhash32", "xxhash64"]) +def test_dataframe_hash_values_seed(method): + gdf = cudf.DataFrame() + data = np.arange(10) + data[0] = data[-1] # make first and last the same + gdf["a"] = data + gdf["b"] = gdf.a + 100 + out_one = gdf.hash_values(method=method, seed=0) + out_two = gdf.hash_values(method=method, seed=1) + assert out_one.iloc[0] == out_one.iloc[-1] + assert out_two.iloc[0] == out_two.iloc[-1] + assert_neq(out_one, out_two) + + +def test_dataframe_hash_values_xxhash32(): + # xxhash32 has no built-in implementation in Python and we don't want to + # add a testing dependency, so we use regression tests against known good + # values. + gdf = cudf.DataFrame({"a": [0.0, 1.0, 2.0, np.inf, np.nan]}) + gdf["b"] = -gdf["a"] + out_a = gdf["a"].hash_values(method="xxhash32", seed=0) + expected_a = cudf.Series( + [3736311059, 2307980487, 2906647130, 746578903, 4294967295], + dtype=np.uint32, + ) + assert_eq(out_a, expected_a) + + out_b = gdf["b"].hash_values(method="xxhash32", seed=42) + expected_b = cudf.Series( + [1076387279, 2261349915, 531498073, 650869264, 4294967295], + dtype=np.uint32, + ) + assert_eq(out_b, expected_b) + + out_df = gdf.hash_values(method="xxhash32", seed=0) + expected_df = cudf.Series( + [1223721700, 2885793241, 1920811472, 1146715602, 4294967295], + dtype=np.uint32, + ) + assert_eq(out_df, expected_df) + + +def test_dataframe_hash_values_xxhash64(): + # xxhash64 has no built-in implementation in Python and we don't want to + # add a testing dependency, so we use regression tests against known good + # values. 
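+    # The inputs below cover zeros, small floats, inf, and NaN; note that the
+    # NaN row hashes to 2**64 - 1 in every expected result below.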
+ gdf = cudf.DataFrame({"a": [0.0, 1.0, 2.0, np.inf, np.nan]}) + gdf["b"] = -gdf["a"] + out_a = gdf["a"].hash_values(method="xxhash64", seed=0) + expected_a = cudf.Series( + [ + 3803688792395291579, + 10706502109028787093, + 9835943264235290955, + 18031741628920313605, + 18446744073709551615, + ], + dtype=np.uint64, + ) + assert_eq(out_a, expected_a) + + out_b = gdf["b"].hash_values(method="xxhash64", seed=42) + expected_b = cudf.Series( + [ + 9826995235083043316, + 10150515573749944095, + 5005707091092326006, + 5326262080505358431, + 18446744073709551615, + ], + dtype=np.uint64, + ) + assert_eq(out_b, expected_b) + + out_df = gdf.hash_values(method="xxhash64", seed=0) + expected_df = cudf.Series( + [ + 10208049663714815266, + 4949201786888768834, + 18122173653994477335, + 11133539368563441730, + 18446744073709551615, + ], + dtype=np.uint64, + ) + assert_eq(out_df, expected_df) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_head_tail.py b/python/cudf/cudf/tests/dataframe/methods/test_head_tail.py new file mode 100644 index 00000000000..3277b80ca1c --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_head_tail.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("nelem", [0, 10]) +def test_head_tail(nelem, numeric_types_as_str): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame( + { + "a": rng.integers(0, 1000, nelem).astype(numeric_types_as_str), + "b": rng.integers(0, 1000, nelem).astype(numeric_types_as_str), + } + ) + gdf = cudf.from_pandas(pdf) + + assert_eq(gdf.head(), pdf.head()) + assert_eq(gdf.head(3), pdf.head(3)) + assert_eq(gdf.head(-2), pdf.head(-2)) + assert_eq(gdf.head(0), pdf.head(0)) + + assert_eq(gdf["a"].head(), pdf["a"].head()) + assert_eq(gdf["a"].head(3), pdf["a"].head(3)) + assert_eq(gdf["a"].head(-2), pdf["a"].head(-2)) + + assert_eq(gdf.tail(), pdf.tail()) + assert_eq(gdf.tail(3), pdf.tail(3)) + assert_eq(gdf.tail(-2), pdf.tail(-2)) + assert_eq(gdf.tail(0), pdf.tail(0)) + + assert_eq(gdf["a"].tail(), pdf["a"].tail()) + assert_eq(gdf["a"].tail(3), pdf["a"].tail(3)) + assert_eq(gdf["a"].tail(-2), pdf["a"].tail(-2)) + + +def test_tail_for_string(): + gdf = cudf.DataFrame({"id": ["a", "b"], "v": [1, 2]}) + assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) + + +def test_dataframe_0_row_dtype(all_supported_types_as_str): + data = cudf.Series([1, 2, 3, 4, 5], dtype=all_supported_types_as_str) + + expect = cudf.DataFrame({"x": data, "y": data}) + got = expect.head(0) + + for col_name in got.columns: + assert expect[col_name].dtype == got[col_name].dtype + + expect = cudf.Series(data) + got = expect.head(0) + + assert expect.dtype == got.dtype + + +def test_one_row_head(): + gdf = cudf.DataFrame({"name": ["carl"], "score": [100]}, index=[123]) + pdf = gdf.to_pandas() + + head_gdf = gdf.head() + head_pdf = pdf.head() + + assert_eq(head_pdf, head_gdf) + + +@pytest.mark.parametrize("index", [None, [123], ["a", "b"]]) +def test_no_cols_head(index): + pdf = pd.DataFrame(index=index) + gdf = cudf.from_pandas(pdf) + + head_gdf = gdf.head() + head_pdf = pdf.head() + + assert_eq(head_pdf, head_gdf) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_isna_notnull.py b/python/cudf/cudf/tests/dataframe/methods/test_isna_notnull.py new file mode 100644 index 00000000000..8736533fe80 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_isna_notnull.py @@ -0,0 +1,56 @@ +# Copyright (c) 
2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "na_data", + [ + pd.DataFrame( + { + "a": [0, 1, 2, np.nan, 4, None, 6], + "b": [np.nan, None, "u", "h", "d", "a", "m"], + }, + index=["q", "w", "e", "r", "t", "y", "u"], + ), + pd.DataFrame({"a": [0, 1, 2, 3, 4], "b": ["a", "b", "u", "h", "d"]}), + pd.DataFrame( + { + "a": [None, None, np.nan, None], + "b": [np.nan, None, np.nan, None], + } + ), + pd.DataFrame({"a": []}), + pd.DataFrame({"a": [np.nan], "b": [None]}), + pd.DataFrame({"a": ["a", "b", "c", None, "e"]}), + pd.DataFrame({"a": ["a", "b", "c", "d", "e"]}), + ], +) +@pytest.mark.parametrize("api_call", ["isnull", "isna", "notna", "notnull"]) +def test_dataframe_isnull_isna_and_reverse(na_data, nan_as_null, api_call): + def detect_nan(x): + # Check if the input is a float and if it is nan + return x.apply(lambda v: isinstance(v, float) and np.isnan(v)) + + df = na_data + nan_contains = df.select_dtypes(object).apply(detect_nan) + if nan_as_null is False and ( + nan_contains.any().any() and not nan_contains.all().all() + ): + with pytest.raises(cudf.errors.MixedTypeError): + cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) + else: + gdf = cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) + + assert_eq(getattr(df, api_call)(), getattr(gdf, api_call)()) + + # Test individual columns + for col in df: + assert_eq( + getattr(df[col], api_call)(), getattr(gdf[col], api_call)() + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_items.py b/python/cudf/cudf/tests/dataframe/methods/test_items.py new file mode 100644 index 00000000000..8e65c50f617 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_items.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_iteritems(): + gdf = cudf.DataFrame({"x": range(10), "y": range(10)}) + for k, v in gdf.items(): + assert k in gdf.columns + assert isinstance(v, cudf.Series) + assert_eq(v, gdf[k]) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_partition_by_hash.py b/python/cudf/cudf/tests/dataframe/methods/test_partition_by_hash.py new file mode 100644 index 00000000000..432c250a288 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_partition_by_hash.py @@ -0,0 +1,116 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
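+# Tests for DataFrame.partition_by_hash: partition counts, disjoint keys
+# across partitions, masked values and keys, keep_index, and empty frames.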
+ +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import expand_bits_to_bytes, random_bitmask + + +@pytest.mark.parametrize("nparts", [1, 2]) +def test_dataframe_hash_partition(nparts): + nrows = 10 + nkeys = 2 + rng = np.random.default_rng(seed=0) + gdf = cudf.DataFrame( + {f"key{i}": rng.integers(0, 7 - i, nrows) for i in range(nkeys)} + ) + keycols = gdf.columns.to_list() + gdf["val1"] = rng.integers(0, nrows * 2, nrows) + + got = gdf.partition_by_hash(keycols, nparts=nparts) + # Must return a list + assert isinstance(got, list) + # Must have correct number of partitions + assert len(got) == nparts + # All partitions must be DataFrame type + assert all(isinstance(p, cudf.DataFrame) for p in got) + # Check that all partitions have unique keys + part_unique_keys = set() + for p in got: + if len(p): + # Take rows of the keycolumns and build a set of the key-values + unique_keys = set(map(tuple, p[keycols].values_host)) + # Ensure that none of the key-values have occurred in other groups + assert not (unique_keys & part_unique_keys) + part_unique_keys |= unique_keys + assert len(part_unique_keys) + + +def test_dataframe_hash_partition_masked_value(): + nrows = 10 + gdf = cudf.DataFrame( + { + "key": np.arange(nrows), + "val": np.arange(nrows) + 100, + } + ) + bitmask = random_bitmask(nrows) + bytemask = expand_bits_to_bytes(bitmask) + gdf["val"] = gdf["val"]._column.set_mask(bitmask) + parted = gdf.partition_by_hash(["key"], nparts=3) + # Verify that the valid mask is correct + for p in parted: + df = p.to_pandas() + for row in df.itertuples(): + valid = bool(bytemask[row.key]) + expected_value = row.key + 100 if valid else np.nan + got_value = row.val + assert (expected_value == got_value) or ( + np.isnan(expected_value) and np.isnan(got_value) + ) + + +def test_dataframe_hash_partition_masked_keys(): + nrows = 5 + gdf = cudf.DataFrame( + { + "key": np.arange(nrows), + "val": np.arange(nrows) + 100, + } + ) + bitmask = random_bitmask(nrows) + bytemask = expand_bits_to_bytes(bitmask) + gdf["key"] = gdf["key"]._column.set_mask(bitmask) + parted = gdf.partition_by_hash(["key"], nparts=3, keep_index=False) + # Verify that the valid mask is correct + for p in parted: + df = p.to_pandas() + for row in df.itertuples(): + valid = bool(bytemask[row.val - 100]) + # val is key + 100 + expected_value = row.val - 100 if valid else np.nan + got_value = row.key + assert (expected_value == got_value) or ( + np.isnan(expected_value) and np.isnan(got_value) + ) + + +@pytest.mark.parametrize("keep_index", [True, False]) +def test_dataframe_hash_partition_keep_index(keep_index): + gdf = cudf.DataFrame( + {"val": [1, 2, 3, 4, 5], "key": [3, 2, 1, 4, 5]}, index=[5, 4, 3, 2, 1] + ) + + expected_df1 = cudf.DataFrame( + {"val": [1, 5], "key": [3, 5]}, index=[5, 1] if keep_index else None + ) + expected_df2 = cudf.DataFrame( + {"val": [2, 3, 4], "key": [2, 1, 4]}, + index=[4, 3, 2] if keep_index else None, + ) + expected = [expected_df1, expected_df2] + + parts = gdf.partition_by_hash(["key"], nparts=2, keep_index=keep_index) + + for exp, got in zip(expected, parts, strict=True): + assert_eq(exp, got) + + +def test_dataframe_hash_partition_empty(): + gdf = cudf.DataFrame({"val": [1, 2], "key": [3, 2]}, index=["a", "b"]) + parts = gdf.iloc[:0].partition_by_hash(["key"], nparts=3) + assert len(parts) == 3 + for part in parts: + assert_eq(gdf.iloc[:0], part) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_pop.py 
b/python/cudf/cudf/tests/dataframe/methods/test_pop.py new file mode 100644 index 00000000000..bdfb1004db1 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_pop.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_dataframe_pop(): + pdf = pd.DataFrame( + {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": [7.0, 8.0, 9.0]} + ) + gdf = cudf.DataFrame.from_pandas(pdf) + + # Test non-existing column error + with pytest.raises(KeyError) as raises: + gdf.pop("fake_colname") + raises.match("fake_colname") + + # check pop numeric column + pdf_pop = pdf.pop("a") + gdf_pop = gdf.pop("a") + assert_eq(pdf_pop, gdf_pop) + assert_eq(pdf, gdf) + + # check string column + pdf_pop = pdf.pop("b") + gdf_pop = gdf.pop("b") + assert_eq(pdf_pop, gdf_pop) + assert_eq(pdf, gdf) + + # check float column and empty dataframe + pdf_pop = pdf.pop("c") + gdf_pop = gdf.pop("c") + assert_eq(pdf_pop, gdf_pop) + assert_eq(pdf, gdf) + + # check empty dataframe edge case + empty_pdf = pd.DataFrame(columns=["a", "b"]) + empty_gdf = cudf.DataFrame(columns=["a", "b"]) + pb = empty_pdf.pop("b") + gb = empty_gdf.pop("b") + assert len(pb) == len(gb) + assert empty_pdf.empty and empty_gdf.empty diff --git a/python/cudf/cudf/tests/dataframe/methods/test_reindex.py b/python/cudf/cudf/tests/dataframe/methods/test_reindex.py new file mode 100644 index 00000000000..19efd968a95 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_reindex.py @@ -0,0 +1,191 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("copy", [True, False]) +@pytest.mark.parametrize( + "args,gd_kwargs", + [ + ([], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {"axis": 0}), + ([["a", "b", "c", "d", "e"]], {"axis": 1}), + ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": 0}), + ([], {"labels": ["a", "b", "c", "d", "e"], "axis": 1}), + ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": "index"}), + ([], {"labels": ["a", "b", "c", "d", "e"], "axis": "columns"}), + ([], {"index": [-3, 0, 3, 0, -2, 1, 3, 4, 6]}), + ([], {"columns": ["a", "b", "c", "d", "e"]}), + ( + [], + { + "index": [-3, 0, 3, 0, -2, 1, 3, 4, 6], + "columns": ["a", "b", "c", "d", "e"], + }, + ), + ], +) +def test_dataframe_reindex(copy, args, gd_kwargs): + reindex_data = cudf.datasets.randomdata( + nrows=6, + dtypes={ + "a": "category", + "c": float, + "d": str, + }, + ) + pdf, gdf = reindex_data.to_pandas(), reindex_data + assert_eq( + pdf.reindex(*args, **gd_kwargs, copy=True), + gdf.reindex(*args, **gd_kwargs, copy=copy), + ) + + +@pytest.mark.parametrize("fill_value", [-1.0, 0.0, 1.5]) +@pytest.mark.parametrize( + "args,kwargs", + [ + ([], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), + ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {"axis": 0}), + ([["a", "b", "c", "d", "e"]], {"axis": 1}), + ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": 0}), + ([], {"labels": ["a", "b", "c", "d", "e"], "axis": 1}), + ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": "index"}), + ([], {"labels": ["a", "b", "c", "d", "e"], "axis": "columns"}), + ([], {"index": [-3, 0, 3, 0, -2, 1, 3, 4, 6]}), + ([], {"columns": ["a", "b", "c", "d", "e"]}), + ( + [], + { + "index": [-3, 0, 3, 0, -2, 1, 3, 4, 6], + "columns": ["a", "b", "c", "d", "e"], + }, + ), 
+ ], +) +def test_dataframe_reindex_fill_value(args, kwargs, fill_value): + reindex_data_numeric = cudf.datasets.randomdata( + nrows=6, + dtypes={"a": float, "b": float, "c": float}, + ) + pdf, gdf = reindex_data_numeric.to_pandas(), reindex_data_numeric + assert_eq( + pdf.reindex(*args, **kwargs, fill_value=fill_value), + gdf.reindex(*args, **kwargs, fill_value=fill_value), + ) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_dataframe_reindex_change_dtype(copy): + index = pd.date_range("12/29/2009", periods=10, freq="D") + columns = ["a", "b", "c", "d", "e"] + gdf = cudf.datasets.randomdata( + nrows=6, dtypes={"a": "category", "c": float, "d": str} + ) + pdf = gdf.to_pandas() + # Validate reindexes both labels and column names when + # index=index_labels and columns=column_labels + assert_eq( + pdf.reindex(index=index, columns=columns, copy=True), + gdf.reindex(index=index, columns=columns, copy=copy), + check_freq=False, + ) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_series_categorical_reindex(copy): + index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] + gdf = cudf.datasets.randomdata(nrows=6, dtypes={"a": "category"}) + pdf = gdf.to_pandas() + assert_eq(pdf["a"].reindex(copy=True), gdf["a"].reindex(copy=copy)) + assert_eq( + pdf["a"].reindex(index, copy=True), gdf["a"].reindex(index, copy=copy) + ) + assert_eq( + pdf["a"].reindex(index=index, copy=True), + gdf["a"].reindex(index=index, copy=copy), + ) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_series_float_reindex(copy): + index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] + gdf = cudf.datasets.randomdata(nrows=6, dtypes={"c": float}) + pdf = gdf.to_pandas() + assert_eq(pdf["c"].reindex(copy=True), gdf["c"].reindex(copy=copy)) + assert_eq( + pdf["c"].reindex(index, copy=True), gdf["c"].reindex(index, copy=copy) + ) + assert_eq( + pdf["c"].reindex(index=index, copy=True), + gdf["c"].reindex(index=index, copy=copy), + ) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_series_string_reindex(copy): + index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] + gdf = cudf.datasets.randomdata(nrows=6, dtypes={"d": str}) + pdf = gdf.to_pandas() + assert_eq(pdf["d"].reindex(copy=True), gdf["d"].reindex(copy=copy)) + assert_eq( + pdf["d"].reindex(index, copy=True), gdf["d"].reindex(index, copy=copy) + ) + assert_eq( + pdf["d"].reindex(index=index, copy=True), + gdf["d"].reindex(index=index, copy=copy), + ) + + +@pytest.mark.parametrize("names", [None, ["a", "b"]]) +@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) +def test_reindex_multiindex_col_to_multiindex(names, klass): + idx = pd.Index( + [("A", "one"), ("A", "two")], + dtype="object", + ) + df = pd.DataFrame([[1, 2]], columns=idx) + gdf = cudf.from_pandas(df) + midx = klass.from_tuples([("A", "one"), ("A", "three")], names=names) + result = gdf.reindex(columns=midx) + expected = cudf.DataFrame([[1, None]], columns=midx) + # (pandas2.0): check_dtype=False won't be needed + # as None col will return object instead of float + assert_eq(result, expected, check_dtype=False) + + +@pytest.mark.parametrize("names", [None, ["a", "b"]]) +@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) +def test_reindex_tuple_col_to_multiindex(names, klass): + idx = pd.Index( + [("A", "one"), ("A", "two")], dtype="object", tupleize_cols=False + ) + df = pd.DataFrame([[1, 2]], columns=idx) + gdf = cudf.from_pandas(df) + midx = klass.from_tuples([("A", "one"), ("A", "two")], names=names) + result = gdf.reindex(columns=midx) + expected = cudf.DataFrame([[1, 
2]], columns=midx) + assert_eq(result, expected) + + +@pytest.mark.parametrize("name", [None, "foo"]) +@pytest.mark.parametrize("klass", [range, cudf.RangeIndex, pd.RangeIndex]) +def test_reindex_columns_rangeindex_keeps_rangeindex(name, klass): + new_columns = klass(3) + exp_name = None + if klass is not range: + new_columns.name = name + exp_name = name + df = cudf.DataFrame([[1, 2]]) + result = df.reindex(columns=new_columns).columns + expected = pd.RangeIndex(3, name=exp_name) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_rename.py b/python/cudf/cudf/tests/dataframe/methods/test_rename.py new file mode 100644 index 00000000000..b7e9bf3c7ce --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_rename.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("axis", [0, "index"]) +def test_dataframe_index_rename(axis): + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + gdf = cudf.DataFrame.from_pandas(pdf) + + expect = pdf.rename(mapper={1: 5, 2: 6}, axis=axis) + got = gdf.rename(mapper={1: 5, 2: 6}, axis=axis) + + assert_eq(expect, got) + + expect = pdf.rename(index={1: 5, 2: 6}) + got = gdf.rename(index={1: 5, 2: 6}) + + assert_eq(expect, got) + + expect = pdf.rename({1: 5, 2: 6}) + got = gdf.rename({1: 5, 2: 6}) + + assert_eq(expect, got) + + # `pandas` can support indexes with mixed values. We throw a + # `NotImplementedError`. + with pytest.raises(NotImplementedError): + gdf.rename(mapper={1: "x", 2: "y"}, axis=axis) + + +def test_dataframe_MI_rename(): + gdf = cudf.DataFrame( + {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} + ) + gdg = gdf.groupby(["a", "b"]).count() + pdg = gdg.to_pandas() + + expect = pdg.rename(mapper={1: 5, 2: 6}, axis=0) + got = gdg.rename(mapper={1: 5, 2: 6}, axis=0) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("axis", [1, "columns"]) +def test_dataframe_column_rename(axis): + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + gdf = cudf.DataFrame.from_pandas(pdf) + + expect = pdf.rename(mapper=lambda name: 2 * name, axis=axis) + got = gdf.rename(mapper=lambda name: 2 * name, axis=axis) + + assert_eq(expect, got) + + expect = pdf.rename(columns=lambda name: 2 * name) + got = gdf.rename(columns=lambda name: 2 * name) + + assert_eq(expect, got) + + rename_mapper = {"a": "z", "b": "y", "c": "x"} + expect = pdf.rename(columns=rename_mapper) + got = gdf.rename(columns=rename_mapper) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_reset_index.py b/python/cudf/cudf/tests/dataframe/methods/test_reset_index.py new file mode 100644 index 00000000000..fa0546d526f --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_reset_index.py @@ -0,0 +1,144 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
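+# Tests for DataFrame.reset_index: level selection, drop/inplace combinations,
+# col_level/col_fill placement, and duplicate or invalid level names.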
+ + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]]) +@pytest.mark.parametrize( + "column_names", + [ + ["v0", "v1"], + ["v0", "index"], + pd.MultiIndex.from_tuples([("x0", "x1"), ("y0", "y1")]), + pd.MultiIndex.from_tuples([(1, 2), (10, 11)], names=["ABC", "DEF"]), + ], +) +@pytest.mark.parametrize("col_level", [0, 1]) +@pytest.mark.parametrize("col_fill", ["", "some_lv"]) +def test_reset_index(level, drop, column_names, inplace, col_level, col_fill): + midx = pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["l0", None] + ) + pdf = pd.DataFrame( + [[1, 2], [3, 4], [5, 6], [7, 8]], index=midx, columns=column_names + ) + gdf = cudf.from_pandas(pdf) + + expect = pdf.reset_index( + level=level, + drop=drop, + inplace=inplace, + col_level=col_level, + col_fill=col_fill, + ) + got = gdf.reset_index( + level=level, + drop=drop, + inplace=inplace, + col_level=col_level, + col_fill=col_fill, + ) + if inplace: + expect = pdf + got = gdf + + assert_eq(expect, got) + + +@pytest.mark.parametrize("level", [None, 0, 1, [None]]) +@pytest.mark.parametrize("col_level", [0, 1]) +@pytest.mark.parametrize("col_fill", ["", "some_lv"]) +def test_reset_index_dup_level_name(level, drop, inplace, col_level, col_fill): + # midx levels are named [None, None] + midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + pdf = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=midx) + gdf = cudf.from_pandas(pdf) + if level == [None]: + assert_exceptions_equal( + lfunc=pdf.reset_index, + rfunc=gdf.reset_index, + lfunc_args_and_kwargs=( + [], + {"level": level, "drop": drop, "inplace": inplace}, + ), + rfunc_args_and_kwargs=( + [], + {"level": level, "drop": drop, "inplace": inplace}, + ), + ) + return + + expect = pdf.reset_index( + level=level, + drop=drop, + inplace=inplace, + col_level=col_level, + col_fill=col_fill, + ) + got = gdf.reset_index( + level=level, + drop=drop, + inplace=inplace, + col_level=col_level, + col_fill=col_fill, + ) + if inplace: + expect = pdf + got = gdf + + assert_eq(expect, got) + + +@pytest.mark.parametrize("col_level", [0, 1]) +@pytest.mark.parametrize("col_fill", ["", "some_lv"]) +def test_reset_index_named(drop, inplace, col_level, col_fill): + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + pdf.index.name = "cudf" + gdf.index.name = "cudf" + + expect = pdf.reset_index( + drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill + ) + got = gdf.reset_index( + drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill + ) + if inplace: + expect = pdf + got = gdf + assert_eq(expect, got) + + +@pytest.mark.parametrize("column_names", [["x", "y"], ["index", "y"]]) +@pytest.mark.parametrize("col_level", [0, 1]) +@pytest.mark.parametrize("col_fill", ["", "some_lv"]) +def test_reset_index_unnamed(drop, inplace, column_names, col_level, col_fill): + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + pdf.columns = column_names + gdf.columns = column_names + + expect = pdf.reset_index( + drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill + ) + got = gdf.reset_index( + drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill + ) + if inplace: + expect = pdf + got = gdf + assert_eq(expect, got) + + +def 
test_reset_index_invalid_level(): + with pytest.raises(IndexError): + cudf.DataFrame([1]).reset_index(level=2) + + with pytest.raises(IndexError): + pd.DataFrame([1]).reset_index(level=2) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_round.py b/python/cudf/cudf/tests/dataframe/methods/test_round.py new file mode 100644 index 00000000000..6c4781236c9 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_round.py @@ -0,0 +1,70 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "decimals", + [ + -3, + 0, + 5, + pd.Series( + [1, 4, 3, -6], + index=["floats", "ints", "floats_with_nan", "floats_same"], + ), + cudf.Series( + [-4, -2, 12], index=["ints", "floats_with_nan", "floats_same"] + ), + {"floats": -1, "ints": 15, "floats_will_nan": 2}, + ], +) +def test_dataframe_round(decimals): + rng = np.random.default_rng(seed=0) + gdf = cudf.DataFrame( + { + "floats": np.arange(0.5, 10.5, 1), + "ints": rng.normal(-100, 100, 10), + "floats_with_na": np.array( + [ + 14.123, + 2.343, + np.nan, + 0.0, + -8.302, + np.nan, + 94.313, + None, + -8.029, + np.nan, + ] + ), + "floats_same": np.repeat([-0.6459412758761901], 10), + "bools": rng.choice([True, None, False], 10), + "strings": rng.choice(["abc", "xyz", None], 10), + "struct": rng.choice([{"abc": 1}, {"xyz": 2}, None], 10), + "list": [[1], [2], None, [4], [3]] * 2, + } + ) + pdf = gdf.to_pandas() + + if isinstance(decimals, cudf.Series): + pdecimals = decimals.to_pandas() + else: + pdecimals = decimals + + result = gdf.round(decimals) + expected = pdf.round(pdecimals) + + assert_eq(result, expected) + + +def test_dataframe_round_dict_decimal_validation(): + df = cudf.DataFrame({"A": [0.12], "B": [0.13]}) + with pytest.raises(TypeError): + df.round({"A": 1, "B": 0.5}) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_select_dtypes.py b/python/cudf/cudf/tests/dataframe/methods/test_select_dtypes.py new file mode 100644 index 00000000000..70a8ce0ed49 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_select_dtypes.py @@ -0,0 +1,170 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
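+# Tests for DataFrame.select_dtypes include/exclude filtering, matched
+# against pandas, including datetime dtypes and invalid-argument errors.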
+ +import numpy as np + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def test_select_dtype(): + gdf = cudf.datasets.randomdata( + nrows=20, dtypes={"a": "category", "b": int, "c": float, "d": str} + ) + pdf = gdf.to_pandas() + + assert_eq(pdf.select_dtypes("float64"), gdf.select_dtypes("float64")) + assert_eq(pdf.select_dtypes(np.float64), gdf.select_dtypes(np.float64)) + assert_eq( + pdf.select_dtypes(include=["float64"]), + gdf.select_dtypes(include=["float64"]), + ) + assert_eq( + pdf.select_dtypes(include=["object", "int", "category"]), + gdf.select_dtypes(include=["object", "int", "category"]), + ) + + assert_eq( + pdf.select_dtypes(include=["int64", "float64"]), + gdf.select_dtypes(include=["int64", "float64"]), + ) + assert_eq( + pdf.select_dtypes(include=np.number), + gdf.select_dtypes(include=np.number), + ) + assert_eq( + pdf.select_dtypes(include=[np.int64, np.float64]), + gdf.select_dtypes(include=[np.int64, np.float64]), + ) + + assert_eq( + pdf.select_dtypes(include=["category"]), + gdf.select_dtypes(include=["category"]), + ) + assert_eq( + pdf.select_dtypes(exclude=np.number), + gdf.select_dtypes(exclude=np.number), + ) + + assert_exceptions_equal( + lfunc=pdf.select_dtypes, + rfunc=gdf.select_dtypes, + lfunc_args_and_kwargs=([], {"includes": ["Foo"]}), + rfunc_args_and_kwargs=([], {"includes": ["Foo"]}), + ) + + assert_exceptions_equal( + lfunc=pdf.select_dtypes, + rfunc=gdf.select_dtypes, + lfunc_args_and_kwargs=( + [], + {"exclude": np.number, "include": np.number}, + ), + rfunc_args_and_kwargs=( + [], + {"exclude": np.number, "include": np.number}, + ), + ) + + gdf = cudf.DataFrame( + {"A": [3, 4, 5], "C": [1, 2, 3], "D": ["a", "b", "c"]} + ) + pdf = gdf.to_pandas() + assert_eq( + pdf.select_dtypes(include=["object", "int", "category"]), + gdf.select_dtypes(include=["object", "int", "category"]), + ) + assert_eq( + pdf.select_dtypes(include=["object"], exclude=["category"]), + gdf.select_dtypes(include=["object"], exclude=["category"]), + ) + + gdf = cudf.DataFrame({"a": range(10), "b": range(10, 20)}) + pdf = gdf.to_pandas() + assert_eq( + pdf.select_dtypes(include=["category"]), + gdf.select_dtypes(include=["category"]), + ) + assert_eq( + pdf.select_dtypes(include=["float"]), + gdf.select_dtypes(include=["float"]), + ) + assert_eq( + pdf.select_dtypes(include=["object"]), + gdf.select_dtypes(include=["object"]), + ) + assert_eq( + pdf.select_dtypes(include=["int"]), gdf.select_dtypes(include=["int"]) + ) + assert_eq( + pdf.select_dtypes(exclude=["float"]), + gdf.select_dtypes(exclude=["float"]), + ) + assert_eq( + pdf.select_dtypes(exclude=["object"]), + gdf.select_dtypes(exclude=["object"]), + ) + assert_eq( + pdf.select_dtypes(include=["int"], exclude=["object"]), + gdf.select_dtypes(include=["int"], exclude=["object"]), + ) + + assert_exceptions_equal( + lfunc=pdf.select_dtypes, + rfunc=gdf.select_dtypes, + ) + + gdf = cudf.DataFrame( + {"a": cudf.Series([], dtype="int"), "b": cudf.Series([], dtype="str")} + ) + pdf = gdf.to_pandas() + assert_eq( + pdf.select_dtypes(exclude=["object"]), + gdf.select_dtypes(exclude=["object"]), + ) + assert_eq( + pdf.select_dtypes(include=["int"], exclude=["object"]), + gdf.select_dtypes(include=["int"], exclude=["object"]), + ) + + gdf = cudf.DataFrame( + {"int_col": [0, 1, 2], "list_col": [[1, 2], [3, 4], [5, 6]]} + ) + pdf = gdf.to_pandas() + assert_eq( + pdf.select_dtypes("int64"), + gdf.select_dtypes("int64"), + ) + + +def test_select_dtype_datetime(): + gdf = 
cudf.datasets.timeseries( + start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={"x": int} + ) + gdf = gdf.reset_index() + pdf = gdf.to_pandas() + + assert_eq(pdf.select_dtypes("datetime64"), gdf.select_dtypes("datetime64")) + assert_eq( + pdf.select_dtypes(np.dtype("datetime64")), + gdf.select_dtypes(np.dtype("datetime64")), + ) + assert_eq( + pdf.select_dtypes(include="datetime64"), + gdf.select_dtypes(include="datetime64"), + ) + + +def test_select_dtype_datetime_with_frequency(): + gdf = cudf.datasets.timeseries( + start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={"x": int} + ) + gdf = gdf.reset_index() + pdf = gdf.to_pandas() + + assert_exceptions_equal( + pdf.select_dtypes, + gdf.select_dtypes, + (["datetime64[ms]"],), + (["datetime64[ms]"],), + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_set_index.py b/python/cudf/cudf/tests/dataframe/methods/test_set_index.py new file mode 100644 index 00000000000..e1a99df4119 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_set_index.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "index", + [ + "a", + ["a", "b"], + pd.CategoricalIndex(["I", "II", "III", "IV", "V"]), + pd.Series(["h", "i", "k", "l", "m"]), + ["b", pd.Index(["I", "II", "III", "IV", "V"])], + ["c", [11, 12, 13, 14, 15]], + pd.MultiIndex( + levels=[ + ["I", "II", "III", "IV", "V"], + ["one", "two", "three", "four", "five"], + ], + codes=[[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]], + names=["col1", "col2"], + ), + pd.RangeIndex(0, 5), # corner case + [pd.Series(["h", "i", "k", "l", "m"]), pd.RangeIndex(0, 5)], + [ + pd.MultiIndex( + levels=[ + ["I", "II", "III", "IV", "V"], + ["one", "two", "three", "four", "five"], + ], + codes=[[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]], + names=["col1", "col2"], + ), + pd.RangeIndex(0, 5), + ], + ], +) +@pytest.mark.parametrize("append", [True, False]) +def test_set_index(index, drop, append, inplace): + gdf = cudf.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ) + pdf = gdf.to_pandas() + + expected = pdf.set_index(index, inplace=inplace, drop=drop, append=append) + actual = gdf.set_index(index, inplace=inplace, drop=drop, append=append) + + if inplace: + expected = pdf + actual = gdf + assert_eq(expected, actual) + + +@pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) +def test_set_index_verify_integrity(index): + gdf = cudf.DataFrame( + { + "a": [1, 1, 2, 2, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ) + with pytest.raises(ValueError): + gdf.set_index(index, verify_integrity=True) + + +def test_set_index_multi(drop): + nelem = 10 + rng = np.random.default_rng(seed=0) + a = np.arange(nelem) + rng.shuffle(a) + df = pd.DataFrame( + { + "a": a, + "b": rng.integers(0, 4, size=nelem), + "c": rng.uniform(low=0, high=4, size=nelem), + "d": rng.choice(["green", "black", "white"], nelem), + } + ) + df["e"] = df["d"].astype("category") + gdf = cudf.DataFrame.from_pandas(df) + + assert_eq(gdf.set_index("a", drop=drop), gdf.set_index(["a"], drop=drop)) + assert_eq( + df.set_index(["b", "c"], drop=drop), + gdf.set_index(["b", "c"], drop=drop), + ) + assert_eq( + df.set_index(["d", "b"], drop=drop), + gdf.set_index(["d", "b"], drop=drop), + ) + assert_eq( + df.set_index(["b", "d", "e"], drop=drop), + gdf.set_index(["b", "d", "e"], drop=drop), + ) diff 
--git a/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py new file mode 100644 index 00000000000..905c4622ade --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py @@ -0,0 +1,147 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq + + +def test_dataframe_empty_sort_index(): + pdf = pd.DataFrame({"x": []}) + gdf = cudf.DataFrame.from_pandas(pdf) + + expect = pdf.sort_index() + got = gdf.sort_index() + + assert_eq(expect, got, check_index_type=True) + + +@pytest.mark.parametrize( + "index", + [ + pd.RangeIndex(0, 3, 1), + [3.0, 1.0, np.nan], + # Test for single column MultiIndex + pd.MultiIndex.from_arrays( + [ + [2, 0, 1], + ] + ), + pd.RangeIndex(2, -1, -1), + ], +) +@pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_dataframe_sort_index( + request, index, axis, ascending, inplace, ignore_index, na_position +): + if not PANDAS_GE_220 and axis in (1, "columns") and ignore_index: + pytest.skip(reason="Bug fixed in pandas-2.2") + + pdf = pd.DataFrame( + {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, + index=index, + ) + gdf = cudf.DataFrame.from_pandas(pdf) + + expected = pdf.sort_index( + axis=axis, + ascending=ascending, + ignore_index=ignore_index, + inplace=inplace, + na_position=na_position, + ) + got = gdf.sort_index( + axis=axis, + ascending=ascending, + ignore_index=ignore_index, + inplace=inplace, + na_position=na_position, + ) + + if inplace is True: + assert_eq(pdf, gdf, check_index_type=True) + else: + assert_eq(expected, got, check_index_type=True) + + +@pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) +@pytest.mark.parametrize( + "level", + [ + 0, + "b", + 1, + ["b"], + "a", + ["a", "b"], + ["b", "a"], + [0, 1], + [1, 0], + [0, 2], + None, + ], +) +@pytest.mark.parametrize("na_position", ["first", "last"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_dataframe_multiindex_sort_index( + request, axis, level, ascending, inplace, ignore_index, na_position +): + request.applymarker( + pytest.mark.xfail( + condition=axis in (1, "columns") + and level is None + and not ascending + and ignore_index, + reason="https://github.com/pandas-dev/pandas/issues/57293", + ) + ) + pdf = pd.DataFrame( + { + "b": [1.0, 3.0, np.nan], + "a": [1, 4, 3], + 1: ["a", "b", "c"], + "e": [3, 1, 4], + "d": [1, 2, 8], + } + ).set_index(["b", "a", 1]) + gdf = cudf.DataFrame.from_pandas(pdf) + + expected = pdf.sort_index( + axis=axis, + level=level, + ascending=ascending, + inplace=inplace, + na_position=na_position, + ignore_index=ignore_index, + ) + got = gdf.sort_index( + axis=axis, + level=level, + ascending=ascending, + ignore_index=ignore_index, + inplace=inplace, + na_position=na_position, + ) + + if inplace is True: + assert_eq(pdf, gdf) + else: + assert_eq(expected, got) + + +def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names(): + gdf = cudf.DataFrame([[1, 2, 3]], columns=["b", "a", "c"]) + result = gdf.sort_index(axis=1, ignore_index=True) + assert result._data.names == tuple(result._data.keys()) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_swaplevel.py 
b/python/cudf/cudf/tests/dataframe/methods/test_swaplevel.py new file mode 100644 index 00000000000..5b1a4b6c092 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_swaplevel.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_dataframe_swaplevel_axis_0(): + midx = cudf.MultiIndex( + levels=[ + ["Work"], + ["Final exam", "Coursework"], + ["History", "Geography"], + ["January", "February", "March", "April"], + ], + codes=[[0, 0, 0, 0], [0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 3]], + names=["a", "b", "c", "d"], + ) + cdf = cudf.DataFrame( + { + "Grade": ["A", "B", "A", "C"], + "Percentage": ["95", "85", "95", "75"], + }, + index=midx, + ) + pdf = cdf.to_pandas() + + assert_eq(pdf.swaplevel(), cdf.swaplevel()) + assert_eq(pdf.swaplevel(), cdf.swaplevel(-2, -1, 0)) + assert_eq(pdf.swaplevel(1, 2), cdf.swaplevel(1, 2)) + assert_eq(cdf.swaplevel(2, 1), cdf.swaplevel(1, 2)) + assert_eq(pdf.swaplevel(-1, -3), cdf.swaplevel(-1, -3)) + assert_eq(pdf.swaplevel("a", "b", 0), cdf.swaplevel("a", "b", 0)) + assert_eq(cdf.swaplevel("a", "b"), cdf.swaplevel("b", "a")) + + +def test_dataframe_swaplevel_TypeError(): + cdf = cudf.DataFrame( + {"a": [1, 2, 3], "c": [10, 20, 30]}, index=["x", "y", "z"] + ) + + with pytest.raises(TypeError): + cdf.swaplevel() + + +def test_dataframe_swaplevel_axis_1(): + midx = cudf.MultiIndex( + levels=[ + ["b", "a"], + ["bb", "aa"], + ["bbb", "aaa"], + ], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 0, 1]], + names=[None, "a", "b"], + ) + cdf = cudf.DataFrame( + data=[[45, 30, 100, 90], [200, 100, 50, 80]], + columns=midx, + ) + pdf = cdf.to_pandas() + + assert_eq(pdf.swaplevel(1, 2, 1), cdf.swaplevel(1, 2, 1)) + assert_eq(pdf.swaplevel("a", "b", 1), cdf.swaplevel("a", "b", 1)) + assert_eq(cdf.swaplevel(2, 1, 1), cdf.swaplevel(1, 2, 1)) + assert_eq(pdf.swaplevel(0, 2, 1), cdf.swaplevel(0, 2, 1)) + assert_eq(pdf.swaplevel(2, 0, 1), cdf.swaplevel(2, 0, 1)) + assert_eq(cdf.swaplevel("a", "a", 1), cdf.swaplevel("b", "b", 1)) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py index c033efed0b0..2d3174a8225 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py @@ -1,5 +1,6 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. 
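+# Note on `preserve_index` (it mirrors pyarrow.Table.from_pandas): True always +# writes the index out as extra table column(s), False drops it, and None +# keeps it only when it carries more information than a default RangeIndex. +# A rough sketch of the expectation, not an exact repr: +# >>> cudf.DataFrame({"x": [1, 2]}).to_arrow(preserve_index=True) +# ... a pyarrow.Table with columns "__index_level_0__" and "x" ...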
+import numpy as np import pandas as pd import pyarrow as pa import pytest @@ -8,7 +9,11 @@ from cudf.testing import assert_eq -@pytest.mark.parametrize("preserve_index", [False, True, None]) +@pytest.fixture(params=[False, True, None]) +def preserve_index(request): + return request.param + + def test_dataframe_to_arrow_preserve_index(preserve_index): df = cudf.DataFrame({"x": ["cat", "dog"] * 5}) pf = df.to_pandas() @@ -23,3 +28,112 @@ def test_datetime_to_arrow(datetime_types_as_str): assert_eq( gdf, cudf.DataFrame.from_arrow(gdf.to_arrow(preserve_index=False)) ) + + +def test_arrow_pandas_compat(preserve_index): + data = {"a": range(10), "b": range(10)} + pdf = pd.DataFrame(data, index=pd.Index(np.arange(10), name="z")) + gdf = cudf.DataFrame(data, index=cudf.Index(np.arange(10), name="z")) + + pdf_arrow_table = pa.Table.from_pandas(pdf, preserve_index=preserve_index) + gdf_arrow_table = gdf.to_arrow(preserve_index=preserve_index) + + assert pa.Table.equals(pdf_arrow_table, gdf_arrow_table) + + gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table) + pdf2 = pdf_arrow_table.to_pandas() + + assert_eq(pdf2, gdf2) + pdf.columns.name = "abc" + pdf_arrow_table = pa.Table.from_pandas(pdf, preserve_index=preserve_index) + + gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table) + pdf2 = pdf_arrow_table.to_pandas() + assert_eq(pdf2, gdf2) + + +@pytest.mark.parametrize( + "index", + [ + None, + cudf.RangeIndex(3, name="a"), + "a", + "b", + ["a", "b"], + cudf.RangeIndex(0, 5, 2, name="a"), + ], +) +def test_arrow_round_trip(preserve_index, index): + data = {"a": [4, 5, 6], "b": ["cat", "dog", "bird"]} + if isinstance(index, (list, str)): + gdf = cudf.DataFrame(data).set_index(index) + else: + gdf = cudf.DataFrame(data, index=index) + + table = gdf.to_arrow(preserve_index=preserve_index) + table_pd = pa.Table.from_pandas( + gdf.to_pandas(), preserve_index=preserve_index + ) + + gdf_out = cudf.DataFrame.from_arrow(table) + pdf_out = table_pd.to_pandas() + + assert_eq(gdf_out, pdf_out) + + +@pytest.mark.parametrize("nelem", [0, 2]) +def test_to_arrow(nelem, all_supported_types_as_str): + if all_supported_types_as_str in {"category", "str"}: + pytest.skip(f"Test not applicable with {all_supported_types_as_str}") + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "a": rng.integers(0, 1000, nelem).astype( + all_supported_types_as_str + ), + "b": rng.integers(0, 1000, nelem).astype( + all_supported_types_as_str + ), + } + ) + gdf = cudf.DataFrame.from_pandas(df) + + pa_df = pa.Table.from_pandas( + df, preserve_index=False + ).replace_schema_metadata(None) + + pa_gdf = gdf.to_arrow(preserve_index=False).replace_schema_metadata(None) + + assert isinstance(pa_gdf, pa.Table) + assert pa.Table.equals(pa_df, pa_gdf) + + pa_s = pa.Array.from_pandas(df.a) + pa_gs = gdf["a"].to_arrow() + + assert isinstance(pa_gs, pa.Array) + assert pa.Array.equals(pa_s, pa_gs) + + pa_i = pa.Array.from_pandas(df.index) + pa_gi = gdf.index.to_arrow() + + assert isinstance(pa_gi, pa.Array) + assert pa.Array.equals(pa_i, pa_gi) + + +def test_to_arrow_categorical(): + df = pd.DataFrame({"a": pd.Series(["a", "b", "c"], dtype="category")}) + gdf = cudf.DataFrame.from_pandas(df) + + pa_df = pa.Table.from_pandas( + df, preserve_index=False + ).replace_schema_metadata(None) + pa_gdf = gdf.to_arrow(preserve_index=False).replace_schema_metadata(None) + + assert isinstance(pa_gdf, pa.Table) + assert pa.Table.equals(pa_df, pa_gdf) + + pa_s = pa.Array.from_pandas(df.a) + pa_gs = gdf["a"].to_arrow() + + assert isinstance(pa_gs, pa.Array) + 
assert pa.Array.equals(pa_s, pa_gs) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py new file mode 100644 index 00000000000..2d47bd17840 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py @@ -0,0 +1,136 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import expand_bits_to_bytes, random_bitmask + + +def test_empty_dataframe_to_cupy(): + df = cudf.DataFrame() + + # Check fully empty dataframe. + mat = df.to_cupy() + assert mat.shape == (0, 0) + mat = df.to_numpy() + assert mat.shape == (0, 0) + + rng = np.random.default_rng(seed=0) + df = cudf.DataFrame( + { + "a": rng.random(10), + "b": rng.random(10), + "c": rng.random(10), + } + ) + + # Check all columns in empty dataframe. + mat = df.head(0).to_cupy() + assert mat.shape == (0, 3) + + +def test_dataframe_to_cupy(): + nelem = 123 + rng = np.random.default_rng(seed=0) + df = cudf.DataFrame( + { + "a": rng.random(nelem), + "b": rng.random(nelem), + "c": rng.random(nelem), + "d": rng.random(nelem), + } + ) + + # Check all columns + mat = df.to_cupy() + assert mat.shape == (nelem, 4) + assert mat.strides == (8, 984) + + mat = df.to_numpy() + assert mat.shape == (nelem, 4) + assert mat.strides == (8, 984) + for i, k in enumerate(df.columns): + np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) + + # Check column subset + mat = df[["a", "c"]].to_cupy().get() + assert mat.shape == (nelem, 2) + + for i, k in enumerate("ac"): + np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) + + +@pytest.mark.parametrize("has_nulls", [False, True]) +@pytest.mark.parametrize("use_na_value", [False, True]) +def test_dataframe_to_cupy_single_column(has_nulls, use_na_value): + nelem = 10 + data = np.arange(nelem, dtype=np.float64) + + if has_nulls: + data = data.astype("object") + data[::2] = None + + df = cudf.DataFrame({"a": data}) + + if has_nulls and not use_na_value: + with pytest.raises(ValueError, match="Column must have no nulls"): + df.to_cupy() + return + + na_value = 0.0 if use_na_value else None + expected = ( + cp.asarray(df["a"].fillna(na_value)) + if has_nulls + else cp.asarray(df["a"]) + ) + result = df.to_cupy(na_value=na_value) + assert result.shape == (nelem, 1) + assert_eq(result.ravel(), expected) + + +def test_dataframe_to_cupy_null_values(): + df = cudf.DataFrame() + + nelem = 123 + na = -10000 + + refvalues = {} + rng = np.random.default_rng(seed=0) + for k in "abcd": + df[k] = data = rng.random(nelem) + bitmask = random_bitmask(nelem) + df[k] = df[k]._column.set_mask(bitmask) + boolmask = np.asarray( + expand_bits_to_bytes(bitmask)[:nelem], dtype=np.bool_ + ) + data[~boolmask] = na + refvalues[k] = data + + # Check null value causes error + with pytest.raises(ValueError): + df.to_cupy() + with pytest.raises(ValueError): + df.to_numpy() + + for k in df.columns: + df[k] = df[k].fillna(na) + + mat = df.to_numpy() + for i, k in enumerate(df.columns): + np.testing.assert_array_equal(refvalues[k], mat[:, i]) + + +@pytest.mark.parametrize("method", ["to_cupy", "to_numpy"]) +@pytest.mark.parametrize("value", [1, True, 1.5]) +@pytest.mark.parametrize("constructor", ["DataFrame", "Series"]) +def test_to_array_categorical(method, value, constructor): + data = [value] + expected = getattr(pd, constructor)(data, dtype="category").to_numpy() + result = getattr( + getattr(cudf, 
constructor)(data, dtype="category"), method + )() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_transpose.py b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py index bb64e6d4896..96ef5deb49a 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_transpose.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py @@ -1,7 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import string - +import numpy as np import pandas as pd +import pytest import cudf from cudf.testing import assert_eq @@ -14,3 +16,84 @@ def test_multiindex_transpose(): ) gdf = cudf.from_pandas(pdf) assert_eq(pdf.transpose(), gdf.transpose()) + + +@pytest.mark.parametrize("num_cols", [1, 3]) +@pytest.mark.parametrize("num_rows", [1, 4]) +@pytest.mark.parametrize("nulls", ["none", "some", "all"]) +def test_dataframe_transpose( + nulls, num_cols, num_rows, all_supported_types_as_str +): + # In case of `bool` dtype: pandas <= 1.2.5 type-casts + # a boolean series to `float64` series if a `np.nan` is assigned to it: + # >>> s = pd.Series([True, False, True]) + # >>> s + # 0 True + # 1 False + # 2 True + # dtype: bool + # >>> s[[2]] = np.nan + # >>> s + # 0 1.0 + # 1 0.0 + # 2 NaN + # dtype: float64 + # In pandas >= 1.3.2 this behavior is fixed: + # >>> s = pd.Series([True, False, True]) + # >>> s + # 0 True + # 1 False + # 2 True + # dtype: bool + # >>> s[[2]] = np.nan + # >>> s + # 0 True + # 1 False + # 2 NaN + # dtype: object + # In cudf we change `object` dtype to `str` type - for which there + # is no transpose implemented yet. Hence we need to test transpose + # against pandas nullable types as they are the ones that closely + # resemble `cudf` dtypes behavior. + if all_supported_types_as_str in {"category", "str"}: + pytest.skip(f"Test not applicable with {all_supported_types_as_str}") + pdf = pd.DataFrame() + rng = np.random.default_rng(seed=0) + null_rep = ( + np.nan + if all_supported_types_as_str in ["float32", "float64"] + else None + ) + np_dtype = all_supported_types_as_str + dtype = np.dtype(all_supported_types_as_str) + dtype = cudf.utils.dtypes.np_dtypes_to_pandas_dtypes.get(dtype, dtype) + for i in range(num_cols): + colname = string.ascii_lowercase[i] + data = pd.Series( + rng.integers(0, 26, num_rows).astype(np_dtype), + dtype=dtype, + ) + if nulls == "some": + idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) + if len(idx): + data[idx] = null_rep + elif nulls == "all": + data[:] = null_rep + pdf[colname] = data + + gdf = cudf.DataFrame.from_pandas(pdf) + + got_function = gdf.transpose() + got_property = gdf.T + + expect = pdf.transpose() + nullable = dtype.kind not in "Mm" + + assert_eq(expect, got_function.to_pandas(nullable=nullable)) + assert_eq(expect, got_property.to_pandas(nullable=nullable)) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_truncate.py b/python/cudf/cudf/tests/dataframe/methods/test_truncate.py new file mode 100644 index 00000000000..640a26fe562 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_truncate.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
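+# `truncate` keeps the rows (axis=0) or columns (axis=1) whose labels lie in +# the closed interval [before, after]; the axis being truncated must already +# be sorted. A rough sketch of the expectation exercised below: +# >>> cudf.DataFrame({"A": list("abcde")}, index=[1, 2, 3, 4, 5]).truncate( +# ... before=2, after=4 +# ... ) # -> the rows labelled 2, 3 and 4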
+ + +import cudf +from cudf.testing import assert_eq + + +def test_dataframe_truncate_axis_0(): + df = cudf.DataFrame( + { + "A": ["a", "b", "c", "d", "e"], + "B": ["f", "g", "h", "i", "j"], + "C": ["k", "l", "m", "n", "o"], + }, + index=[1, 2, 3, 4, 5], + ) + pdf = df.to_pandas() + + expected = pdf.truncate(before=2, after=4, axis="index") + actual = df.truncate(before=2, after=4, axis="index") + assert_eq(actual, expected) + + expected = pdf.truncate(before=1, after=4, axis=0) + actual = df.truncate(before=1, after=4, axis=0) + assert_eq(expected, actual) + + +def test_dataframe_truncate_axis_1(): + df = cudf.DataFrame( + { + "A": ["a", "b", "c", "d", "e"], + "B": ["f", "g", "h", "i", "j"], + "C": ["k", "l", "m", "n", "o"], + }, + index=[1, 2, 3, 4, 5], + ) + pdf = df.to_pandas() + + expected = pdf.truncate(before="A", after="B", axis="columns") + actual = df.truncate(before="A", after="B", axis="columns") + assert_eq(actual, expected) + + expected = pdf.truncate(before="A", after="B", axis=1) + actual = df.truncate(before="A", after="B", axis=1) + assert_eq(actual, expected) + + +def test_dataframe_truncate_datetimeindex(): + dates = cudf.date_range( + "2021-01-01 23:45:00", "2021-01-01 23:46:00", freq="s" + ) + df = cudf.DataFrame(data={"A": 1, "B": 2}, index=dates) + pdf = df.to_pandas() + expected = pdf.truncate( + before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" + ) + actual = df.truncate( + before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" + ) + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/dataframe/test_attributes.py b/python/cudf/cudf/tests/dataframe/test_attributes.py new file mode 100644 index 00000000000..afe2dbc10c4 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_attributes.py @@ -0,0 +1,56 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2]}, + {"a": [1, 2, 3], "b": [3, 4, 5]}, + {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [1, 3, 5, 7]}, + {"a": [np.nan, 2, 3, 4], "b": [3, 4, np.nan, 6], "c": [1, 3, 5, 7]}, + {1: [1, 2, 3], 2: [3, 4, 5]}, + {"a": [1, None, None], "b": [3, np.nan, np.nan]}, + {1: ["a", "b", "c"], 2: ["q", "w", "u"]}, + {1: ["a", np.nan, "c"], 2: ["q", None, "u"]}, + {}, + {1: [], 2: [], 3: []}, + [1, 2, 3], + ], +) +def test_axes(data): + csr = cudf.DataFrame(data) + psr = pd.DataFrame(data) + + expected = psr.axes + actual = csr.axes + + for e, a in zip(expected, actual, strict=True): + assert_eq(e, a, exact=False) + + +def test_iter(): + data = {"a": [1, 2, 3], "b": [4, 5, 6]} + pdf = pd.DataFrame(data) + gdf = cudf.DataFrame(data) + assert list(pdf) == list(gdf) + + +def test_column_assignment(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + new_cols = ["q", "r", "s"] + gdf.columns = new_cols + assert list(gdf.columns) == new_cols + + +def test_ndim(): + pdf = pd.DataFrame({"x": range(5), "y": range(5, 10)}) + gdf = cudf.DataFrame.from_pandas(pdf) + assert pdf.ndim == gdf.ndim diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py index 6d45334717f..59463b812a0 100644 --- a/python/cudf/cudf/tests/dataframe/test_constructors.py +++ b/python/cudf/cudf/tests/dataframe/test_constructors.py @@ -1,12 +1,408 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
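+# The alignment tests below lean on pandas constructor semantics: building a +# DataFrame from a dict of Series with differing indexes takes the union of +# the indexes as the row labels and fills positions missing from a given +# Series with nulls, e.g. (a hypothetical sketch): +# >>> cudf.DataFrame({"a": cudf.Series([1.0], index=[0]), +# ... "b": cudf.Series([2.0], index=[1])}) +# a b +# 0 1.0 <NA> +# 1 <NA> 2.0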
+from contextlib import nullcontext as does_not_raise + +import cupy as cp +import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf from cudf.testing import assert_eq +def test_init_via_list_of_tuples(): + data = [ + (5, "cats", "jump", np.nan), + (2, "dogs", "dig", 7.5), + (3, "cows", "moo", -2.1, "occasionally"), + ] + + pdf = pd.DataFrame(data) + gdf = cudf.DataFrame(data) + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("columns", [["a", "b"], pd.Series(["a", "b"])]) +def test_init_via_list_of_series(columns): + data = [pd.Series([1, 2]), pd.Series([3, 4])] + + pdf = pd.DataFrame(data, columns=columns) + gdf = cudf.DataFrame(data, columns=columns) + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("index", [None, [0, 1, 2]]) +def test_init_with_missing_columns(index): + """Test initialization when columns and data keys are disjoint.""" + data = {"a": [1, 2, 3], "b": [2, 3, 4]} + columns = ["c", "d"] + + pdf = pd.DataFrame(data, columns=columns, index=index) + gdf = cudf.DataFrame(data, columns=columns, index=index) + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("rows", [0, 1, 2, 100]) +def test_init_via_list_of_empty_tuples(rows): + data = [()] * rows + + pdf = pd.DataFrame(data) + gdf = cudf.DataFrame(data) + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "dict_of_series", + [ + {"a": pd.Series([1.0, 2.0, 3.0])}, + {"a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6])}, + { + "a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), + "b": pd.Series([1.0, 2.0, 4.0], index=[1, 2, 3]), + }, + {"a": [1, 2, 3], "b": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6])}, + { + "a": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]), + "b": pd.Series([1.0, 2.0, 4.0], index=["c", "d", "e"]), + }, + { + "a": pd.Series( + ["a", "b", "c"], + index=pd.MultiIndex.from_tuples([(1, 2), (1, 3), (2, 3)]), + ), + "b": pd.Series( + ["a", " b", "d"], + index=pd.MultiIndex.from_tuples([(1, 2), (1, 3), (2, 3)]), + ), + }, + ], +) +def test_init_from_series_align(dict_of_series): + pdf = pd.DataFrame(dict_of_series) + gdf = cudf.DataFrame(dict_of_series) + + assert_eq(pdf, gdf) + + for key in dict_of_series: + if isinstance(dict_of_series[key], pd.Series): + dict_of_series[key] = cudf.Series(dict_of_series[key]) + + gdf = cudf.DataFrame(dict_of_series) + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + ("dict_of_series", "expectation"), + [ + ( + { + "a": pd.Series(["a", "b", "c"], index=[4, 4, 5]), + "b": pd.Series(["a", "b", "c"], index=[4, 5, 6]), + }, + pytest.raises( + ValueError, match="Cannot align indices with non-unique values" + ), + ), + ( + { + "a": pd.Series(["a", "b", "c"], index=[4, 4, 5]), + "b": pd.Series(["a", "b", "c"], index=[4, 4, 5]), + }, + does_not_raise(), + ), + ], +) +def test_init_from_series_align_nonunique(dict_of_series, expectation): + with expectation: + gdf = cudf.DataFrame(dict_of_series) + + if isinstance(expectation, does_not_raise): + pdf = pd.DataFrame(dict_of_series) + assert_eq(pdf, gdf) + + +def test_init_unaligned_with_index(): + pdf = pd.DataFrame( + { + "a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), + "b": pd.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), + }, + index=[7, 8, 9], + ) + gdf = cudf.DataFrame( + { + "a": cudf.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), + "b": cudf.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), + }, + index=[7, 8, 9], + ) + + assert_eq(pdf, gdf, check_dtype=False) + + +def test_init_series_list_columns_unsort(): + pseries = [ + pd.Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3) + ] + gseries = 
[ + cudf.Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3) + ] + pdf = pd.DataFrame(pseries) + gdf = cudf.DataFrame(gseries) + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("nelem", [0, 10]) +@pytest.mark.parametrize("nchunks", [1, 5]) +def test_from_arrow_chunked_arrays(nelem, nchunks, numeric_types_as_str): + rng = np.random.default_rng(seed=0) + np_list_data = [ + rng.integers(0, 100, nelem).astype(numeric_types_as_str) + for i in range(nchunks) + ] + pa_chunk_array = pa.chunked_array(np_list_data) + + expect = pa_chunk_array.to_pandas() + got = cudf.Series(pa_chunk_array) + + assert_eq(expect, got) + + np_list_data2 = [ + rng.integers(0, 100, nelem).astype(numeric_types_as_str) + for i in range(nchunks) + ] + pa_chunk_array2 = pa.chunked_array(np_list_data2) + pa_table = pa.Table.from_arrays( + [pa_chunk_array, pa_chunk_array2], names=["a", "b"] + ) + + expect = pa_table.to_pandas() + got = cudf.DataFrame.from_arrow(pa_table) + + assert_eq(expect, got) + + +def test_1row_arrow_table(): + data = [pa.array([0]), pa.array([1])] + batch = pa.RecordBatch.from_arrays(data, ["f0", "f1"]) + table = pa.Table.from_batches([batch]) + + expect = table.to_pandas() + got = cudf.DataFrame.from_arrow(table) + assert_eq(expect, got) + + +def test_arrow_handle_no_index_name(): + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + gdf_arrow = gdf.to_arrow() + pdf_arrow = pa.Table.from_pandas(pdf) + assert pa.Table.equals(pdf_arrow, gdf_arrow) + + got = cudf.DataFrame.from_arrow(gdf_arrow) + expect = pdf_arrow.to_pandas() + assert_eq(expect, got) + + +def test_pandas_non_contiguous(): + rng = np.random.default_rng(seed=0) + arr1 = rng.random(size=(5000, 10)) + assert arr1.flags["C_CONTIGUOUS"] is True + df = pd.DataFrame(arr1) + for col in df.columns: + assert df[col].values.flags["C_CONTIGUOUS"] is False + + gdf = cudf.DataFrame.from_pandas(df) + assert_eq(gdf.to_pandas(), df) + + +def test_from_records(numeric_types_as_str): + h_ary = np.ndarray(shape=(10, 4), dtype=numeric_types_as_str) + rec_ary = h_ary.view(np.recarray) + + gdf = cudf.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) + df = pd.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) + assert isinstance(gdf, cudf.DataFrame) + assert_eq(df, gdf) + + gdf = cudf.DataFrame.from_records(rec_ary) + df = pd.DataFrame.from_records(rec_ary) + assert isinstance(gdf, cudf.DataFrame) + assert_eq(df, gdf) + + +@pytest.mark.parametrize("columns", [None, ["first", "second", "third"]]) +@pytest.mark.parametrize( + "index", + [ + None, + ["first", "second"], + "name", + "age", + "weight", + [10, 11], + ["abc", "xyz"], + ], +) +def test_from_records_index(columns, index): + rec_ary = np.array( + [("Rex", 9, 81.0), ("Fido", 3, 27.0)], + dtype=[("name", "U10"), ("age", "i4"), ("weight", "f4")], + ) + gdf = cudf.DataFrame.from_records(rec_ary, columns=columns, index=index) + df = pd.DataFrame.from_records(rec_ary, columns=columns, index=index) + assert isinstance(gdf, cudf.DataFrame) + assert_eq(df, gdf) + + +def test_dataframe_construction_from_cupy_arrays(): + h_ary = np.array([[1, 2, 3], [4, 5, 6]], np.int32) + d_ary = cp.asarray(h_ary) + + gdf = cudf.DataFrame(d_ary, columns=["a", "b", "c"]) + df = pd.DataFrame(h_ary, columns=["a", "b", "c"]) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + gdf = cudf.DataFrame(d_ary) + df = pd.DataFrame(h_ary) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + gdf = 
cudf.DataFrame(d_ary, index=["a", "b"]) + df = pd.DataFrame(h_ary, index=["a", "b"]) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + gdf = cudf.DataFrame(d_ary) + gdf = gdf.set_index(keys=0, drop=False) + df = pd.DataFrame(h_ary) + df = df.set_index(keys=0, drop=False) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + gdf = cudf.DataFrame(d_ary) + gdf = gdf.set_index(keys=1, drop=False) + df = pd.DataFrame(h_ary) + df = df.set_index(keys=1, drop=False) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + +def test_dataframe_cupy_wrong_dimensions(): + d_ary = cp.empty((2, 3, 4), dtype=np.int32) + with pytest.raises( + ValueError, match="records dimension expected 1 or 2 but found: 3" + ): + cudf.DataFrame(d_ary) + + +def test_dataframe_cupy_array_wrong_index(): + d_ary = cp.empty((2, 3), dtype=np.int32) + + with pytest.raises(ValueError): + cudf.DataFrame(d_ary, index=["a"]) + + with pytest.raises(TypeError): + cudf.DataFrame(d_ary, index="a") + + +def test_index_in_dataframe_constructor(): + a = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) + b = cudf.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) + + assert_eq(a, b) + assert_eq(a.loc[4:], b.loc[4:]) + + +@pytest.mark.parametrize("nelem", [0, 2]) +def test_from_arrow(nelem, all_supported_types_as_str): + if all_supported_types_as_str in {"category", "str"}: + pytest.skip(f"Test not applicable with {all_supported_types_as_str}") + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "a": rng.integers(0, 1000, nelem).astype( + all_supported_types_as_str + ), + "b": rng.integers(0, 1000, nelem).astype( + all_supported_types_as_str + ), + } + ) + padf = pa.Table.from_pandas( + df, preserve_index=False + ).replace_schema_metadata(None) + gdf = cudf.DataFrame.from_arrow(padf) + assert isinstance(gdf, cudf.DataFrame) + + assert_eq(df, gdf) + + s = pa.Array.from_pandas(df.a) + gs = cudf.Series.from_arrow(s) + assert isinstance(gs, cudf.Series) + + # Compare through NumPy: pyarrow's to_pandas() returns a NumPy-backed + # result, which sidesteps dtype differences between Arrow and cudf + np.testing.assert_array_equal(s.to_pandas(), gs.to_numpy()) + + +def test_from_arrow_chunked_categories(): + # Verify that categories are properly deduplicated across chunked arrays. 
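+ # For instance, two chunks that both dictionary-encode against + # ["foo", "bar", "baz"] must yield a single categorical dtype with + # exactly those three categories, not a concatenated six-entry dictionary.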
+ indices = pa.array([0, 1, 0, 1, 2, 0, None, 2]) + dictionary = pa.array(["foo", "bar", "baz"]) + dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) + chunked_array = pa.chunked_array([dict_array, dict_array]) + table = pa.table({"a": chunked_array}) + df = cudf.DataFrame.from_arrow(table) + final_dictionary = df["a"].dtype.categories.to_arrow().to_pylist() + assert sorted(final_dictionary) == sorted(dictionary.to_pylist()) + + +def test_from_scalar_typing(request, all_supported_types_as_str): + if all_supported_types_as_str in {"category", "str"}: + pytest.skip(f"Test not applicable with {all_supported_types_as_str}") + request.applymarker( + pytest.mark.xfail( + all_supported_types_as_str + in {"timedelta64[ms]", "timedelta64[us]", "timedelta64[ns]"}, + reason=f"{all_supported_types_as_str} incorrectly results in timedelta64[s]", + ) + ) + rng = np.random.default_rng(seed=0) + if all_supported_types_as_str == "datetime64[ms]": + scalar = ( + np.dtype("int64").type(rng.integers(0, 5)).astype("datetime64[ms]") + ) + elif all_supported_types_as_str.startswith("datetime64"): + scalar = np.datetime64("2020-01-01").astype("datetime64[ms]") + all_supported_types_as_str = "datetime64[ms]" + else: + scalar = np.dtype(all_supported_types_as_str).type(rng.integers(0, 5)) + + gdf = cudf.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": scalar, + } + ) + assert gdf["b"].dtype == np.dtype(all_supported_types_as_str) + assert len(gdf["b"]) == len(gdf["a"]) + + @pytest.mark.parametrize( "data1, data2", [(1, 2), (1.0, 2.0), (3, 4.0)], diff --git a/python/cudf/cudf/tests/general_functions/test_from_pandas.py b/python/cudf/cudf/tests/general_functions/test_from_pandas.py new file mode 100644 index 00000000000..a7e7172d8d8 --- /dev/null +++ b/python/cudf/cudf/tests/general_functions/test_from_pandas.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_from_pandas_function(): + pdf = pd.DataFrame({"x": [1, 2, 3]}) + gdf = cudf.from_pandas(pdf) + assert isinstance(gdf, cudf.DataFrame) + assert_eq(pdf, gdf) + + gdf = cudf.from_pandas(pdf.x) + assert isinstance(gdf, cudf.Series) + assert_eq(pdf.x, gdf) + + with pytest.raises(TypeError): + cudf.from_pandas(123) diff --git a/python/cudf/cudf/tests/series/methods/test_shift.py b/python/cudf/cudf/tests/series/methods/test_shift.py new file mode 100644 index 00000000000..ac99f879f15 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_shift.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
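+# `shift(n)` moves values by n positions along the index and nulls out the +# vacated slots; pandas upcasts integer results to float to hold NaN, while +# cudf keeps the original dtype and uses nulls, e.g. (a rough sketch): +# >>> cudf.Series([1, 2, 3]).shift(1) +# 0 <NA> +# 1 1 +# 2 2 +# dtype: int64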
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("period", [-15, -1, 0, 1, 15]) +@pytest.mark.parametrize("data_empty", [False, True]) +def test_shift(numeric_types_as_str, period, data_empty): + # TODO: this function currently tests series.shift() + # but should instead test dataframe.shift() + if data_empty: + data = None + else: + data = np.arange(10, dtype=numeric_types_as_str) + + gs = cudf.Series(data) + ps = pd.Series(data) + + shifted_outcome = gs.shift(period) + expected_outcome = ps.shift(period) + + # pandas uses NaN to signal missing values and force-converts the + # result columns to float dtypes + if data_empty: + assert_eq( + shifted_outcome, + expected_outcome, + check_index_type=False, + check_dtype=False, + ) + else: + assert_eq(shifted_outcome, expected_outcome, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/methods/test_to_frame.py b/python/cudf/cudf/tests/series/methods/test_to_frame.py new file mode 100644 index 00000000000..6217dec9a6a --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_frame.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("name", [None, False, "foo"]) +def test_to_frame(name): + gser = cudf.Series([1, 2, 3], name=name) + pser = pd.Series([1, 2, 3], name=name) + assert_eq(gser.to_frame(), pser.to_frame()) diff --git a/python/cudf/cudf/tests/series/methods/test_to_string.py b/python/cudf/cudf/tests/series/methods/test_to_string.py new file mode 100644 index 00000000000..7604d940844 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_to_string.py @@ -0,0 +1,20 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
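+# `Series.to_string()` is expected to match the repr of the equivalent pandas +# object; for an empty series pandas renders something like +# `Series([], dtype: object)`, so the assertions below compare against +# `repr(sr.to_pandas())`.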
+ +import cudf + + +def test_series_init_none(): + # test for creating empty series + # 1: without initializing + sr1 = cudf.Series() + got = sr1.to_string() + + expect = repr(sr1.to_pandas()) + assert got == expect + + # 2: Using `None` as an initializer + sr2 = cudf.Series(None) + got = sr2.to_string() + + expect = repr(sr2.to_pandas()) + assert got == expect diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index b79b1010cc3..80475505bc4 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -308,3 +308,9 @@ def test_error_values_datetime(): s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") with pytest.raises(NotImplementedError, match="cupy does not support"): s.values + + +def test_ndim(): + s = pd.Series(dtype="float64") + gs = cudf.Series() + assert s.ndim == gs.ndim diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index 342f60c1580..24bdcc6f894 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -640,6 +640,98 @@ def test_construct_all_pd_NA_with_dtype(nan_as_null): assert_eq(result, expected) +def test_to_from_arrow_nulls(all_supported_types_as_str): + if all_supported_types_as_str in {"category", "str"}: + pytest.skip(f"Test not applicable with {all_supported_types_as_str}") + data_type = all_supported_types_as_str + if data_type == "bool": + s1 = pa.array([True, None, False, None, True], type=data_type) + else: + dtype = np.dtype(data_type) + if dtype.type == np.datetime64: + time_unit, _ = np.datetime_data(dtype) + data_type = pa.timestamp(unit=time_unit) + elif dtype.type == np.timedelta64: + time_unit, _ = np.datetime_data(dtype) + data_type = pa.duration(unit=time_unit) + s1 = pa.array([1, None, 3, None, 5], type=data_type) + gs1 = cudf.Series.from_arrow(s1) + assert isinstance(gs1, cudf.Series) + # We have 64B padded buffers for nulls whereas Arrow returns a minimal + # number of bytes, so only check the first byte in this case + np.testing.assert_array_equal( + np.asarray(s1.buffers()[0]).view("u1")[0], + gs1._column.mask_array_view(mode="read").copy_to_host().view("u1")[0], + ) + assert pa.Array.equals(s1, gs1.to_arrow()) + + s2 = pa.array([None, None, None, None, None], type=data_type) + gs2 = cudf.Series.from_arrow(s2) + assert isinstance(gs2, cudf.Series) + # We have 64B padded buffers for nulls whereas Arrow returns a minimal + # number of bytes, so only check the first byte in this case + np.testing.assert_array_equal( + np.asarray(s2.buffers()[0]).view("u1")[0], + gs2._column.mask_array_view(mode="read").copy_to_host().view("u1")[0], + ) + assert pa.Array.equals(s2, gs2.to_arrow()) + + +def test_cuda_array_interface(numeric_and_bool_types_as_str): + np_data = np.arange(10).astype(numeric_and_bool_types_as_str) + cupy_data = cp.array(np_data) + pd_data = pd.Series(np_data) + + cudf_data = cudf.Series(cupy_data) + assert_eq(pd_data, cudf_data) + + gdf = cudf.DataFrame() + gdf["test"] = cupy_data + pd_data.name = "test" + assert_eq(pd_data, gdf["test"]) + + +@pytest.mark.parametrize("nan_as_null", [True, False]) +def test_series_list_nanasnull(nan_as_null): + data = [1.0, 2.0, 3.0, np.nan, None] + + expect = pa.array(data, from_pandas=nan_as_null) + got = cudf.Series(data, nan_as_null=nan_as_null).to_arrow() + + # Bug in Arrow 0.14.1 where NaNs aren't handled + expect = expect.cast("int64", safe=False) + 
got = got.cast("int64", safe=False) + + assert pa.Array.equals(expect, got) + + +@pytest.mark.parametrize("num_elements", [0, 10]) +@pytest.mark.parametrize("null_type", [np.nan, None, "mixed"]) +def test_series_all_null(num_elements, null_type): + if null_type == "mixed": + data = [] + data1 = [np.nan] * int(num_elements / 2) + data2 = [None] * int(num_elements / 2) + for idx in range(len(data1)): + data.append(data1[idx]) + data.append(data2[idx]) + else: + data = [null_type] * num_elements + + # Typecast Pandas because None will return `object` dtype + expect = pd.Series(data, dtype="float64") + got = cudf.Series(data, dtype="float64") + + assert_eq(expect, got) + + +@pytest.mark.parametrize("num_elements", [0, 10]) +def test_series_all_valid_nan(num_elements): + data = [np.nan] * num_elements + sr = cudf.Series(data, nan_as_null=False) + np.testing.assert_equal(sr.null_count, 0) + + def test_series_empty_dtype(): expected = pd.Series([]) actual = cudf.Series([]) @@ -1012,6 +1104,23 @@ def test_from_pandas_object_dtype_passed_dtype(klass): assert_eq(result, expected) +def test_series_basic(): + # Make series from buffer + a1 = np.arange(10, dtype=np.float64) + series = cudf.Series(a1) + assert len(series) == 10 + np.testing.assert_equal(series.to_numpy(), np.hstack([a1])) + + +def test_series_from_cupy_scalars(): + data = [0.1, 0.2, 0.3] + data_np = np.array(data) + data_cp = cp.array(data) + s_np = cudf.Series([data_np[0], data_np[2]]) + s_cp = cudf.Series([data_cp[0], data_cp[2]]) + assert_eq(s_np, s_cp) + + def test_to_dense_array(): rng = np.random.default_rng(seed=0) data = rng.random(8) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index ccf4c7de0d8..72601dd6a1a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1,7 +1,6 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. 
import array as arr -import contextlib import datetime import decimal import functools @@ -13,7 +12,7 @@ import textwrap import warnings from collections import OrderedDict, defaultdict, namedtuple -from contextlib import contextmanager, nullcontext as does_not_raise +from contextlib import contextmanager from copy import copy import cupy @@ -28,20 +27,17 @@ from cudf.api.extensions import no_default from cudf.core._compat import ( PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, PANDAS_VERSION, ) from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column.column import as_column -from cudf.errors import MixedTypeError -from cudf.testing import _utils as utils, assert_eq, assert_neq +from cudf.testing import _utils as utils, assert_eq from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, assert_exceptions_equal, expect_warning_if, - gen_rand, ) from cudf.utils.dtypes import SIZE_TYPE_DTYPE @@ -103,41 +99,6 @@ def _hide_concat_empty_dtype_warning(): yield -def test_init_via_list_of_tuples(): - data = [ - (5, "cats", "jump", np.nan), - (2, "dogs", "dig", 7.5), - (3, "cows", "moo", -2.1, "occasionally"), - ] - - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("columns", [["a", "b"], pd.Series(["a", "b"])]) -def test_init_via_list_of_series(columns): - data = [pd.Series([1, 2]), pd.Series([3, 4])] - - pdf = cudf.DataFrame(data, columns=columns) - gdf = cudf.DataFrame(data, columns=columns) - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("index", [None, [0, 1, 2]]) -def test_init_with_missing_columns(index): - """Test initialization when columns and data keys are disjoint.""" - data = {"a": [1, 2, 3], "b": [2, 3, 4]} - columns = ["c", "d"] - - pdf = cudf.DataFrame(data, columns=columns, index=index) - gdf = cudf.DataFrame(data, columns=columns, index=index) - - assert_eq(pdf, gdf) - - @pytest.fixture( params=[ pd.DataFrame( @@ -164,143 +125,6 @@ def na_data(request): return request.param -@pytest.mark.parametrize( - "rows", - [ - 0, - 1, - 2, - 100, - ], -) -def test_init_via_list_of_empty_tuples(rows): - data = [()] * rows - - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "dict_of_series", - [ - {"a": pd.Series([1.0, 2.0, 3.0])}, - {"a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6])}, - { - "a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), - "b": pd.Series([1.0, 2.0, 4.0], index=[1, 2, 3]), - }, - {"a": [1, 2, 3], "b": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6])}, - { - "a": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]), - "b": pd.Series([1.0, 2.0, 4.0], index=["c", "d", "e"]), - }, - { - "a": pd.Series( - ["a", "b", "c"], - index=pd.MultiIndex.from_tuples([(1, 2), (1, 3), (2, 3)]), - ), - "b": pd.Series( - ["a", " b", "d"], - index=pd.MultiIndex.from_tuples([(1, 2), (1, 3), (2, 3)]), - ), - }, - ], -) -def test_init_from_series_align(dict_of_series): - pdf = pd.DataFrame(dict_of_series) - gdf = cudf.DataFrame(dict_of_series) - - assert_eq(pdf, gdf) - - for key in dict_of_series: - if isinstance(dict_of_series[key], pd.Series): - dict_of_series[key] = cudf.Series(dict_of_series[key]) - - gdf = cudf.DataFrame(dict_of_series) - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - ("dict_of_series", "expectation"), - [ - ( - { - "a": pd.Series(["a", "b", "c"], index=[4, 4, 5]), - "b": pd.Series(["a", "b", "c"], index=[4, 5, 6]), - }, - pytest.raises( - ValueError, match="Cannot align indices with non-unique values" - 
), - ), - ( - { - "a": pd.Series(["a", "b", "c"], index=[4, 4, 5]), - "b": pd.Series(["a", "b", "c"], index=[4, 4, 5]), - }, - does_not_raise(), - ), - ], -) -def test_init_from_series_align_nonunique(dict_of_series, expectation): - with expectation: - gdf = cudf.DataFrame(dict_of_series) - - if expectation == does_not_raise(): - pdf = pd.DataFrame(dict_of_series) - assert_eq(pdf, gdf) - - -def test_init_unaligned_with_index(): - pdf = pd.DataFrame( - { - "a": pd.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), - "b": pd.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), - }, - index=[7, 8, 9], - ) - gdf = cudf.DataFrame( - { - "a": cudf.Series([1.0, 2.0, 3.0], index=[4, 5, 6]), - "b": cudf.Series([1.0, 2.0, 3.0], index=[1, 2, 3]), - }, - index=[7, 8, 9], - ) - - assert_eq(pdf, gdf, check_dtype=False) - - -def test_init_series_list_columns_unsort(): - pseries = [ - pd.Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3) - ] - gseries = [ - cudf.Series(i, index=["b", "a", "c"], name=str(i)) for i in range(3) - ] - pdf = pd.DataFrame(pseries) - gdf = cudf.DataFrame(gseries) - assert_eq(pdf, gdf) - - -def test_series_basic(): - # Make series from buffer - a1 = np.arange(10, dtype=np.float64) - series = cudf.Series(a1) - assert len(series) == 10 - np.testing.assert_equal(series.to_numpy(), np.hstack([a1])) - - -def test_series_from_cupy_scalars(): - data = [0.1, 0.2, 0.3] - data_np = np.array(data) - data_cp = cupy.array(data) - s_np = cudf.Series([data_np[0], data_np[2]]) - s_cp = cudf.Series([data_cp[0], data_cp[2]]) - assert_eq(s_np, s_cp) - - @pytest.mark.parametrize("a", [[1, 2, 3], [1, 10, 30]]) @pytest.mark.parametrize("b", [[4, 5, 6], [-11, -100, 30]]) def test_concat_index(a, b): @@ -325,106 +149,6 @@ def test_concat_index(a, b): assert_eq(expected.index, actual.index) -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2]}, - {"a": [1, 2, 3], "b": [3, 4, 5]}, - {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [1, 3, 5, 7]}, - {"a": [np.nan, 2, 3, 4], "b": [3, 4, np.nan, 6], "c": [1, 3, 5, 7]}, - {1: [1, 2, 3], 2: [3, 4, 5]}, - {"a": [1, None, None], "b": [3, np.nan, np.nan]}, - {1: ["a", "b", "c"], 2: ["q", "w", "u"]}, - {1: ["a", np.nan, "c"], 2: ["q", None, "u"]}, - {}, - {1: [], 2: [], 3: []}, - [1, 2, 3], - ], -) -def test_axes(data): - csr = cudf.DataFrame(data) - psr = pd.DataFrame(data) - - expected = psr.axes - actual = csr.axes - - for e, a in zip(expected, actual, strict=True): - assert_eq(e, a, exact=False) - - -def test_dataframe_truncate_axis_0(): - df = cudf.DataFrame( - { - "A": ["a", "b", "c", "d", "e"], - "B": ["f", "g", "h", "i", "j"], - "C": ["k", "l", "m", "n", "o"], - }, - index=[1, 2, 3, 4, 5], - ) - pdf = df.to_pandas() - - expected = pdf.truncate(before=2, after=4, axis="index") - actual = df.truncate(before=2, after=4, axis="index") - assert_eq(actual, expected) - - expected = pdf.truncate(before=1, after=4, axis=0) - actual = df.truncate(before=1, after=4, axis=0) - assert_eq(expected, actual) - - -def test_dataframe_truncate_axis_1(): - df = cudf.DataFrame( - { - "A": ["a", "b", "c", "d", "e"], - "B": ["f", "g", "h", "i", "j"], - "C": ["k", "l", "m", "n", "o"], - }, - index=[1, 2, 3, 4, 5], - ) - pdf = df.to_pandas() - - expected = pdf.truncate(before="A", after="B", axis="columns") - actual = df.truncate(before="A", after="B", axis="columns") - assert_eq(actual, expected) - - expected = pdf.truncate(before="A", after="B", axis=1) - actual = df.truncate(before="A", after="B", axis=1) - assert_eq(actual, expected) - - -def test_dataframe_truncate_datetimeindex(): - 
dates = cudf.date_range( - "2021-01-01 23:45:00", "2021-01-01 23:46:00", freq="s" - ) - df = cudf.DataFrame(data={"A": 1, "B": 2}, index=dates) - pdf = df.to_pandas() - expected = pdf.truncate( - before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" - ) - actual = df.truncate( - before="2021-01-01 23:45:18", after="2021-01-01 23:45:27" - ) - - assert_eq(actual, expected) - - -def test_series_init_none(): - # test for creating empty series - # 1: without initializing - sr1 = cudf.Series() - got = sr1.to_string() - - expect = repr(sr1.to_pandas()) - assert got == expect - - # 2: Using `None` as an initializer - sr2 = cudf.Series(None) - got = sr2.to_string() - - expect = repr(sr2.to_pandas()) - assert got == expect - - def test_dataframe_basic(): rng = np.random.default_rng(seed=0) df = cudf.DataFrame() @@ -479,522 +203,82 @@ def test_dataframe_basic(): assert gdf["val"].isnull().all() -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "c": range(1, 11)}, - index=pd.Index( - ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], - name="custom_name", - ), - ), - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} - ), - ], -) -@pytest.mark.parametrize( - "columns", - [["a"], ["b"], "a", "b", ["a", "b"]], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_columns(pdf, columns, inplace): - pdf = pdf.copy() - gdf = cudf.from_pandas(pdf) - - expected = pdf.drop(columns=columns, inplace=inplace) - actual = gdf.drop(columns=columns, inplace=inplace) +def test_dataframe_column_add_drop_via_setitem(): + df = cudf.DataFrame() + data = np.asarray(range(10)) + df["a"] = data + df["b"] = data + assert tuple(df.columns) == ("a", "b") + del df["a"] + assert tuple(df.columns) == ("b",) + df["c"] = data + assert tuple(df.columns) == ("b", "c") + df["a"] = data + assert tuple(df.columns) == ("b", "c", "a") - if inplace: - expected = pdf - actual = gdf - assert_eq(expected, actual) +def test_dataframe_column_set_via_attr(): + data_0 = np.asarray([0, 2, 4, 5]) + data_1 = np.asarray([1, 4, 2, 3]) + data_2 = np.asarray([2, 0, 3, 0]) + df = cudf.DataFrame({"a": data_0, "b": data_1, "c": data_2}) + for i in range(10): + df.c = df.a + assert assert_eq(df.c, df.a, check_names=False) + assert tuple(df.columns) == ("a", "b", "c") -@pytest.mark.parametrize("obj", ["Index", "Series"]) -def test_drop_cudf_obj_columns(obj): - pdf = pd.DataFrame({"A": [1], "B": [1]}) - gdf = cudf.from_pandas(pdf) + df.c = df.b + assert assert_eq(df.c, df.b, check_names=False) + assert tuple(df.columns) == ("a", "b", "c") - columns = ["B"] - expected = pdf.drop(labels=getattr(pd, obj)(columns), axis=1) - actual = gdf.drop(columns=getattr(cudf, obj)(columns), axis=1) - assert_eq(expected, actual) +def test_dataframe_column_drop_via_attr(): + df = cudf.DataFrame({"a": []}) -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "c": range(1, 11)}, - index=pd.Index(list(range(10)), name="custom_name"), - ), - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} - ), - ], -) -@pytest.mark.parametrize( - "labels", - [ - [1], - [0], - 1, - 5, - [5, 9], - pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - pd.Index([0, 1, 8, 9], name="new name"), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_labels_axis_0(pdf, labels, inplace): - pdf = pdf.copy() - gdf = cudf.from_pandas(pdf) + with pytest.raises(AttributeError): + del df.a - expected = pdf.drop(labels=labels, 
axis=0, inplace=inplace) - actual = gdf.drop(labels=labels, axis=0, inplace=inplace) + assert tuple(df.columns) == tuple("a") - if inplace: - expected = pdf - actual = gdf - assert_eq(expected, actual) +@pytest.mark.parametrize("nelem", [0, 10]) +def test_dataframe_astype(nelem): + df = cudf.DataFrame() + data = np.asarray(range(nelem), dtype=np.int32) + df["a"] = data + assert df["a"].dtype is np.dtype(np.int32) + df["b"] = df["a"].astype(np.float32) + assert df["b"].dtype is np.dtype(np.float32) + np.testing.assert_equal(df["a"].to_numpy(), df["b"].to_numpy()) -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame({"a": range(10), "b": range(10, 20), "c": range(1, 11)}), - pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5} - ), - pd.DataFrame( - { - "a": range(10), - "b": range(10, 20), - }, - index=pd.Index(list(range(10)), dtype="uint64"), - ), - ], -) -@pytest.mark.parametrize( - "index", - [[1], [0], 1, 5, [5, 9], pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_index(pdf, index, inplace): - pdf = pdf.copy() - gdf = cudf.from_pandas(pdf) +def test_astype_dict(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["1", "2", "3"]}) + pdf = gdf.to_pandas() - expected = pdf.drop(index=index, inplace=inplace) - actual = gdf.drop(index=index, inplace=inplace) + assert_eq(pdf.astype({"a": "str"}), gdf.astype({"a": "str"})) + assert_eq( + pdf.astype({"a": "str", "b": np.int64}), + gdf.astype({"a": "str", "b": np.int64}), + ) - if inplace: - expected = pdf - actual = gdf - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "index,level", - [ - ("cow", 0), - ("lama", 0), - ("falcon", 0), - ("speed", 1), - ("weight", 1), - ("length", 1), - ("cow", None), - ( - "lama", - None, - ), - ( - "falcon", - None, - ), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_multiindex(pdf, index, level, inplace): - pdf = pd.DataFrame( - {"a": range(10), "b": range(10, 20), "d": ["a", "v"] * 5}, - index=pd.MultiIndex( - levels=[ - ["lama", "cow", "falcon"], - ["speed", "weight", "length"], - ], - codes=[ - [0, 0, 0, 1, 1, 1, 2, 2, 2, 1], - [0, 1, 2, 0, 1, 2, 0, 1, 2, 1], - ], - ), - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.drop(index=index, inplace=inplace, level=level) - actual = gdf.drop(index=index, inplace=inplace, level=level) - - if inplace: - expected = pdf - actual = gdf - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [{"c": range(1, 11)}, {"d": ["a", "v"] * 5}], -) -@pytest.mark.parametrize( - "labels", - [["a"], ["b"], "a", "b", ["a", "b"]], -) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dataframe_drop_labels_axis_1(data, labels, inplace): - pdf = pd.DataFrame({"a": range(10), "b": range(10, 20), **data}) - gdf = cudf.from_pandas(pdf) - - expected = pdf.drop(labels=labels, axis=1, inplace=inplace) - actual = gdf.drop(labels=labels, axis=1, inplace=inplace) - - if inplace: - expected = pdf - actual = gdf - - assert_eq(expected, actual) - - -def test_dataframe_drop_error(): - df = cudf.DataFrame({"a": [1], "b": [2], "c": [3]}) - pdf = df.to_pandas() - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([], {"columns": "d"}), - rfunc_args_and_kwargs=([], {"columns": "d"}), - ) - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([], {"columns": ["a", "d", "b"]}), - rfunc_args_and_kwargs=([], {"columns": ["a", "d", "b"]}), - ) - - 
assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), - rfunc_args_and_kwargs=(["a"], {"columns": "a", "axis": 1}), - ) - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([], {"axis": 1}), - rfunc_args_and_kwargs=([], {"axis": 1}), - ) - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([[2, 0]],), - rfunc_args_and_kwargs=([[2, 0]],), - ) - - -def test_dataframe_swaplevel_axis_0(): - midx = cudf.MultiIndex( - levels=[ - ["Work"], - ["Final exam", "Coursework"], - ["History", "Geography"], - ["January", "February", "March", "April"], - ], - codes=[[0, 0, 0, 0], [0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 3]], - names=["a", "b", "c", "d"], - ) - cdf = cudf.DataFrame( - { - "Grade": ["A", "B", "A", "C"], - "Percentage": ["95", "85", "95", "75"], - }, - index=midx, - ) - pdf = cdf.to_pandas() - - assert_eq(pdf.swaplevel(), cdf.swaplevel()) - assert_eq(pdf.swaplevel(), cdf.swaplevel(-2, -1, 0)) - assert_eq(pdf.swaplevel(1, 2), cdf.swaplevel(1, 2)) - assert_eq(cdf.swaplevel(2, 1), cdf.swaplevel(1, 2)) - assert_eq(pdf.swaplevel(-1, -3), cdf.swaplevel(-1, -3)) - assert_eq(pdf.swaplevel("a", "b", 0), cdf.swaplevel("a", "b", 0)) - assert_eq(cdf.swaplevel("a", "b"), cdf.swaplevel("b", "a")) - - -def test_dataframe_swaplevel_TypeError(): - cdf = cudf.DataFrame( - {"a": [1, 2, 3], "c": [10, 20, 30]}, index=["x", "y", "z"] - ) - - with pytest.raises(TypeError): - cdf.swaplevel() - - -def test_dataframe_swaplevel_axis_1(): - midx = cudf.MultiIndex( - levels=[ - ["b", "a"], - ["bb", "aa"], - ["bbb", "aaa"], - ], - codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 0, 1]], - names=[None, "a", "b"], - ) - cdf = cudf.DataFrame( - data=[[45, 30, 100, 90], [200, 100, 50, 80]], - columns=midx, - ) - pdf = cdf.to_pandas() - - assert_eq(pdf.swaplevel(1, 2, 1), cdf.swaplevel(1, 2, 1)) - assert_eq(pdf.swaplevel("a", "b", 1), cdf.swaplevel("a", "b", 1)) - assert_eq(cdf.swaplevel(2, 1, 1), cdf.swaplevel(1, 2, 1)) - assert_eq(pdf.swaplevel(0, 2, 1), cdf.swaplevel(0, 2, 1)) - assert_eq(pdf.swaplevel(2, 0, 1), cdf.swaplevel(2, 0, 1)) - assert_eq(cdf.swaplevel("a", "a", 1), cdf.swaplevel("b", "b", 1)) - - -def test_dataframe_drop_raises(): - df = cudf.DataFrame( - {"a": [1, 2, 3], "c": [10, 20, 30]}, index=["x", "y", "z"] - ) - pdf = df.to_pandas() - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=(["p"],), - rfunc_args_and_kwargs=(["p"],), - ) - - # label dtype mismatch - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([3],), - rfunc_args_and_kwargs=([3],), - ) - - expect = pdf.drop("p", errors="ignore") - actual = df.drop("p", errors="ignore") - - assert_eq(actual, expect) - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([], {"columns": "p"}), - rfunc_args_and_kwargs=([], {"columns": "p"}), - ) - - expect = pdf.drop(columns="p", errors="ignore") - actual = df.drop(columns="p", errors="ignore") - - assert_eq(actual, expect) - - assert_exceptions_equal( - lfunc=pdf.drop, - rfunc=df.drop, - lfunc_args_and_kwargs=([], {"labels": "p", "axis": 1}), - rfunc_args_and_kwargs=([], {"labels": "p", "axis": 1}), - ) - - expect = pdf.drop(labels="p", axis=1, errors="ignore") - actual = df.drop(labels="p", axis=1, errors="ignore") - - assert_eq(actual, expect) - - -def test_dataframe_column_add_drop_via_setitem(): - df = cudf.DataFrame() - data = np.asarray(range(10)) - df["a"] = data - df["b"] = 
data - assert tuple(df.columns) == ("a", "b") - del df["a"] - assert tuple(df.columns) == ("b",) - df["c"] = data - assert tuple(df.columns) == ("b", "c") - df["a"] = data - assert tuple(df.columns) == ("b", "c", "a") - - -def test_dataframe_column_set_via_attr(): - data_0 = np.asarray([0, 2, 4, 5]) - data_1 = np.asarray([1, 4, 2, 3]) - data_2 = np.asarray([2, 0, 3, 0]) - df = cudf.DataFrame({"a": data_0, "b": data_1, "c": data_2}) - - for i in range(10): - df.c = df.a - assert assert_eq(df.c, df.a, check_names=False) - assert tuple(df.columns) == ("a", "b", "c") - - df.c = df.b - assert assert_eq(df.c, df.b, check_names=False) - assert tuple(df.columns) == ("a", "b", "c") - - -def test_dataframe_column_drop_via_attr(): - df = cudf.DataFrame({"a": []}) - - with pytest.raises(AttributeError): - del df.a - - assert tuple(df.columns) == tuple("a") - - -@pytest.mark.parametrize("axis", [0, "index"]) -def test_dataframe_index_rename(axis): - pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf = cudf.DataFrame.from_pandas(pdf) - - expect = pdf.rename(mapper={1: 5, 2: 6}, axis=axis) - got = gdf.rename(mapper={1: 5, 2: 6}, axis=axis) - - assert_eq(expect, got) - - expect = pdf.rename(index={1: 5, 2: 6}) - got = gdf.rename(index={1: 5, 2: 6}) - - assert_eq(expect, got) - - expect = pdf.rename({1: 5, 2: 6}) - got = gdf.rename({1: 5, 2: 6}) - - assert_eq(expect, got) - - # `pandas` can support indexes with mixed values. We throw a - # `NotImplementedError`. - with pytest.raises(NotImplementedError): - gdf.rename(mapper={1: "x", 2: "y"}, axis=axis) - - -def test_dataframe_MI_rename(): - gdf = cudf.DataFrame( - {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)} - ) - gdg = gdf.groupby(["a", "b"]).count() - pdg = gdg.to_pandas() - - expect = pdg.rename(mapper={1: 5, 2: 6}, axis=0) - got = gdg.rename(mapper={1: 5, 2: 6}, axis=0) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("axis", [1, "columns"]) -def test_dataframe_column_rename(axis): - pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf = cudf.DataFrame.from_pandas(pdf) - - expect = pdf.rename(mapper=lambda name: 2 * name, axis=axis) - got = gdf.rename(mapper=lambda name: 2 * name, axis=axis) - - assert_eq(expect, got) - - expect = pdf.rename(columns=lambda name: 2 * name) - got = gdf.rename(columns=lambda name: 2 * name) - - assert_eq(expect, got) - - rename_mapper = {"a": "z", "b": "y", "c": "x"} - expect = pdf.rename(columns=rename_mapper) - got = gdf.rename(columns=rename_mapper) - - assert_eq(expect, got) - - -def test_dataframe_pop(): - pdf = pd.DataFrame( - {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": [7.0, 8.0, 9.0]} - ) - gdf = cudf.DataFrame.from_pandas(pdf) - - # Test non-existing column error - with pytest.raises(KeyError) as raises: - gdf.pop("fake_colname") - raises.match("fake_colname") - - # check pop numeric column - pdf_pop = pdf.pop("a") - gdf_pop = gdf.pop("a") - assert_eq(pdf_pop, gdf_pop) - assert_eq(pdf, gdf) - - # check string column - pdf_pop = pdf.pop("b") - gdf_pop = gdf.pop("b") - assert_eq(pdf_pop, gdf_pop) - assert_eq(pdf, gdf) - - # check float column and empty dataframe - pdf_pop = pdf.pop("c") - gdf_pop = gdf.pop("c") - assert_eq(pdf_pop, gdf_pop) - assert_eq(pdf, gdf) - - # check empty dataframe edge case - empty_pdf = pd.DataFrame(columns=["a", "b"]) - empty_gdf = cudf.DataFrame(columns=["a", "b"]) - pb = empty_pdf.pop("b") - gb = empty_gdf.pop("b") - assert len(pb) == len(gb) - assert empty_pdf.empty and empty_gdf.empty - - 
-@pytest.mark.parametrize("nelem", [0, 10]) -def test_dataframe_astype(nelem): - df = cudf.DataFrame() - data = np.asarray(range(nelem), dtype=np.int32) - df["a"] = data - assert df["a"].dtype is np.dtype(np.int32) - df["b"] = df["a"].astype(np.float32) - assert df["b"].dtype is np.dtype(np.float32) - np.testing.assert_equal(df["a"].to_numpy(), df["b"].to_numpy()) - - -def test_astype_dict(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["1", "2", "3"]}) - pdf = gdf.to_pandas() - - assert_eq(pdf.astype({"a": "str"}), gdf.astype({"a": "str"})) - assert_eq( - pdf.astype({"a": "str", "b": np.int64}), - gdf.astype({"a": "str", "b": np.int64}), - ) - - -@pytest.mark.parametrize("nelem", [0, 100]) -def test_index_astype(nelem): - df = cudf.DataFrame() - data = np.asarray(range(nelem), dtype=np.int32) - df["a"] = data - assert df.index.dtype is np.dtype(np.int64) - df.index = df.index.astype(np.float32) - assert df.index.dtype is np.dtype(np.float32) - df["a"] = df["a"].astype(np.float32) - np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy()) - df["b"] = df["a"] - df = df.set_index("b") - df["a"] = df["a"].astype(np.int16) - df.index = df.index.astype(np.int16) - np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy()) +@pytest.mark.parametrize("nelem", [0, 100]) +def test_index_astype(nelem): + df = cudf.DataFrame() + data = np.asarray(range(nelem), dtype=np.int32) + df["a"] = data + assert df.index.dtype is np.dtype(np.int64) + df.index = df.index.astype(np.float32) + assert df.index.dtype is np.dtype(np.float32) + df["a"] = df["a"].astype(np.float32) + np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy()) + df["b"] = df["a"] + df = df.set_index("b") + df["a"] = df["a"].astype(np.int16) + df.index = df.index.astype(np.int16) + np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy()) def test_dataframe_to_string_with_skipped_rows(): @@ -1213,163 +497,44 @@ def test_dataframe_dir_and_getattr(): df.not_a_column -def test_empty_dataframe_to_cupy(): - df = cudf.DataFrame() - - # Check fully empty dataframe. - mat = df.to_cupy() - assert mat.shape == (0, 0) - mat = df.to_numpy() - assert mat.shape == (0, 0) - - df = cudf.DataFrame() - nelem = 123 - rng = np.random.default_rng(seed=0) - for k in "abc": - df[k] = rng.random(nelem) +def test_dataframe_append_empty(): + pdf = pd.DataFrame( + { + "key": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], + "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + } + ) + gdf = cudf.DataFrame.from_pandas(pdf) - # Check all columns in empty dataframe. 
- mat = df.head(0).to_cupy() - assert mat.shape == (0, 3) + gdf["newcol"] = 100 + pdf["newcol"] = 100 + assert len(gdf["newcol"]) == len(pdf) + assert len(pdf["newcol"]) == len(pdf) + assert_eq(gdf, pdf) -def test_dataframe_to_cupy(): - df = cudf.DataFrame() - nelem = 123 +def test_dataframe_setitem_from_masked_object(): rng = np.random.default_rng(seed=0) - for k in "abcd": - df[k] = rng.random(nelem) + ary = rng.standard_normal(100) + mask = np.zeros(100, dtype=bool) + mask[:20] = True + rng.shuffle(mask) + ary[mask] = np.nan - # Check all columns - mat = df.to_cupy() - assert mat.shape == (nelem, 4) - assert mat.strides == (8, 984) + test1_null = cudf.Series(ary, nan_as_null=True) + assert test1_null.null_count == 20 + test1_nan = cudf.Series(ary, nan_as_null=False) + assert test1_nan.null_count == 0 - mat = df.to_numpy() - assert mat.shape == (nelem, 4) - assert mat.strides == (8, 984) - for i, k in enumerate(df.columns): - np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) - - # Check column subset - mat = df[["a", "c"]].to_cupy().get() - assert mat.shape == (nelem, 2) - - for i, k in enumerate("ac"): - np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) - - -@pytest.mark.parametrize("has_nulls", [False, True]) -@pytest.mark.parametrize("use_na_value", [False, True]) -def test_dataframe_to_cupy_single_column(has_nulls, use_na_value): - nelem = 10 - data = np.arange(nelem, dtype=np.float64) - - if has_nulls: - data = data.astype("object") - data[::2] = None - - df = cudf.DataFrame({"a": data}) - - if has_nulls and not use_na_value: - with pytest.raises(ValueError, match="Column must have no nulls"): - df.to_cupy() - return - - na_value = 0.0 if use_na_value else None - expected = ( - cupy.asarray(df["a"].fillna(na_value)) - if has_nulls - else cupy.asarray(df["a"]) - ) - result = df.to_cupy(na_value=na_value) - assert result.shape == (nelem, 1) - assert_eq(result.ravel(), expected) - - -def test_dataframe_to_cupy_null_values(): - df = cudf.DataFrame() - - nelem = 123 - na = -10000 - - refvalues = {} - rng = np.random.default_rng(seed=0) - for k in "abcd": - df[k] = data = rng.random(nelem) - bitmask = utils.random_bitmask(nelem) - df[k] = df[k]._column.set_mask(bitmask) - boolmask = np.asarray( - utils.expand_bits_to_bytes(bitmask)[:nelem], dtype=np.bool_ - ) - data[~boolmask] = na - refvalues[k] = data - - # Check null value causes error - with pytest.raises(ValueError): - df.to_cupy() - with pytest.raises(ValueError): - df.to_numpy() - - for k in df.columns: - df[k] = df[k].fillna(na) - - mat = df.to_numpy() - for i, k in enumerate(df.columns): - np.testing.assert_array_equal(refvalues[k], mat[:, i]) - - -@pytest.mark.parametrize("method", ["to_cupy", "to_numpy"]) -@pytest.mark.parametrize("value", [1, True, 1.5]) -@pytest.mark.parametrize("constructor", ["DataFrame", "Series"]) -def test_to_array_categorical(method, value, constructor): - data = [value] - expected = getattr(pd, constructor)(data, dtype="category").to_numpy() - result = getattr( - getattr(cudf, constructor)(data, dtype="category"), method - )() - assert_eq(result, expected) - - -def test_dataframe_append_empty(): - pdf = pd.DataFrame( - { - "key": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], - "value": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - } - ) - gdf = cudf.DataFrame.from_pandas(pdf) - - gdf["newcol"] = 100 - pdf["newcol"] = 100 - - assert len(gdf["newcol"]) == len(pdf) - assert len(pdf["newcol"]) == len(pdf) - assert_eq(gdf, pdf) - - -def test_dataframe_setitem_from_masked_object(): - rng = 
np.random.default_rng(seed=0) - ary = rng.standard_normal(100) - mask = np.zeros(100, dtype=bool) - mask[:20] = True - rng.shuffle(mask) - ary[mask] = np.nan - - test1_null = cudf.Series(ary, nan_as_null=True) - assert test1_null.null_count == 20 - test1_nan = cudf.Series(ary, nan_as_null=False) - assert test1_nan.null_count == 0 - - test2_null = cudf.DataFrame.from_pandas( - pd.DataFrame({"a": ary}), nan_as_null=True - ) - assert test2_null["a"].null_count == 20 - test2_nan = cudf.DataFrame.from_pandas( - pd.DataFrame({"a": ary}), nan_as_null=False - ) - assert test2_nan["a"].null_count == 0 + test2_null = cudf.DataFrame.from_pandas( + pd.DataFrame({"a": ary}), nan_as_null=True + ) + assert test2_null["a"].null_count == 20 + test2_nan = cudf.DataFrame.from_pandas( + pd.DataFrame({"a": ary}), nan_as_null=False + ) + assert test2_nan["a"].null_count == 0 gpu_ary = cupy.asarray(ary) test3_null = cudf.Series(gpu_ary, nan_as_null=True) @@ -1411,284 +576,6 @@ def test_empty_dataframe_setitem_df(): assert_eq(gdf1, gdf2) -def test_assign(): - gdf = cudf.DataFrame({"x": [1, 2, 3]}) - gdf2 = gdf.assign(y=gdf.x + 1) - assert list(gdf.columns) == ["x"] - assert list(gdf2.columns) == ["x", "y"] - - np.testing.assert_equal(gdf2.y.to_numpy(), [2, 3, 4]) - - -@pytest.mark.parametrize( - "mapping", - [ - {"y": 1, "z": lambda df: df["x"] + df["y"]}, - { - "x": lambda df: df["x"] * 2, - "y": lambda df: 2, - "z": lambda df: df["x"] / df["y"], - }, - ], -) -def test_assign_callable(mapping): - df = pd.DataFrame({"x": [1, 2, 3]}) - cdf = cudf.from_pandas(df) - expect = df.assign(**mapping) - actual = cdf.assign(**mapping) - assert_eq(expect, actual) - - -@pytest.mark.parametrize( - "method", - [ - "murmur3", - "md5", - "sha1", - "sha224", - "sha256", - "sha384", - "sha512", - "xxhash32", - "xxhash64", - ], -) -@pytest.mark.parametrize("seed", [None, 42]) -def test_dataframe_hash_values(method, seed): - nrows = 10 - warning_expected = seed is not None and method not in { - "murmur3", - "xxhash32", - "xxhash64", - } - potential_warning = ( - pytest.warns(UserWarning, match="Provided seed value has no effect*") - if warning_expected - else contextlib.nullcontext() - ) - - gdf = cudf.DataFrame() - data = np.arange(nrows) - data[0] = data[-1] # make first and last the same - gdf["a"] = data - gdf["b"] = gdf.a + 100 - with potential_warning: - out = gdf.hash_values(method=method, seed=seed) - assert isinstance(out, cudf.Series) - assert len(out) == nrows - expected_dtypes = { - "murmur3": np.uint32, - "md5": object, - "sha1": object, - "sha224": object, - "sha256": object, - "sha384": object, - "sha512": object, - "xxhash32": np.uint32, - "xxhash64": np.uint64, - } - assert out.dtype == expected_dtypes[method] - - # Check single column - with potential_warning: - out_one = gdf[["a"]].hash_values(method=method, seed=seed) - # First matches last - assert out_one.iloc[0] == out_one.iloc[-1] - # Equivalent to the cudf.Series.hash_values() - with potential_warning: - assert_eq(gdf["a"].hash_values(method=method, seed=seed), out_one) - - -@pytest.mark.parametrize("method", ["murmur3", "xxhash32", "xxhash64"]) -def test_dataframe_hash_values_seed(method): - gdf = cudf.DataFrame() - data = np.arange(10) - data[0] = data[-1] # make first and last the same - gdf["a"] = data - gdf["b"] = gdf.a + 100 - out_one = gdf.hash_values(method=method, seed=0) - out_two = gdf.hash_values(method=method, seed=1) - assert out_one.iloc[0] == out_one.iloc[-1] - assert out_two.iloc[0] == out_two.iloc[-1] - assert_neq(out_one, out_two) - - -def 
test_dataframe_hash_values_xxhash32(): - # xxhash32 has no built-in implementation in Python and we don't want to - # add a testing dependency, so we use regression tests against known good - # values. - gdf = cudf.DataFrame({"a": [0.0, 1.0, 2.0, np.inf, np.nan]}) - gdf["b"] = -gdf["a"] - out_a = gdf["a"].hash_values(method="xxhash32", seed=0) - expected_a = cudf.Series( - [3736311059, 2307980487, 2906647130, 746578903, 4294967295], - dtype=np.uint32, - ) - assert_eq(out_a, expected_a) - - out_b = gdf["b"].hash_values(method="xxhash32", seed=42) - expected_b = cudf.Series( - [1076387279, 2261349915, 531498073, 650869264, 4294967295], - dtype=np.uint32, - ) - assert_eq(out_b, expected_b) - - out_df = gdf.hash_values(method="xxhash32", seed=0) - expected_df = cudf.Series( - [1223721700, 2885793241, 1920811472, 1146715602, 4294967295], - dtype=np.uint32, - ) - assert_eq(out_df, expected_df) - - -def test_dataframe_hash_values_xxhash64(): - # xxhash64 has no built-in implementation in Python and we don't want to - # add a testing dependency, so we use regression tests against known good - # values. - gdf = cudf.DataFrame({"a": [0.0, 1.0, 2.0, np.inf, np.nan]}) - gdf["b"] = -gdf["a"] - out_a = gdf["a"].hash_values(method="xxhash64", seed=0) - expected_a = cudf.Series( - [ - 3803688792395291579, - 10706502109028787093, - 9835943264235290955, - 18031741628920313605, - 18446744073709551615, - ], - dtype=np.uint64, - ) - assert_eq(out_a, expected_a) - - out_b = gdf["b"].hash_values(method="xxhash64", seed=42) - expected_b = cudf.Series( - [ - 9826995235083043316, - 10150515573749944095, - 5005707091092326006, - 5326262080505358431, - 18446744073709551615, - ], - dtype=np.uint64, - ) - assert_eq(out_b, expected_b) - - out_df = gdf.hash_values(method="xxhash64", seed=0) - expected_df = cudf.Series( - [ - 10208049663714815266, - 4949201786888768834, - 18122173653994477335, - 11133539368563441730, - 18446744073709551615, - ], - dtype=np.uint64, - ) - assert_eq(out_df, expected_df) - - -@pytest.mark.parametrize("nparts", [1, 2]) -def test_dataframe_hash_partition(nparts): - nrows = 10 - nkeys = 2 - rng = np.random.default_rng(seed=0) - gdf = cudf.DataFrame( - {f"key{i}": rng.integers(0, 7 - i, nrows) for i in range(nkeys)} - ) - keycols = gdf.columns.to_list() - gdf["val1"] = rng.integers(0, nrows * 2, nrows) - - got = gdf.partition_by_hash(keycols, nparts=nparts) - # Must return a list - assert isinstance(got, list) - # Must have correct number of partitions - assert len(got) == nparts - # All partitions must be DataFrame type - assert all(isinstance(p, cudf.DataFrame) for p in got) - # Check that all partitions have unique keys - part_unique_keys = set() - for p in got: - if len(p): - # Take rows of the keycolumns and build a set of the key-values - unique_keys = set(map(tuple, p[keycols].values_host)) - # Ensure that none of the key-values have occurred in other groups - assert not (unique_keys & part_unique_keys) - part_unique_keys |= unique_keys - assert len(part_unique_keys) - - -def test_dataframe_hash_partition_masked_value(): - nrows = 10 - gdf = cudf.DataFrame() - gdf["key"] = np.arange(nrows) - gdf["val"] = np.arange(nrows) + 100 - bitmask = utils.random_bitmask(nrows) - bytemask = utils.expand_bits_to_bytes(bitmask) - gdf["val"] = gdf["val"]._column.set_mask(bitmask) - parted = gdf.partition_by_hash(["key"], nparts=3) - # Verify that the valid mask is correct - for p in parted: - df = p.to_pandas() - for row in df.itertuples(): - valid = bool(bytemask[row.key]) - expected_value = row.key + 
100 if valid else np.nan - got_value = row.val - assert (expected_value == got_value) or ( - np.isnan(expected_value) and np.isnan(got_value) - ) - - -def test_dataframe_hash_partition_masked_keys(): - nrows = 5 - gdf = cudf.DataFrame() - gdf["key"] = np.arange(nrows) - gdf["val"] = np.arange(nrows) + 100 - bitmask = utils.random_bitmask(nrows) - bytemask = utils.expand_bits_to_bytes(bitmask) - gdf["key"] = gdf["key"]._column.set_mask(bitmask) - parted = gdf.partition_by_hash(["key"], nparts=3, keep_index=False) - # Verify that the valid mask is correct - for p in parted: - df = p.to_pandas() - for row in df.itertuples(): - valid = bool(bytemask[row.val - 100]) - # val is key + 100 - expected_value = row.val - 100 if valid else np.nan - got_value = row.key - assert (expected_value == got_value) or ( - np.isnan(expected_value) and np.isnan(got_value) - ) - - -@pytest.mark.parametrize("keep_index", [True, False]) -def test_dataframe_hash_partition_keep_index(keep_index): - gdf = cudf.DataFrame( - {"val": [1, 2, 3, 4, 5], "key": [3, 2, 1, 4, 5]}, index=[5, 4, 3, 2, 1] - ) - - expected_df1 = cudf.DataFrame( - {"val": [1, 5], "key": [3, 5]}, index=[5, 1] if keep_index else None - ) - expected_df2 = cudf.DataFrame( - {"val": [2, 3, 4], "key": [2, 1, 4]}, - index=[4, 3, 2] if keep_index else None, - ) - expected = [expected_df1, expected_df2] - - parts = gdf.partition_by_hash(["key"], nparts=2, keep_index=keep_index) - - for exp, got in zip(expected, parts, strict=True): - assert_eq(exp, got) - - -def test_dataframe_hash_partition_empty(): - gdf = cudf.DataFrame({"val": [1, 2], "key": [3, 2]}, index=["a", "b"]) - parts = gdf.iloc[:0].partition_by_hash(["key"], nparts=3) - assert len(parts) == 3 - for part in parts: - assert_eq(gdf.iloc[:0], part) - - @pytest.mark.parametrize("dtype1", utils.supported_numpy_dtypes) @pytest.mark.parametrize("dtype2", utils.supported_numpy_dtypes) def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): @@ -1918,241 +805,6 @@ def test_from_pandas(dtype): assert_eq(s, gs, check_dtype="pyarrow" not in dtype) -@pytest.mark.parametrize("dtypes", [int, float]) -def test_from_records(dtypes): - h_ary = np.ndarray(shape=(10, 4), dtype=dtypes) - rec_ary = h_ary.view(np.recarray) - - gdf = cudf.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) - df = pd.DataFrame.from_records(rec_ary, columns=["a", "b", "c", "d"]) - assert isinstance(gdf, cudf.DataFrame) - assert_eq(df, gdf) - - gdf = cudf.DataFrame.from_records(rec_ary) - df = pd.DataFrame.from_records(rec_ary) - assert isinstance(gdf, cudf.DataFrame) - assert_eq(df, gdf) - - -@pytest.mark.parametrize("columns", [None, ["first", "second", "third"]]) -@pytest.mark.parametrize( - "index", - [ - None, - ["first", "second"], - "name", - "age", - "weight", - [10, 11], - ["abc", "xyz"], - ], -) -def test_from_records_index(columns, index): - rec_ary = np.array( - [("Rex", 9, 81.0), ("Fido", 3, 27.0)], - dtype=[("name", "U10"), ("age", "i4"), ("weight", "f4")], - ) - gdf = cudf.DataFrame.from_records(rec_ary, columns=columns, index=index) - df = pd.DataFrame.from_records(rec_ary, columns=columns, index=index) - assert isinstance(gdf, cudf.DataFrame) - assert_eq(df, gdf) - - -def test_dataframe_construction_from_cupy_arrays(): - h_ary = np.array([[1, 2, 3], [4, 5, 6]], np.int32) - d_ary = cupy.asarray(h_ary) - - gdf = cudf.DataFrame(d_ary, columns=["a", "b", "c"]) - df = pd.DataFrame(h_ary, columns=["a", "b", "c"]) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - gdf = 
cudf.DataFrame(d_ary) - df = pd.DataFrame(h_ary) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - gdf = cudf.DataFrame(d_ary, index=["a", "b"]) - df = pd.DataFrame(h_ary, index=["a", "b"]) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - gdf = cudf.DataFrame(d_ary) - gdf = gdf.set_index(keys=0, drop=False) - df = pd.DataFrame(h_ary) - df = df.set_index(keys=0, drop=False) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - gdf = cudf.DataFrame(d_ary) - gdf = gdf.set_index(keys=1, drop=False) - df = pd.DataFrame(h_ary) - df = df.set_index(keys=1, drop=False) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - -def test_dataframe_cupy_wrong_dimensions(): - d_ary = cupy.empty((2, 3, 4), dtype=np.int32) - with pytest.raises( - ValueError, match="records dimension expected 1 or 2 but found: 3" - ): - cudf.DataFrame(d_ary) - - -def test_dataframe_cupy_array_wrong_index(): - d_ary = cupy.empty((2, 3), dtype=np.int32) - - with pytest.raises(ValueError): - cudf.DataFrame(d_ary, index=["a"]) - - with pytest.raises(TypeError): - cudf.DataFrame(d_ary, index="a") - - -def test_index_in_dataframe_constructor(): - a = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) - b = cudf.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0]) - - assert_eq(a, b) - assert_eq(a.loc[4:], b.loc[4:]) - - -dtypes = NUMERIC_TYPES + DATETIME_TYPES + ["bool"] - - -@pytest.mark.parametrize("nelem", [0, 2]) -@pytest.mark.parametrize("data_type", dtypes) -def test_from_arrow(nelem, data_type): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "a": rng.integers(0, 1000, nelem).astype(data_type), - "b": rng.integers(0, 1000, nelem).astype(data_type), - } - ) - padf = pa.Table.from_pandas( - df, preserve_index=False - ).replace_schema_metadata(None) - gdf = cudf.DataFrame.from_arrow(padf) - assert isinstance(gdf, cudf.DataFrame) - - assert_eq(df, gdf) - - s = pa.Array.from_pandas(df.a) - gs = cudf.Series.from_arrow(s) - assert isinstance(gs, cudf.Series) - - # For some reason PyArrow to_pandas() converts to numpy array and has - # better type compatibility - np.testing.assert_array_equal(s.to_pandas(), gs.to_numpy()) - - -def test_from_arrow_chunked_categories(): - # Verify that categories are properly deduplicated across chunked arrays. 
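-    # Each DictionaryArray chunk carries its own dictionary, so concatenating
-    # the chunks must union those dictionaries and remap the indices. Only the
-    # category *set*, not its order, is guaranteed to survive that union,
-    # hence the sorted() comparison below.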
- indices = pa.array([0, 1, 0, 1, 2, 0, None, 2]) - dictionary = pa.array(["foo", "bar", "baz"]) - dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) - chunked_array = pa.chunked_array([dict_array, dict_array]) - table = pa.table({"a": chunked_array}) - df = cudf.DataFrame.from_arrow(table) - final_dictionary = df["a"].dtype.categories.to_arrow().to_pylist() - assert sorted(final_dictionary) == sorted(dictionary.to_pylist()) - - -@pytest.mark.parametrize("nelem", [0, 2]) -@pytest.mark.parametrize("data_type", dtypes) -def test_to_arrow(nelem, data_type): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "a": rng.integers(0, 1000, nelem).astype(data_type), - "b": rng.integers(0, 1000, nelem).astype(data_type), - } - ) - gdf = cudf.DataFrame.from_pandas(df) - - pa_df = pa.Table.from_pandas( - df, preserve_index=False - ).replace_schema_metadata(None) - - pa_gdf = gdf.to_arrow(preserve_index=False).replace_schema_metadata(None) - - assert isinstance(pa_gdf, pa.Table) - assert pa.Table.equals(pa_df, pa_gdf) - - pa_s = pa.Array.from_pandas(df.a) - pa_gs = gdf["a"].to_arrow() - - assert isinstance(pa_gs, pa.Array) - assert pa.Array.equals(pa_s, pa_gs) - - pa_i = pa.Array.from_pandas(df.index) - pa_gi = gdf.index.to_arrow() - - assert isinstance(pa_gi, pa.Array) - assert pa.Array.equals(pa_i, pa_gi) - - -@pytest.mark.parametrize("data_type", dtypes) -def test_to_from_arrow_nulls(data_type): - if data_type == "longlong": - data_type = "int64" - if data_type == "bool": - s1 = pa.array([True, None, False, None, True], type=data_type) - else: - dtype = np.dtype(data_type) - if dtype.type == np.datetime64: - time_unit, _ = np.datetime_data(dtype) - data_type = pa.timestamp(unit=time_unit) - s1 = pa.array([1, None, 3, None, 5], type=data_type) - gs1 = cudf.Series.from_arrow(s1) - assert isinstance(gs1, cudf.Series) - # We have 64B padded buffers for nulls whereas Arrow returns a minimal - # number of bytes, so only check the first byte in this case - np.testing.assert_array_equal( - np.asarray(s1.buffers()[0]).view("u1")[0], - gs1._column.mask_array_view(mode="read").copy_to_host().view("u1")[0], - ) - assert pa.Array.equals(s1, gs1.to_arrow()) - - s2 = pa.array([None, None, None, None, None], type=data_type) - gs2 = cudf.Series.from_arrow(s2) - assert isinstance(gs2, cudf.Series) - # We have 64B padded buffers for nulls whereas Arrow returns a minimal - # number of bytes, so only check the first byte in this case - np.testing.assert_array_equal( - np.asarray(s2.buffers()[0]).view("u1")[0], - gs2._column.mask_array_view(mode="read").copy_to_host().view("u1")[0], - ) - assert pa.Array.equals(s2, gs2.to_arrow()) - - -def test_to_arrow_categorical(): - df = pd.DataFrame() - df["a"] = pd.Series(["a", "b", "c"], dtype="category") - gdf = cudf.DataFrame.from_pandas(df) - - pa_df = pa.Table.from_pandas( - df, preserve_index=False - ).replace_schema_metadata(None) - pa_gdf = gdf.to_arrow(preserve_index=False).replace_schema_metadata(None) - - assert isinstance(pa_gdf, pa.Table) - assert pa.Table.equals(pa_df, pa_gdf) - - pa_s = pa.Array.from_pandas(df.a) - pa_gs = gdf["a"].to_arrow() - - assert isinstance(pa_gs, pa.Array) - assert pa.Array.equals(pa_s, pa_gs) - - def test_from_arrow_missing_categorical(): pd_cat = pd.Categorical(["a", "b", "c"], categories=["a", "b"]) pa_cat = pa.array(pd_cat, from_pandas=True) @@ -2174,26 +826,6 @@ def test_to_arrow_missing_categorical(): assert pa.Array.equals(pa_cat, gd_cat.to_arrow()) -@pytest.mark.parametrize("data_type", dtypes) -def 
test_from_scalar_typing(data_type): - rng = np.random.default_rng(seed=0) - if data_type == "datetime64[ms]": - scalar = ( - np.dtype("int64").type(rng.integers(0, 5)).astype("datetime64[ms]") - ) - elif data_type.startswith("datetime64"): - scalar = np.datetime64(datetime.date.today()).astype("datetime64[ms]") - data_type = "datetime64[ms]" - else: - scalar = np.dtype(data_type).type(rng.integers(0, 5)) - - gdf = cudf.DataFrame() - gdf["a"] = [1, 2, 3, 4, 5] - gdf["b"] = scalar - assert gdf["b"].dtype == np.dtype(data_type) - assert len(gdf["b"]) == len(gdf["a"]) - - @pytest.mark.parametrize("data_type", NUMERIC_TYPES) def test_from_python_array(data_type): rng = np.random.default_rng(seed=0) @@ -2234,80 +866,6 @@ def test_dataframe_shape_empty(): assert pdf.shape == gdf.shape -@pytest.mark.parametrize("num_cols", [1, 3]) -@pytest.mark.parametrize("num_rows", [1, 4]) -@pytest.mark.parametrize("dtype", [*dtypes, "object"]) -@pytest.mark.parametrize("nulls", ["none", "some", "all"]) -def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): - # In case of `bool` dtype: pandas <= 1.2.5 type-casts - # a boolean series to `float64` series if a `np.nan` is assigned to it: - # >>> s = pd.Series([True, False, True]) - # >>> s - # 0 True - # 1 False - # 2 True - # dtype: bool - # >>> s[[2]] = np.nan - # >>> s - # 0 1.0 - # 1 0.0 - # 2 NaN - # dtype: float64 - # In pandas >= 1.3.2 this behavior is fixed: - # >>> s = pd.Series([True, False, True]) - # >>> s - # 0 - # True - # 1 - # False - # 2 - # True - # dtype: bool - # >>> s[[2]] = np.nan - # >>> s - # 0 - # True - # 1 - # False - # 2 - # NaN - # dtype: object - # In cudf we change `object` dtype to `str` type - for which there - # is no transpose implemented yet. Hence we need to test transpose - # against pandas nullable types as they are the ones that closely - # resemble `cudf` dtypes behavior. 
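-    # For contrast, pandas nullable dtypes keep the dtype stable under
-    # missing values (a minimal illustration, assuming pandas >= 1.0
-    # nullable semantics):
-    # >>> s = pd.Series([True, False, True], dtype="boolean")
-    # >>> s[[2]] = pd.NA
-    # >>> s
-    # 0     True
-    # 1    False
-    # 2     <NA>
-    # dtype: boolean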
- pdf = pd.DataFrame() - rng = np.random.default_rng(seed=0) - null_rep = np.nan if dtype in ["float32", "float64"] else None - np_dtype = dtype - dtype = np.dtype(dtype) - dtype = cudf.utils.dtypes.np_dtypes_to_pandas_dtypes.get(dtype, dtype) - for i in range(num_cols): - colname = string.ascii_lowercase[i] - data = pd.Series( - rng.integers(0, 26, num_rows).astype(np_dtype), - dtype=dtype, - ) - if nulls == "some": - idx = rng.choice(num_rows, size=int(num_rows / 2), replace=False) - if len(idx): - data[idx] = null_rep - elif nulls == "all": - data[:] = null_rep - pdf[colname] = data - - gdf = cudf.DataFrame.from_pandas(pdf) - - got_function = gdf.transpose() - got_property = gdf.T - - expect = pdf.transpose() - nullable = dtype not in DATETIME_TYPES - - assert_eq(expect, got_function.to_pandas(nullable=nullable)) - assert_eq(expect, got_property.to_pandas(nullable=nullable)) - - @pytest.mark.parametrize("num_cols", [1, 3]) @pytest.mark.parametrize("num_rows", [1, 5]) def test_dataframe_transpose_category(num_cols, num_rows): @@ -2701,1488 +1259,176 @@ def test_unaryops_df(pdf, unaryop, col_name, assign_col_name): if assign_col_name: pd_df.columns.name = col_name gdf = cudf.from_pandas(pd_df) - d = unaryop(pd_df - 5) - g = unaryop(gdf - 5) - assert_eq(d, g) - - -def test_df_abs(pdf): - rng = np.random.default_rng(seed=0) - disturbance = pd.Series(rng.random(10)) - pdf = pdf - 5 + disturbance - d = pdf.apply(np.abs) - g = cudf.from_pandas(pdf).abs() - assert_eq(d, g) - - -def test_scale_df(gdf): - got = (gdf - 5).scale() - expect = cudf.DataFrame( - {"x": np.linspace(0.0, 1.0, 10), "y": np.linspace(0.0, 1.0, 10)} - ) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "func", - [ - lambda df: df.empty, - lambda df: df.x.empty, - lambda df: df.x.fillna(123, limit=None, method=None, axis=None), - lambda df: df.drop("x", axis=1, errors="raise"), - ], -) -def test_unary_operators(func, pdf, gdf): - p = func(pdf) - g = func(gdf) - assert_eq(p, g) - - -def test_is_monotonic(gdf): - pdf = pd.DataFrame({"x": [1, 2, 3]}, index=[3, 1, 2]) - gdf = cudf.DataFrame.from_pandas(pdf) - assert not gdf.index.is_monotonic_increasing - assert not gdf.index.is_monotonic_decreasing - - -def test_iter(pdf, gdf): - assert list(pdf) == list(gdf) - - -def test_iteritems(gdf): - for k, v in gdf.items(): - assert k in gdf.columns - assert isinstance(v, cudf.Series) - assert_eq(v, gdf[k]) - - -@pytest.mark.parametrize("q", [0.5, 1, 0.001, [0.5], [], [0.005, 0.5, 1]]) -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_quantile(q, numeric_only): - ts = pd.date_range("2018-08-24", periods=5, freq="D") - td = pd.to_timedelta(np.arange(5), unit="h") - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - {"date": ts, "delta": td, "val": rng.standard_normal(len(ts))} - ) - gdf = cudf.DataFrame.from_pandas(pdf) - - assert_eq(pdf["date"].quantile(q), gdf["date"].quantile(q)) - assert_eq(pdf["delta"].quantile(q), gdf["delta"].quantile(q)) - assert_eq(pdf["val"].quantile(q), gdf["val"].quantile(q)) - - q = q if isinstance(q, list) else [q] - assert_eq( - pdf.quantile(q, numeric_only=numeric_only), - gdf.quantile(q, numeric_only=numeric_only), - ) - - -@pytest.mark.parametrize("q", [0.2, 1, 0.001, [0.5], [], [0.005, 0.8, 0.03]]) -@pytest.mark.parametrize("interpolation", ["higher", "lower", "nearest"]) -@pytest.mark.parametrize( - "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], -) -def test_decimal_quantile(q, interpolation, decimal_type): - rng = 
np.random.default_rng(seed=0) - data = ["244.8", "32.24", "2.22", "98.14", "453.23", "5.45"] - gdf = cudf.DataFrame( - {"id": rng.integers(0, 10, size=len(data)), "val": data} - ) - gdf["id"] = gdf["id"].astype("float64") - gdf["val"] = gdf["val"].astype(decimal_type(7, 2)) - pdf = gdf.to_pandas() - - got = gdf.quantile(q, numeric_only=False, interpolation=interpolation) - expected = pdf.quantile( - q if isinstance(q, list) else [q], - numeric_only=False, - interpolation=interpolation, - ) - - assert_eq(got, expected) - - -def test_empty_quantile(): - pdf = pd.DataFrame({"x": []}, dtype="float64") - df = cudf.DataFrame({"x": []}, dtype="float64") - - actual = df.quantile() - expected = pdf.quantile() - - assert_eq(actual, expected) - - -def test_from_pandas_function(pdf): - gdf = cudf.from_pandas(pdf) - assert isinstance(gdf, cudf.DataFrame) - assert_eq(pdf, gdf) - - gdf = cudf.from_pandas(pdf.x) - assert isinstance(gdf, cudf.Series) - assert_eq(pdf.x, gdf) - - with pytest.raises(TypeError): - cudf.from_pandas(123) - - -@pytest.mark.parametrize("preserve_index", [True, False]) -def test_arrow_pandas_compat(pdf, gdf, preserve_index): - pdf["z"] = range(10) - pdf = pdf.set_index("z") - gdf["z"] = range(10) - gdf = gdf.set_index("z") - - pdf_arrow_table = pa.Table.from_pandas(pdf, preserve_index=preserve_index) - gdf_arrow_table = gdf.to_arrow(preserve_index=preserve_index) - - assert pa.Table.equals(pdf_arrow_table, gdf_arrow_table) - - gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table) - pdf2 = pdf_arrow_table.to_pandas() - - assert_eq(pdf2, gdf2) - pdf.columns.name = "abc" - pdf_arrow_table = pa.Table.from_pandas(pdf, preserve_index=preserve_index) - - gdf2 = cudf.DataFrame.from_arrow(pdf_arrow_table) - pdf2 = pdf_arrow_table.to_pandas() - assert_eq(pdf2, gdf2) - - -@pytest.mark.parametrize( - "index", - [ - None, - cudf.RangeIndex(3, name="a"), - "a", - "b", - ["a", "b"], - cudf.RangeIndex(0, 5, 2, name="a"), - ], -) -@pytest.mark.parametrize("preserve_index", [True, False, None]) -def test_arrow_round_trip(preserve_index, index): - data = {"a": [4, 5, 6], "b": ["cat", "dog", "bird"]} - if isinstance(index, (list, str)): - gdf = cudf.DataFrame(data).set_index(index) - else: - gdf = cudf.DataFrame(data, index=index) - - table = gdf.to_arrow(preserve_index=preserve_index) - table_pd = pa.Table.from_pandas( - gdf.to_pandas(), preserve_index=preserve_index - ) - - gdf_out = cudf.DataFrame.from_arrow(table) - pdf_out = table_pd.to_pandas() - - assert_eq(gdf_out, pdf_out) - - -@pytest.mark.parametrize("dtype", [*NUMERIC_TYPES, "bool"]) -def test_cuda_array_interface(dtype): - np_data = np.arange(10).astype(dtype) - cupy_data = cupy.array(np_data) - pd_data = pd.Series(np_data) - - cudf_data = cudf.Series(cupy_data) - assert_eq(pd_data, cudf_data) - - gdf = cudf.DataFrame() - gdf["test"] = cupy_data - pd_data.name = "test" - assert_eq(pd_data, gdf["test"]) - - -@pytest.mark.parametrize("nelem", [0, 10]) -@pytest.mark.parametrize("nchunks", [1, 5]) -@pytest.mark.parametrize("data_type", dtypes) -def test_from_arrow_chunked_arrays(nelem, nchunks, data_type): - rng = np.random.default_rng(seed=0) - np_list_data = [ - rng.integers(0, 100, nelem).astype(data_type) for i in range(nchunks) - ] - pa_chunk_array = pa.chunked_array(np_list_data) - - expect = pa_chunk_array.to_pandas() - got = cudf.Series(pa_chunk_array) - - assert_eq(expect, got) - - np_list_data2 = [ - rng.integers(0, 100, nelem).astype(data_type) for i in range(nchunks) - ] - pa_chunk_array2 = pa.chunked_array(np_list_data2) - pa_table = 
pa.Table.from_arrays( - [pa_chunk_array, pa_chunk_array2], names=["a", "b"] - ) - - expect = pa_table.to_pandas() - got = cudf.DataFrame.from_arrow(pa_table) - - assert_eq(expect, got) - - -@pytest.mark.skip(reason="Test was designed to be run in isolation") -def test_gpu_memory_usage_with_boolmask(): - ctx = cuda.current_context() - - def query_GPU_memory(note=""): - memInfo = ctx.get_memory_info() - usedMemoryGB = (memInfo.total - memInfo.free) / 1e9 - return usedMemoryGB - - cuda.current_context().deallocations.clear() - nRows = int(1e8) - nCols = 2 - rng = np.random.default_rng(seed=0) - dataNumpy = np.asfortranarray(rng.random(nRows, nCols)) - colNames = ["col" + str(iCol) for iCol in range(nCols)] - pandasDF = pd.DataFrame(data=dataNumpy, columns=colNames, dtype=np.float32) - cudaDF = cudf.core.DataFrame.from_pandas(pandasDF) - rng = np.random.default_rng(seed=0) - boolmask = cudf.Series(rng.integers(1, 2, len(cudaDF)).astype("bool")) - - memory_used = query_GPU_memory() - cudaDF = cudaDF[boolmask] - - assert ( - cudaDF.index._column.data_array_view(mode="read").device_ctypes_pointer - == cudaDF["col0"].index._column.data_array_view.device_ctypes_pointer - ) - assert ( - cudaDF.index._column.data_array_view(mode="read").device_ctypes_pointer - == cudaDF["col1"].index._column.data_array_view.device_ctypes_pointer - ) - - assert memory_used == query_GPU_memory() - - -def test_boolmask(pdf, gdf): - rng = np.random.default_rng(seed=0) - boolmask = rng.integers(0, 2, len(pdf)) > 0 - gdf = gdf[boolmask] - pdf = pdf[boolmask] - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "mask_shape", - [ - (2, "ab"), - (2, "abc"), - (3, "ab"), - (3, "abc"), - (3, "abcd"), - (4, "abc"), - (4, "abcd"), - ], -) -def test_dataframe_boolmask(mask_shape): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame({col: rng.integers(0, 10, 3) for col in "abc"}) - pdf_mask = pd.DataFrame( - {col: rng.integers(0, 2, mask_shape[0]) > 0 for col in mask_shape[1]} - ) - gdf = cudf.DataFrame.from_pandas(pdf) - gdf_mask = cudf.DataFrame.from_pandas(pdf_mask) - gdf = gdf[gdf_mask] - pdf = pdf[pdf_mask] - - assert np.array_equal(gdf.columns, pdf.columns) - for col in gdf.columns: - assert np.array_equal( - gdf[col].fillna(-1).to_pandas().values, pdf[col].fillna(-1).values - ) - - -@pytest.mark.parametrize( - "box", - [ - list, - pytest.param( - cudf.Series, - marks=pytest_xfail( - reason="Pandas can't index a multiindex with a Series" - ), - ), - ], -) -def test_dataframe_multiindex_boolmask(box): - mask = box([True, False, True]) - gdf = cudf.DataFrame( - {"w": [3, 2, 1], "x": [1, 2, 3], "y": [0, 1, 0], "z": [1, 1, 1]} - ) - gdg = gdf.groupby(["w", "x"]).count() - pdg = gdg.to_pandas() - assert_eq(gdg[mask], pdg[mask]) - - -def test_dataframe_assignment(): - pdf = pd.DataFrame() - for col in "abc": - pdf[col] = np.array([0, 1, 1, -2, 10]) - gdf = cudf.DataFrame.from_pandas(pdf) - gdf[gdf < 0] = 999 - pdf[pdf < 0] = 999 - assert_eq(gdf, pdf) - - -def test_1row_arrow_table(): - data = [pa.array([0]), pa.array([1])] - batch = pa.RecordBatch.from_arrays(data, ["f0", "f1"]) - table = pa.Table.from_batches([batch]) - - expect = table.to_pandas() - got = cudf.DataFrame.from_arrow(table) - assert_eq(expect, got) - - -def test_arrow_handle_no_index_name(pdf, gdf): - gdf_arrow = gdf.to_arrow() - pdf_arrow = pa.Table.from_pandas(pdf) - assert pa.Table.equals(pdf_arrow, gdf_arrow) - - got = cudf.DataFrame.from_arrow(gdf_arrow) - expect = pdf_arrow.to_pandas() - assert_eq(expect, got) - - -def test_pandas_non_contiguious(): - 
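-    # A pandas DataFrame built from a single 2D ndarray stores one 2D block,
-    # so each column's .values is a strided, non C-contiguous view; the
-    # assertions below check that from_pandas still copies such columns to
-    # the GPU intact.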
rng = np.random.default_rng(seed=0) - arr1 = rng.random(size=(5000, 10)) - assert arr1.flags["C_CONTIGUOUS"] is True - df = pd.DataFrame(arr1) - for col in df.columns: - assert df[col].values.flags["C_CONTIGUOUS"] is False - - gdf = cudf.DataFrame.from_pandas(df) - assert_eq(gdf.to_pandas(), df) - - -@pytest.mark.parametrize("num_elements", [0, 10]) -@pytest.mark.parametrize("null_type", [np.nan, None, "mixed"]) -def test_series_all_null(num_elements, null_type): - if null_type == "mixed": - data = [] - data1 = [np.nan] * int(num_elements / 2) - data2 = [None] * int(num_elements / 2) - for idx in range(len(data1)): - data.append(data1[idx]) - data.append(data2[idx]) - else: - data = [null_type] * num_elements - - # Typecast Pandas because None will return `object` dtype - expect = pd.Series(data, dtype="float64") - got = cudf.Series(data, dtype="float64") - - assert_eq(expect, got) - - -@pytest.mark.parametrize("num_elements", [0, 10]) -def test_series_all_valid_nan(num_elements): - data = [np.nan] * num_elements - sr = cudf.Series(data, nan_as_null=False) - np.testing.assert_equal(sr.null_count, 0) - - -def test_series_rename(): - pds = pd.Series([1, 2, 3], name="asdf") - gds = cudf.Series([1, 2, 3], name="asdf") - - expect = pds.rename("new_name") - got = gds.rename("new_name") - - assert_eq(expect, got) - - pds = pd.Series(expect) - gds = cudf.Series(got) - - assert_eq(pds, gds) - - pds = pd.Series(expect, name="name name") - gds = cudf.Series(got, name="name name") - - assert_eq(pds, gds) - - -@pytest.mark.parametrize("data_type", dtypes) -@pytest.mark.parametrize("nelem", [0, 100]) -def test_head_tail(nelem, data_type): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "a": rng.integers(0, 1000, nelem).astype(data_type), - "b": rng.integers(0, 1000, nelem).astype(data_type), - } - ) - gdf = cudf.from_pandas(pdf) - - assert_eq(gdf.head(), pdf.head()) - assert_eq(gdf.head(3), pdf.head(3)) - assert_eq(gdf.head(-2), pdf.head(-2)) - assert_eq(gdf.head(0), pdf.head(0)) - - assert_eq(gdf["a"].head(), pdf["a"].head()) - assert_eq(gdf["a"].head(3), pdf["a"].head(3)) - assert_eq(gdf["a"].head(-2), pdf["a"].head(-2)) - - assert_eq(gdf.tail(), pdf.tail()) - assert_eq(gdf.tail(3), pdf.tail(3)) - assert_eq(gdf.tail(-2), pdf.tail(-2)) - assert_eq(gdf.tail(0), pdf.tail(0)) - - assert_eq(gdf["a"].tail(), pdf["a"].tail()) - assert_eq(gdf["a"].tail(3), pdf["a"].tail(3)) - assert_eq(gdf["a"].tail(-2), pdf["a"].tail(-2)) - - -def test_tail_for_string(): - gdf = cudf.DataFrame() - gdf["id"] = cudf.Series(["a", "b"], dtype=np.object_) - gdf["v"] = cudf.Series([1, 2]) - assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) - - -@pytest_unmark_spilling -@pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]]) -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize( - "column_names", - [ - ["v0", "v1"], - ["v0", "index"], - pd.MultiIndex.from_tuples([("x0", "x1"), ("y0", "y1")]), - pd.MultiIndex.from_tuples([(1, 2), (10, 11)], names=["ABC", "DEF"]), - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("col_level", [0, 1]) -@pytest.mark.parametrize("col_fill", ["", "some_lv"]) -def test_reset_index(level, drop, column_names, inplace, col_level, col_fill): - midx = pd.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=["l0", None] - ) - pdf = pd.DataFrame( - [[1, 2], [3, 4], [5, 6], [7, 8]], index=midx, columns=column_names - ) - gdf = cudf.from_pandas(pdf) - - expect = pdf.reset_index( - level=level, - drop=drop, - 
inplace=inplace, - col_level=col_level, - col_fill=col_fill, - ) - got = gdf.reset_index( - level=level, - drop=drop, - inplace=inplace, - col_level=col_level, - col_fill=col_fill, - ) - if inplace: - expect = pdf - got = gdf - - assert_eq(expect, got) - - -@pytest_unmark_spilling -@pytest.mark.parametrize("level", [None, 0, 1, [None]]) -@pytest.mark.parametrize("drop", [False, True]) -@pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("col_level", [0, 1]) -@pytest.mark.parametrize("col_fill", ["", "some_lv"]) -def test_reset_index_dup_level_name(level, drop, inplace, col_level, col_fill): - # midx levels are named [None, None] - midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) - pdf = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=midx) - gdf = cudf.from_pandas(pdf) - if level == [None]: - assert_exceptions_equal( - lfunc=pdf.reset_index, - rfunc=gdf.reset_index, - lfunc_args_and_kwargs=( - [], - {"level": level, "drop": drop, "inplace": inplace}, - ), - rfunc_args_and_kwargs=( - [], - {"level": level, "drop": drop, "inplace": inplace}, - ), - ) - return - - expect = pdf.reset_index( - level=level, - drop=drop, - inplace=inplace, - col_level=col_level, - col_fill=col_fill, - ) - got = gdf.reset_index( - level=level, - drop=drop, - inplace=inplace, - col_level=col_level, - col_fill=col_fill, - ) - if inplace: - expect = pdf - got = gdf - - assert_eq(expect, got) - - -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("col_level", [0, 1]) -@pytest.mark.parametrize("col_fill", ["", "some_lv"]) -def test_reset_index_named(pdf, gdf, drop, inplace, col_level, col_fill): - pdf.index.name = "cudf" - gdf.index.name = "cudf" - - expect = pdf.reset_index( - drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill - ) - got = gdf.reset_index( - drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill - ) - if inplace: - expect = pdf - got = gdf - assert_eq(expect, got) - - -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("column_names", [["x", "y"], ["index", "y"]]) -@pytest.mark.parametrize("col_level", [0, 1]) -@pytest.mark.parametrize("col_fill", ["", "some_lv"]) -def test_reset_index_unnamed( - pdf, gdf, drop, inplace, column_names, col_level, col_fill -): - pdf.columns = column_names - gdf.columns = column_names - - expect = pdf.reset_index( - drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill - ) - got = gdf.reset_index( - drop=drop, inplace=inplace, col_level=col_level, col_fill=col_fill - ) - if inplace: - expect = pdf - got = gdf - assert_eq(expect, got) - - -def test_reset_index_invalid_level(): - with pytest.raises(IndexError): - cudf.DataFrame([1]).reset_index(level=2) - - with pytest.raises(IndexError): - pd.DataFrame([1]).reset_index(level=2) - - -@pytest.mark.parametrize( - "index", - [ - "a", - ["a", "b"], - pd.CategoricalIndex(["I", "II", "III", "IV", "V"]), - pd.Series(["h", "i", "k", "l", "m"]), - ["b", pd.Index(["I", "II", "III", "IV", "V"])], - ["c", [11, 12, 13, 14, 15]], - pd.MultiIndex( - levels=[ - ["I", "II", "III", "IV", "V"], - ["one", "two", "three", "four", "five"], - ], - codes=[[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]], - names=["col1", "col2"], - ), - pd.RangeIndex(0, 5), # corner case - [pd.Series(["h", "i", "k", "l", "m"]), pd.RangeIndex(0, 5)], - [ - pd.MultiIndex( - levels=[ - ["I", "II", "III", "IV", "V"], - ["one", "two", 
"three", "four", "five"], - ], - codes=[[0, 1, 2, 3, 4], [4, 3, 2, 1, 0]], - names=["col1", "col2"], - ), - pd.RangeIndex(0, 5), - ], - ], -) -@pytest.mark.parametrize("drop", [True, False]) -@pytest.mark.parametrize("append", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_set_index(index, drop, append, inplace): - gdf = cudf.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "b": ["a", "b", "c", "d", "e"], - "c": [1.0, 2.0, 3.0, 4.0, 5.0], - } - ) - pdf = gdf.to_pandas() - - expected = pdf.set_index(index, inplace=inplace, drop=drop, append=append) - actual = gdf.set_index(index, inplace=inplace, drop=drop, append=append) - - if inplace: - expected = pdf - actual = gdf - assert_eq(expected, actual) - - -@pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])]) -def test_set_index_verify_integrity(index): - gdf = cudf.DataFrame( - { - "a": [1, 1, 2, 2, 5], - "b": ["a", "b", "c", "d", "e"], - "c": [1.0, 2.0, 3.0, 4.0, 5.0], - } - ) - with pytest.raises(ValueError): - gdf.set_index(index, verify_integrity=True) - - -@pytest.mark.parametrize("drop", [True, False]) -def test_set_index_multi(drop): - nelem = 10 - rng = np.random.default_rng(seed=0) - a = np.arange(nelem) - rng.shuffle(a) - df = pd.DataFrame( - { - "a": a, - "b": rng.integers(0, 4, size=nelem), - "c": rng.uniform(low=0, high=4, size=nelem), - "d": rng.choice(["green", "black", "white"], nelem), - } - ) - df["e"] = df["d"].astype("category") - gdf = cudf.DataFrame.from_pandas(df) - - assert_eq(gdf.set_index("a", drop=drop), gdf.set_index(["a"], drop=drop)) - assert_eq( - df.set_index(["b", "c"], drop=drop), - gdf.set_index(["b", "c"], drop=drop), - ) - assert_eq( - df.set_index(["d", "b"], drop=drop), - gdf.set_index(["d", "b"], drop=drop), - ) - assert_eq( - df.set_index(["b", "d", "e"], drop=drop), - gdf.set_index(["b", "d", "e"], drop=drop), - ) - - -@pytest_unmark_spilling -@pytest.mark.parametrize("copy", [True, False]) -@pytest.mark.parametrize( - "args,gd_kwargs", - [ - ([], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {"axis": 0}), - ([["a", "b", "c", "d", "e"]], {"axis": 1}), - ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": 0}), - ([], {"labels": ["a", "b", "c", "d", "e"], "axis": 1}), - ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": "index"}), - ([], {"labels": ["a", "b", "c", "d", "e"], "axis": "columns"}), - ([], {"index": [-3, 0, 3, 0, -2, 1, 3, 4, 6]}), - ([], {"columns": ["a", "b", "c", "d", "e"]}), - ( - [], - { - "index": [-3, 0, 3, 0, -2, 1, 3, 4, 6], - "columns": ["a", "b", "c", "d", "e"], - }, - ), - ], -) -def test_dataframe_reindex(copy, args, gd_kwargs): - reindex_data = cudf.datasets.randomdata( - nrows=6, - dtypes={ - "a": "category", - "c": float, - "d": str, - }, - ) - pdf, gdf = reindex_data.to_pandas(), reindex_data - - gd_kwargs["copy"] = copy - pd_kwargs = gd_kwargs.copy() - pd_kwargs["copy"] = True - assert_eq(pdf.reindex(*args, **pd_kwargs), gdf.reindex(*args, **gd_kwargs)) - - -@pytest.mark.parametrize("fill_value", [-1.0, 0.0, 1.5]) -@pytest.mark.parametrize( - "args,kwargs", - [ - ([], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {}), - ([[-3, 0, 3, 0, -2, 1, 3, 4, 6]], {"axis": 0}), - ([["a", "b", "c", "d", "e"]], {"axis": 1}), - ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": 0}), - ([], {"labels": ["a", "b", "c", "d", "e"], "axis": 1}), - ([], {"labels": [-3, 0, 3, 0, -2, 1, 3, 4, 6], "axis": "index"}), - ([], {"labels": ["a", 
"b", "c", "d", "e"], "axis": "columns"}), - ([], {"index": [-3, 0, 3, 0, -2, 1, 3, 4, 6]}), - ([], {"columns": ["a", "b", "c", "d", "e"]}), - ( - [], - { - "index": [-3, 0, 3, 0, -2, 1, 3, 4, 6], - "columns": ["a", "b", "c", "d", "e"], - }, - ), - ], -) -def test_dataframe_reindex_fill_value(args, kwargs, fill_value): - reindex_data_numeric = cudf.datasets.randomdata( - nrows=6, - dtypes={"a": float, "b": float, "c": float}, - ) - pdf, gdf = reindex_data_numeric.to_pandas(), reindex_data_numeric - kwargs["fill_value"] = fill_value - assert_eq(pdf.reindex(*args, **kwargs), gdf.reindex(*args, **kwargs)) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_dataframe_reindex_change_dtype(copy): - index = pd.date_range("12/29/2009", periods=10, freq="D") - columns = ["a", "b", "c", "d", "e"] - gdf = cudf.datasets.randomdata( - nrows=6, dtypes={"a": "category", "c": float, "d": str} - ) - pdf = gdf.to_pandas() - # Validate reindexes both labels and column names when - # index=index_labels and columns=column_labels - assert_eq( - pdf.reindex(index=index, columns=columns, copy=True), - gdf.reindex(index=index, columns=columns, copy=copy), - check_freq=False, - ) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_series_categorical_reindex(copy): - index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = cudf.datasets.randomdata(nrows=6, dtypes={"a": "category"}) - pdf = gdf.to_pandas() - assert_eq(pdf["a"].reindex(copy=True), gdf["a"].reindex(copy=copy)) - assert_eq( - pdf["a"].reindex(index, copy=True), gdf["a"].reindex(index, copy=copy) - ) - assert_eq( - pdf["a"].reindex(index=index, copy=True), - gdf["a"].reindex(index=index, copy=copy), - ) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_series_float_reindex(copy): - index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = cudf.datasets.randomdata(nrows=6, dtypes={"c": float}) - pdf = gdf.to_pandas() - assert_eq(pdf["c"].reindex(copy=True), gdf["c"].reindex(copy=copy)) - assert_eq( - pdf["c"].reindex(index, copy=True), gdf["c"].reindex(index, copy=copy) - ) - assert_eq( - pdf["c"].reindex(index=index, copy=True), - gdf["c"].reindex(index=index, copy=copy), - ) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_series_string_reindex(copy): - index = [-3, 0, 3, 0, -2, 1, 3, 4, 6] - gdf = cudf.datasets.randomdata(nrows=6, dtypes={"d": str}) - pdf = gdf.to_pandas() - assert_eq(pdf["d"].reindex(copy=True), gdf["d"].reindex(copy=copy)) - assert_eq( - pdf["d"].reindex(index, copy=True), gdf["d"].reindex(index, copy=copy) - ) - assert_eq( - pdf["d"].reindex(index=index, copy=True), - gdf["d"].reindex(index=index, copy=copy), - ) - - -@pytest.mark.parametrize("names", [None, ["a", "b"]]) -@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) -def test_reindex_multiindex_col_to_multiindex(names, klass): - idx = pd.Index( - [("A", "one"), ("A", "two")], - dtype="object", - ) - df = pd.DataFrame([[1, 2]], columns=idx) - gdf = cudf.from_pandas(df) - midx = klass.from_tuples([("A", "one"), ("A", "three")], names=names) - result = gdf.reindex(columns=midx) - expected = cudf.DataFrame([[1, None]], columns=midx) - # (pandas2.0): check_dtype=False won't be needed - # as None col will return object instead of float - assert_eq(result, expected, check_dtype=False) - - -@pytest.mark.parametrize("names", [None, ["a", "b"]]) -@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) -def test_reindex_tuple_col_to_multiindex(names, klass): - idx = pd.Index( - [("A", "one"), ("A", "two")], dtype="object", 
tupleize_cols=False - ) - df = pd.DataFrame([[1, 2]], columns=idx) - gdf = cudf.from_pandas(df) - midx = klass.from_tuples([("A", "one"), ("A", "two")], names=names) - result = gdf.reindex(columns=midx) - expected = cudf.DataFrame([[1, 2]], columns=midx) - assert_eq(result, expected) - - -@pytest.mark.parametrize("name", [None, "foo"]) -@pytest.mark.parametrize("klass", [range, cudf.RangeIndex, pd.RangeIndex]) -def test_reindex_columns_rangeindex_keeps_rangeindex(name, klass): - new_columns = klass(3) - exp_name = None - if klass is not range: - new_columns.name = name - exp_name = name - df = cudf.DataFrame([[1, 2]]) - result = df.reindex(columns=new_columns).columns - expected = pd.RangeIndex(3, name=exp_name) - assert_eq(result, expected) - - -def test_to_frame(pdf, gdf): - assert_eq(pdf.x.to_frame(), gdf.x.to_frame()) - - name = "foo" - gdf_new_name = gdf.x.to_frame(name=name) - pdf_new_name = pdf.x.to_frame(name=name) - assert_eq(pdf.x.to_frame(), gdf.x.to_frame()) - - name = False - gdf_new_name = gdf.x.to_frame(name=name) - pdf_new_name = pdf.x.to_frame(name=name) - assert_eq(gdf_new_name, pdf_new_name) - assert gdf_new_name.columns[0] == name - - -def test_dataframe_empty_sort_index(): - pdf = pd.DataFrame({"x": []}) - gdf = cudf.DataFrame.from_pandas(pdf) - - expect = pdf.sort_index() - got = gdf.sort_index() - - assert_eq(expect, got, check_index_type=True) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "index", - [ - pd.RangeIndex(0, 3, 1), - [3.0, 1.0, np.nan], - # Test for single column MultiIndex - pd.MultiIndex.from_arrays( - [ - [2, 0, 1], - ] - ), - pd.RangeIndex(2, -1, -1), - ], -) -@pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_dataframe_sort_index( - request, index, axis, ascending, inplace, ignore_index, na_position -): - if not PANDAS_GE_220 and axis in (1, "columns") and ignore_index: - pytest.skip(reason="Bug fixed in pandas-2.2") - - pdf = pd.DataFrame( - {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, - index=index, - ) - gdf = cudf.DataFrame.from_pandas(pdf) - - expected = pdf.sort_index( - axis=axis, - ascending=ascending, - ignore_index=ignore_index, - inplace=inplace, - na_position=na_position, - ) - got = gdf.sort_index( - axis=axis, - ascending=ascending, - ignore_index=ignore_index, - inplace=inplace, - na_position=na_position, - ) - - if inplace is True: - assert_eq(pdf, gdf, check_index_type=True) - else: - assert_eq(expected, got, check_index_type=True) - - -@pytest_unmark_spilling -@pytest.mark.parametrize("axis", [0, 1, "index", "columns"]) -@pytest.mark.parametrize( - "level", - [ - 0, - "b", - 1, - ["b"], - "a", - ["a", "b"], - ["b", "a"], - [0, 1], - [1, 0], - [0, 2], - None, - ], -) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_dataframe_mulitindex_sort_index( - request, axis, level, ascending, inplace, ignore_index, na_position -): - request.applymarker( - pytest.mark.xfail( - condition=axis in (1, "columns") - and level is None - and not ascending - and ignore_index, - 
reason="https://github.com/pandas-dev/pandas/issues/57293", - ) - ) - pdf = pd.DataFrame( - { - "b": [1.0, 3.0, np.nan], - "a": [1, 4, 3], - 1: ["a", "b", "c"], - "e": [3, 1, 4], - "d": [1, 2, 8], - } - ).set_index(["b", "a", 1]) - gdf = cudf.DataFrame.from_pandas(pdf) - - expected = pdf.sort_index( - axis=axis, - level=level, - ascending=ascending, - inplace=inplace, - na_position=na_position, - ignore_index=ignore_index, - ) - got = gdf.sort_index( - axis=axis, - level=level, - ascending=ascending, - ignore_index=ignore_index, - inplace=inplace, - na_position=na_position, - ) - - if inplace is True: - assert_eq(pdf, gdf) - else: - assert_eq(expected, got) - - -def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names(): - gdf = cudf.DataFrame([[1, 2, 3]], columns=["b", "a", "c"]) - result = gdf.sort_index(axis=1, ignore_index=True) - assert result._data.names == tuple(result._data.keys()) - - -@pytest.mark.parametrize("dtype", [*dtypes, "category"]) -def test_dataframe_0_row_dtype(dtype): - if dtype == "category": - data = pd.Series(["a", "b", "c", "d", "e"], dtype="category") - else: - data = np.array([1, 2, 3, 4, 5], dtype=dtype) - - expect = cudf.DataFrame() - expect["x"] = data - expect["y"] = data - got = expect.head(0) - - for col_name in got.columns: - assert expect[col_name].dtype == got[col_name].dtype - - expect = cudf.Series(data) - got = expect.head(0) - - assert expect.dtype == got.dtype - - -@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_series_list_nanasnull(nan_as_null): - data = [1.0, 2.0, 3.0, np.nan, None] - - expect = pa.array(data, from_pandas=nan_as_null) - got = cudf.Series(data, nan_as_null=nan_as_null).to_arrow() - - # Bug in Arrow 0.14.1 where NaNs aren't handled - expect = expect.cast("int64", safe=False) - got = got.cast("int64", safe=False) - - assert pa.Array.equals(expect, got) - - -def test_column_assignment(): - gdf = cudf.datasets.randomdata( - nrows=20, dtypes={"a": "category", "b": int, "c": float} - ) - new_cols = ["q", "r", "s"] - gdf.columns = new_cols - assert list(gdf.columns) == new_cols - - -def test_select_dtype(): - gdf = cudf.datasets.randomdata( - nrows=20, dtypes={"a": "category", "b": int, "c": float, "d": str} - ) - pdf = gdf.to_pandas() - - assert_eq(pdf.select_dtypes("float64"), gdf.select_dtypes("float64")) - assert_eq(pdf.select_dtypes(np.float64), gdf.select_dtypes(np.float64)) - assert_eq( - pdf.select_dtypes(include=["float64"]), - gdf.select_dtypes(include=["float64"]), - ) - assert_eq( - pdf.select_dtypes(include=["object", "int", "category"]), - gdf.select_dtypes(include=["object", "int", "category"]), - ) - - assert_eq( - pdf.select_dtypes(include=["int64", "float64"]), - gdf.select_dtypes(include=["int64", "float64"]), - ) - assert_eq( - pdf.select_dtypes(include=np.number), - gdf.select_dtypes(include=np.number), - ) - assert_eq( - pdf.select_dtypes(include=[np.int64, np.float64]), - gdf.select_dtypes(include=[np.int64, np.float64]), - ) - - assert_eq( - pdf.select_dtypes(include=["category"]), - gdf.select_dtypes(include=["category"]), - ) - assert_eq( - pdf.select_dtypes(exclude=np.number), - gdf.select_dtypes(exclude=np.number), - ) - - assert_exceptions_equal( - lfunc=pdf.select_dtypes, - rfunc=gdf.select_dtypes, - lfunc_args_and_kwargs=([], {"includes": ["Foo"]}), - rfunc_args_and_kwargs=([], {"includes": ["Foo"]}), - ) - - assert_exceptions_equal( - lfunc=pdf.select_dtypes, - rfunc=gdf.select_dtypes, - lfunc_args_and_kwargs=( - [], - {"exclude": np.number, "include": np.number}, - ), - 
rfunc_args_and_kwargs=( - [], - {"exclude": np.number, "include": np.number}, - ), - ) - - gdf = cudf.DataFrame( - {"A": [3, 4, 5], "C": [1, 2, 3], "D": ["a", "b", "c"]} - ) - pdf = gdf.to_pandas() - assert_eq( - pdf.select_dtypes(include=["object", "int", "category"]), - gdf.select_dtypes(include=["object", "int", "category"]), - ) - assert_eq( - pdf.select_dtypes(include=["object"], exclude=["category"]), - gdf.select_dtypes(include=["object"], exclude=["category"]), - ) - - gdf = cudf.DataFrame({"a": range(10), "b": range(10, 20)}) - pdf = gdf.to_pandas() - assert_eq( - pdf.select_dtypes(include=["category"]), - gdf.select_dtypes(include=["category"]), - ) - assert_eq( - pdf.select_dtypes(include=["float"]), - gdf.select_dtypes(include=["float"]), - ) - assert_eq( - pdf.select_dtypes(include=["object"]), - gdf.select_dtypes(include=["object"]), - ) - assert_eq( - pdf.select_dtypes(include=["int"]), gdf.select_dtypes(include=["int"]) - ) - assert_eq( - pdf.select_dtypes(exclude=["float"]), - gdf.select_dtypes(exclude=["float"]), - ) - assert_eq( - pdf.select_dtypes(exclude=["object"]), - gdf.select_dtypes(exclude=["object"]), - ) - assert_eq( - pdf.select_dtypes(include=["int"], exclude=["object"]), - gdf.select_dtypes(include=["int"], exclude=["object"]), - ) - - assert_exceptions_equal( - lfunc=pdf.select_dtypes, - rfunc=gdf.select_dtypes, - ) - - gdf = cudf.DataFrame( - {"a": cudf.Series([], dtype="int"), "b": cudf.Series([], dtype="str")} - ) - pdf = gdf.to_pandas() - assert_eq( - pdf.select_dtypes(exclude=["object"]), - gdf.select_dtypes(exclude=["object"]), - ) - assert_eq( - pdf.select_dtypes(include=["int"], exclude=["object"]), - gdf.select_dtypes(include=["int"], exclude=["object"]), - ) - - gdf = cudf.DataFrame( - {"int_col": [0, 1, 2], "list_col": [[1, 2], [3, 4], [5, 6]]} - ) - pdf = gdf.to_pandas() - assert_eq( - pdf.select_dtypes("int64"), - gdf.select_dtypes("int64"), - ) - - -def test_select_dtype_datetime(): - gdf = cudf.datasets.timeseries( - start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={"x": int} - ) - gdf = gdf.reset_index() - pdf = gdf.to_pandas() - - assert_eq(pdf.select_dtypes("datetime64"), gdf.select_dtypes("datetime64")) - assert_eq( - pdf.select_dtypes(np.dtype("datetime64")), - gdf.select_dtypes(np.dtype("datetime64")), - ) - assert_eq( - pdf.select_dtypes(include="datetime64"), - gdf.select_dtypes(include="datetime64"), - ) - - -def test_select_dtype_datetime_with_frequency(): - gdf = cudf.datasets.timeseries( - start="2000-01-01", end="2000-01-02", freq="3600s", dtypes={"x": int} - ) - gdf = gdf.reset_index() - pdf = gdf.to_pandas() - - assert_exceptions_equal( - pdf.select_dtypes, - gdf.select_dtypes, - (["datetime64[ms]"],), - (["datetime64[ms]"],), - ) - - -def test_dataframe_describe_exclude(): - rng = np.random.default_rng(seed=12) - data_length = 10 - - df = cudf.DataFrame() - df["x"] = rng.normal(10, 1, data_length) - df["x"] = df.x.astype("int64") - df["y"] = rng.normal(10, 1, data_length) - pdf = df.to_pandas() - - gdf_results = df.describe(exclude=["float"]) - pdf_results = pdf.describe(exclude=["float"]) - - assert_eq(gdf_results, pdf_results) - - -def test_dataframe_describe_include(): - rng = np.random.default_rng(seed=12) - data_length = 10 - - df = cudf.DataFrame() - df["x"] = rng.normal(10, 1, data_length) - df["x"] = df.x.astype("int64") - df["y"] = rng.normal(10, 1, data_length) - pdf = df.to_pandas() - gdf_results = df.describe(include=["int"]) - pdf_results = pdf.describe(include=["int"]) - - assert_eq(gdf_results, 
pdf_results) - - -def test_dataframe_describe_default(): - rng = np.random.default_rng(seed=12) - data_length = 10 - - df = cudf.DataFrame() - df["x"] = rng.normal(10, 1, data_length) - df["y"] = rng.normal(10, 1, data_length) - pdf = df.to_pandas() - gdf_results = df.describe() - pdf_results = pdf.describe() - - assert_eq(pdf_results, gdf_results) - + d = unaryop(pd_df - 5) + g = unaryop(gdf - 5) + assert_eq(d, g) -def test_series_describe_include_all(): - rng = np.random.default_rng(seed=12) - data_length = 10 - df = cudf.DataFrame() - df["x"] = rng.normal(10, 1, data_length) - df["x"] = df.x.astype("int64") - df["y"] = rng.normal(10, 1, data_length) - df["animal"] = rng.choice(["dog", "cat", "bird"], data_length) +def test_df_abs(pdf): + rng = np.random.default_rng(seed=0) + disturbance = pd.Series(rng.random(10)) + pdf = pdf - 5 + disturbance + d = pdf.apply(np.abs) + g = cudf.from_pandas(pdf).abs() + assert_eq(d, g) - pdf = df.to_pandas() - gdf_results = df.describe(include="all") - pdf_results = pdf.describe(include="all") - assert_eq(gdf_results[["x", "y"]], pdf_results[["x", "y"]]) - assert_eq(gdf_results.index, pdf_results.index) - assert_eq(gdf_results.columns, pdf_results.columns) - assert_eq( - gdf_results[["animal"]].fillna(-1).astype("str"), - pdf_results[["animal"]].fillna(-1).astype("str"), +def test_scale_df(gdf): + got = (gdf - 5).scale() + expect = cudf.DataFrame( + {"x": np.linspace(0.0, 1.0, 10), "y": np.linspace(0.0, 1.0, 10)} ) + assert_eq(expect, got) -def test_dataframe_describe_percentiles(): - rng = np.random.default_rng(seed=12) - data_length = 100 - sample_percentiles = [0.0, 0.1, 0.33, 0.84, 0.4, 0.99] +@pytest.mark.parametrize( + "func", + [ + lambda df: df.empty, + lambda df: df.x.empty, + lambda df: df.x.fillna(123, limit=None, method=None, axis=None), + lambda df: df.drop("x", axis=1, errors="raise"), + ], +) +def test_unary_operators(func, pdf, gdf): + p = func(pdf) + g = func(gdf) + assert_eq(p, g) - df = cudf.DataFrame() - df["x"] = rng.normal(10, 1, data_length) - df["y"] = rng.normal(10, 1, data_length) - pdf = df.to_pandas() - gdf_results = df.describe(percentiles=sample_percentiles) - pdf_results = pdf.describe(percentiles=sample_percentiles) - assert_eq(pdf_results, gdf_results) +def test_is_monotonic(gdf): + pdf = pd.DataFrame({"x": [1, 2, 3]}, index=[3, 1, 2]) + gdf = cudf.DataFrame.from_pandas(pdf) + assert not gdf.index.is_monotonic_increasing + assert not gdf.index.is_monotonic_decreasing -def test_get_numeric_data(): +@pytest.mark.parametrize("q", [0.5, 1, 0.001, [0.5], [], [0.005, 0.5, 1]]) +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_quantile(q, numeric_only): + ts = pd.date_range("2018-08-24", periods=5, freq="D") + td = pd.to_timedelta(np.arange(5), unit="h") + rng = np.random.default_rng(seed=0) pdf = pd.DataFrame( - {"x": [1, 2, 3], "y": [1.0, 2.0, 3.0], "z": ["a", "b", "c"]} + {"date": ts, "delta": td, "val": rng.standard_normal(len(ts))} ) - gdf = cudf.from_pandas(pdf) - - assert_eq(pdf._get_numeric_data(), gdf._get_numeric_data()) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("period", [-15, -1, 0, 1, 15]) -@pytest.mark.parametrize("data_empty", [False, True]) -def test_shift(dtype, period, data_empty): - # TODO : this function currently tests for series.shift() - # but should instead test for dataframe.shift() - if data_empty: - data = None - else: - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, 10, low=-2, high=2) - else: - data = gen_rand(dtype, 10) 
- - gs = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)}) - ps = pd.DataFrame({"a": pd.Series(data, dtype=dtype)}) + gdf = cudf.DataFrame.from_pandas(pdf) - shifted_outcome = gs.a.shift(period) - expected_outcome = ps.a.shift(period) + assert_eq(pdf["date"].quantile(q), gdf["date"].quantile(q)) + assert_eq(pdf["delta"].quantile(q), gdf["delta"].quantile(q)) + assert_eq(pdf["val"].quantile(q), gdf["val"].quantile(q)) - # pandas uses NaNs to signal missing value and force converts the - # results columns to float types - if data_empty: - assert_eq( - shifted_outcome, - expected_outcome, - check_index_type=False, - check_dtype=False, - ) - else: - assert_eq(shifted_outcome, expected_outcome, check_dtype=False) + q = q if isinstance(q, list) else [q] + assert_eq( + pdf.quantile(q, numeric_only=numeric_only), + gdf.quantile(q, numeric_only=numeric_only), + ) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("period", [-15, -1, 0, 1, 15]) -@pytest.mark.parametrize("data_empty", [False, True]) -def test_diff(dtype, period, data_empty): - if data_empty: - data = None - else: - if dtype == np.int8: - # to keep data in range - data = gen_rand(dtype, 100000, low=-2, high=2) - else: - data = gen_rand(dtype, 100000) +@pytest.mark.parametrize("q", [0.2, 1, 0.001, [0.5], [], [0.005, 0.8, 0.03]]) +@pytest.mark.parametrize("interpolation", ["higher", "lower", "nearest"]) +@pytest.mark.parametrize( + "decimal_type", + [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], +) +def test_decimal_quantile(q, interpolation, decimal_type): + rng = np.random.default_rng(seed=0) + data = ["244.8", "32.24", "2.22", "98.14", "453.23", "5.45"] + gdf = cudf.DataFrame( + {"id": rng.integers(0, 10, size=len(data)), "val": data} + ) + gdf["id"] = gdf["id"].astype("float64") + gdf["val"] = gdf["val"].astype(decimal_type(7, 2)) + pdf = gdf.to_pandas() - gdf = cudf.DataFrame({"a": cudf.Series(data, dtype=dtype)}) - pdf = pd.DataFrame({"a": pd.Series(data, dtype=dtype)}) + got = gdf.quantile(q, numeric_only=False, interpolation=interpolation) + expected = pdf.quantile( + q if isinstance(q, list) else [q], + numeric_only=False, + interpolation=interpolation, + ) - expected_outcome = pdf.a.diff(period) - diffed_outcome = gdf.a.diff(period).astype(expected_outcome.dtype) + assert_eq(got, expected) - if data_empty: - assert_eq(diffed_outcome, expected_outcome, check_index_type=False) - else: - assert_eq(diffed_outcome, expected_outcome) +def test_empty_quantile(): + pdf = pd.DataFrame({"x": []}, dtype="float64") + df = cudf.DataFrame({"x": []}, dtype="float64") -@pytest.mark.parametrize("nan_as_null", [True, False, None]) -@pytest.mark.parametrize("api_call", ["isnull", "isna", "notna", "notnull"]) -def test_dataframe_isnull_isna_and_reverse(na_data, nan_as_null, api_call): - def detect_nan(x): - # Check if the input is a float and if it is nan - return x.apply(lambda v: isinstance(v, float) and np.isnan(v)) + actual = df.quantile() + expected = pdf.quantile() - df = na_data - nan_contains = df.select_dtypes(object).apply(detect_nan) - if nan_as_null is False and ( - nan_contains.any().any() and not nan_contains.all().all() - ): - with pytest.raises(MixedTypeError): - cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) - else: - gdf = cudf.DataFrame.from_pandas(df, nan_as_null=nan_as_null) + assert_eq(actual, expected) - assert_eq(getattr(df, api_call)(), getattr(gdf, api_call)()) - # Test individual columns - for col in df: - assert_eq( - getattr(df[col], api_call)(), 
getattr(gdf[col], api_call)() - ) +def test_boolmask(pdf, gdf): + rng = np.random.default_rng(seed=0) + boolmask = rng.integers(0, 2, len(pdf)) > 0 + gdf = gdf[boolmask] + pdf = pdf[boolmask] + assert_eq(pdf, gdf) -def test_ndim(): - pdf = pd.DataFrame({"x": range(5), "y": range(5, 10)}) +@pytest.mark.parametrize( + "mask_shape", + [ + (2, "ab"), + (2, "abc"), + (3, "ab"), + (3, "abc"), + (3, "abcd"), + (4, "abc"), + (4, "abcd"), + ], +) +def test_dataframe_boolmask(mask_shape): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame({col: rng.integers(0, 10, 3) for col in "abc"}) + pdf_mask = pd.DataFrame( + {col: rng.integers(0, 2, mask_shape[0]) > 0 for col in mask_shape[1]} + ) gdf = cudf.DataFrame.from_pandas(pdf) - assert pdf.ndim == gdf.ndim - assert pdf.x.ndim == gdf.x.ndim + gdf_mask = cudf.DataFrame.from_pandas(pdf_mask) + gdf = gdf[gdf_mask] + pdf = pdf[pdf_mask] - s = pd.Series(dtype="float64") - gs = cudf.Series() - assert s.ndim == gs.ndim + assert np.array_equal(gdf.columns, pdf.columns) + for col in gdf.columns: + assert np.array_equal( + gdf[col].fillna(-1).to_pandas().values, pdf[col].fillna(-1).values + ) @pytest.mark.parametrize( - "decimals", + "box", [ - -3, - 0, - 5, - pd.Series( - [1, 4, 3, -6], - index=["floats", "ints", "floats_with_nan", "floats_same"], - ), - cudf.Series( - [-4, -2, 12], index=["ints", "floats_with_nan", "floats_same"] + list, + pytest.param( + cudf.Series, + marks=pytest_xfail( + reason="Pandas can't index a multiindex with a Series" + ), ), - {"floats": -1, "ints": 15, "floats_will_nan": 2}, ], ) -def test_dataframe_round(decimals): - rng = np.random.default_rng(seed=0) +def test_dataframe_multiindex_boolmask(box): + mask = box([True, False, True]) gdf = cudf.DataFrame( - { - "floats": np.arange(0.5, 10.5, 1), - "ints": rng.normal(-100, 100, 10), - "floats_with_na": np.array( - [ - 14.123, - 2.343, - np.nan, - 0.0, - -8.302, - np.nan, - 94.313, - None, - -8.029, - np.nan, - ] - ), - "floats_same": np.repeat([-0.6459412758761901], 10), - "bools": rng.choice([True, None, False], 10), - "strings": rng.choice(["abc", "xyz", None], 10), - "struct": rng.choice([{"abc": 1}, {"xyz": 2}, None], 10), - "list": [[1], [2], None, [4], [3]] * 2, - } + {"w": [3, 2, 1], "x": [1, 2, 3], "y": [0, 1, 0], "z": [1, 1, 1]} ) - pdf = gdf.to_pandas() - - if isinstance(decimals, cudf.Series): - pdecimals = decimals.to_pandas() - else: - pdecimals = decimals - - result = gdf.round(decimals) - expected = pdf.round(pdecimals) - - assert_eq(result, expected) + gdg = gdf.groupby(["w", "x"]).count() + pdg = gdg.to_pandas() + assert_eq(gdg[mask], pdg[mask]) -def test_dataframe_round_dict_decimal_validation(): - df = cudf.DataFrame({"A": [0.12], "B": [0.13]}) - with pytest.raises(TypeError): - df.round({"A": 1, "B": 0.5}) +def test_dataframe_assignment(): + pdf = pd.DataFrame() + for col in "abc": + pdf[col] = np.array([0, 1, 1, -2, 10]) + gdf = cudf.DataFrame.from_pandas(pdf) + gdf[gdf < 0] = 999 + pdf[pdf < 0] = 999 + assert_eq(gdf, pdf) @pytest.mark.parametrize( @@ -4316,16 +1562,6 @@ def test_create_dataframe_cols_empty_data(a, b, misc_data, non_list_data): assert_eq(actual, expected) -def test_empty_dataframe_describe(): - pdf = pd.DataFrame({"a": [], "b": []}) - gdf = cudf.from_pandas(pdf) - - expected = pdf.describe() - actual = gdf.describe() - - assert_eq(expected, actual) - - def test_as_column_types(): col = as_column(cudf.Series([], dtype="float64")) assert_eq(col.dtype, np.dtype("float64")) @@ -4400,27 +1636,6 @@ def test_as_column_types(): assert_eq(pds, gds) 
-def test_one_row_head(): - gdf = cudf.DataFrame({"name": ["carl"], "score": [100]}, index=[123]) - pdf = gdf.to_pandas() - - head_gdf = gdf.head() - head_pdf = pdf.head() - - assert_eq(head_pdf, head_gdf) - - -@pytest.mark.parametrize("index", [None, [123], ["a", "b"]]) -def test_no_cols_head(index): - pdf = pd.DataFrame(index=index) - gdf = cudf.from_pandas(pdf) - - head_gdf = gdf.head() - head_pdf = pdf.head() - - assert_eq(head_pdf, head_gdf) - - @pytest.mark.parametrize("dtype", ALL_TYPES) @pytest.mark.parametrize( "np_dtype,pd_dtype", @@ -8391,129 +5606,6 @@ def test_dataframe_iterrows_itertuples(): df.iterrows() -@pytest_unmark_spilling -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - { - "a": [1, 2, 3], - "b": [10, 22, 33], - "c": [0.3234, 0.23432, 0.0], - "d": ["hello", "world", "hello"], - } - ), - pd.DataFrame( - { - "a": [1, 2, 3], - "b": ["hello", "world", "hello"], - "c": [0.3234, 0.23432, 0.0], - } - ), - pd.DataFrame( - { - "int_data": [1, 2, 3], - "str_data": ["hello", "world", "hello"], - "float_data": [0.3234, 0.23432, 0.0], - "timedelta_data": pd.Series( - [1, 2, 1], dtype="timedelta64[ns]" - ), - "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), - } - ), - pd.DataFrame( - { - "int_data": [1, 2, 3], - "str_data": ["hello", "world", "hello"], - "float_data": [0.3234, 0.23432, 0.0], - "timedelta_data": pd.Series( - [1, 2, 1], dtype="timedelta64[ns]" - ), - "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), - "category_data": pd.Series(["a", "a", "b"], dtype="category"), - } - ), - ], -) -@pytest.mark.parametrize( - "include", - [None, "all", ["object"], ["int"], ["object", "int", "category"]], -) -def test_describe_misc_include(pdf, include): - df = cudf.DataFrame.from_pandas(pdf) - - expected = pdf.describe(include=include) - actual = df.describe(include=include) - - for col in expected.columns: - if expected[col].dtype == np.dtype("object"): - expected[col] = expected[col].fillna(-1).astype("str") - actual[col] = actual[col].fillna(-1).astype("str") - - assert_eq(expected, actual) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - { - "a": [1, 2, 3], - "b": [10, 22, 33], - "c": [0.3234, 0.23432, 0.0], - "d": ["hello", "world", "hello"], - } - ), - pd.DataFrame( - { - "a": [1, 2, 3], - "b": ["hello", "world", "hello"], - "c": [0.3234, 0.23432, 0.0], - } - ), - pd.DataFrame( - { - "int_data": [1, 2, 3], - "str_data": ["hello", "world", "hello"], - "float_data": [0.3234, 0.23432, 0.0], - "timedelta_data": pd.Series( - [1, 2, 1], dtype="timedelta64[ns]" - ), - "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), - } - ), - pd.DataFrame( - { - "int_data": [1, 2, 3], - "str_data": ["hello", "world", "hello"], - "float_data": [0.3234, 0.23432, 0.0], - "timedelta_data": pd.Series( - [1, 2, 1], dtype="timedelta64[ns]" - ), - "datetime_data": pd.Series([1, 2, 1], dtype="datetime64[ns]"), - "category_data": pd.Series(["a", "a", "b"], dtype="category"), - } - ), - ], -) -@pytest.mark.parametrize( - "exclude", [None, ["object"], ["int"], ["object", "int", "category"]] -) -def test_describe_misc_exclude(pdf, exclude): - df = cudf.DataFrame.from_pandas(pdf) - - expected = pdf.describe(exclude=exclude) - actual = df.describe(exclude=exclude) - - for col in expected.columns: - if expected[col].dtype == np.dtype("object"): - expected[col] = expected[col].fillna(-1).astype("str") - actual[col] = actual[col].fillna(-1).astype("str") - - assert_eq(expected, actual) - - @pytest.mark.parametrize( "df", [ From 
ef3fe33ea0fe25d5f47b1c82f7d48abf34fb3faa Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Fri, 15 Aug 2025 17:08:38 -0500 Subject: [PATCH 140/366] Fix "--executor" pytest parameter for cudf-polars (#19703) The default value for the `--executor` pytest parameter is still `"in-memory"`. This means we are currently running the in-memory tests twice in CI. The good news is that the `--blocksize-mode small` tests have still been hitting the `"streaming"` engine, so I'm pretty sure there is no need to worry about 25.08. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19703 --- ci/run_cudf_polars_pytests.sh | 2 +- python/cudf_polars/tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/run_cudf_polars_pytests.sh b/ci/run_cudf_polars_pytests.sh index 4fdf7080c03..304573d6a85 100755 --- a/ci/run_cudf_polars_pytests.sh +++ b/ci/run_cudf_polars_pytests.sh @@ -12,7 +12,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ python -m pytest --cache-clear "$@" tests --executor in-memory # Test the default "streaming" executor -python -m pytest --cache-clear "$@" tests +python -m pytest --cache-clear "$@" tests --executor streaming # Test the "streaming" executor with small blocksize python -m pytest --cache-clear "$@" tests --executor streaming --blocksize-mode small diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index 334f0f38820..3537a66d938 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -18,7 +18,7 @@ def pytest_addoption(parser): parser.addoption( "--executor", action="store", - default="in-memory", + default="streaming", choices=("in-memory", "streaming"), help="Executor to use for GPUEngine.", ) From ebfdba5ca3fa04575ce09a956693490446d1a637 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 15 Aug 2025 18:35:13 -0400 Subject: [PATCH 141/366] Move quantiles libcudf benchmark to nvbench (#19692) Moves the libcudf quantiles benchmark from googlebench to nvbench. 
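For readers unfamiliar with nvbench, a minimal sketch of the pattern the port
follows is below; the names `bench_example` and `do_work` and the axis values
are illustrative stand-ins, not part of this change. Parameter sweeps become
named axes on the registered function instead of googlebench's
`RangeMultiplier`/`Ranges`, and timing moves from a manual event timer into
`state.exec`:

```cpp
#include <nvbench/nvbench.cuh>

// Hypothetical stand-in for the work being measured; the real benchmark
// calls cudf::quantiles on a generated table.
static void do_work(int n)
{
  volatile long long acc = 0;
  for (int i = 0; i < n; ++i) { acc = acc + i; }
}

static void bench_example(nvbench::state& state)
{
  // Axis values are read back from the state by name.
  auto const num_rows = static_cast<int>(state.get_int64("num_rows"));

  // nvbench times the lambda; exec_tag::sync marks the body as blocking.
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { do_work(num_rows); });
}

// Registration replaces the googlebench fixture/macro boilerplate; link
// against nvbench's main target to get an executable entry point.
NVBENCH_BENCH(bench_example)
  .set_name("example")
  .add_int64_power_of_two_axis("num_rows", {16, 20, 24});
```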
Authors: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19692 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/quantiles/quantiles.cpp | 60 ++++++++++++-------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index e111b6395e9..ec1fb42cdab 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -217,7 +217,7 @@ ConfigureNVBench(STRUCT_CREATION_NVBENCH structs/create_structs.cpp) # ################################################################################################## # * quantiles benchmark # -------------------------------------------------------------------------------- -ConfigureBench(QUANTILES_BENCH quantiles/quantiles.cpp) +ConfigureNVBench(QUANTILES_NVBENCH quantiles/quantiles.cpp) # ################################################################################################## # * tdigest benchmark diff --git a/cpp/benchmarks/quantiles/quantiles.cpp b/cpp/benchmarks/quantiles/quantiles.cpp index 24f9cc9c68e..25eae2d68e4 100644 --- a/cpp/benchmarks/quantiles/quantiles.cpp +++ b/cpp/benchmarks/quantiles/quantiles.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,55 +15,51 @@ */ #include -#include -#include #include +#include #include #include #include -class Quantiles : public cudf::benchmark {}; +#include -static void BM_quantiles(benchmark::State& state, bool nulls) +static void bench_quantiles(nvbench::state& state) { - using Type = int; + cudf::size_type const num_rows{static_cast(state.get_int64("num_rows"))}; + cudf::size_type const num_cols{static_cast(state.get_int64("num_cols"))}; + cudf::size_type const num_quantiles{ + static_cast(state.get_int64("num_quantiles"))}; + bool const nulls{static_cast(state.get_int64("nulls"))}; - cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - cudf::size_type const n_cols{(cudf::size_type)state.range(1)}; - cudf::size_type const n_quantiles{(cudf::size_type)state.range(2)}; + auto const data_type = cudf::type_to_id(); // Create columns with values in the range [0,100) - data_profile profile = data_profile_builder().cardinality(0).distribution( - cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + data_profile profile = + data_profile_builder().cardinality(0).distribution(data_type, distribution_id::UNIFORM, 0, 100); profile.set_null_probability(nulls ? 
std::optional{0.01} : std::nullopt);  // 1% nulls or no null mask (<0)
-  auto input_table = create_random_table(
-    cycle_dtypes({cudf::type_to_id<Type>()}, n_cols), row_count{n_rows}, profile);
+  auto input_table =
+    create_random_table(cycle_dtypes({data_type}, num_cols), row_count{num_rows}, profile);
   auto input = cudf::table_view(*input_table);
 
-  std::vector<double> q(n_quantiles);
-  thrust::tabulate(
-    thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); });
+  std::vector<double> q(num_quantiles);
+  thrust::tabulate(thrust::seq, q.begin(), q.end(), [num_quantiles](auto i) {
+    return i * (1.0f / num_quantiles);
+  });
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
 
-    auto result = cudf::quantiles(input, q);
-    // auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input);
-  }
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { auto result = cudf::quantiles(input, q); });
 }
 
-#define QUANTILES_BENCHMARK_DEFINE(name, nulls) \
-  BENCHMARK_DEFINE_F(Quantiles, name) \
-  (::benchmark::State & st) { BM_quantiles(st, nulls); } \
-  BENCHMARK_REGISTER_F(Quantiles, name) \
-    ->RangeMultiplier(4) \
-    ->Ranges({{1 << 16, 1 << 26}, {1, 8}, {1, 12}}) \
-    ->UseManualTime() \
-    ->Unit(benchmark::kMillisecond);
-
-QUANTILES_BENCHMARK_DEFINE(no_nulls, false)
-QUANTILES_BENCHMARK_DEFINE(nulls, true)
+NVBENCH_BENCH(bench_quantiles)
+  .set_name("quantiles")
+  .add_int64_power_of_two_axis("num_rows", {16, 18, 20, 22, 24, 26})
+  .add_int64_axis("num_cols", {1, 2, 4, 8})
+  .add_int64_axis("num_quantiles", {1, 4, 8, 12})
+  .add_int64_axis("nulls", {0, 1});

From 6dc891ab9dbb3adfb553a86b7eb91f20b4f5e50b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Fri, 15 Aug 2025 16:38:07 -0700
Subject: [PATCH 142/366] Move pyarrow and numpy to run_constrained (#19706)

This is a follow-up to #19657 to remove hard pyarrow and numpy
requirements from the pylibcudf conda recipe.
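For context on the semantics this relies on: in a rattler-build `recipe.yaml`,
packages under `requirements.run` are always installed alongside the package,
while packages under `run_constraints` are not installed by default; the pins
only take effect if the environment pulls those packages in for some other
reason. A hedged sketch with illustrative entries (not the actual pylibcudf
recipe):

```yaml
requirements:
  run:
    - python                  # hard dependency, always installed
  run_constraints:
    - somepkg >=1.0,<2.0a0    # no-op unless somepkg is installed anyway
```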
Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/19706
---
 conda/recipes/pylibcudf/recipe.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml
index 6ba6e189d0f..548a35da119 100644
--- a/conda/recipes/pylibcudf/recipe.yaml
+++ b/conda/recipes/pylibcudf/recipe.yaml
@@ -67,14 +67,15 @@ requirements:
     - python
     - typing_extensions >=4.0.0
     - pandas >=2.0,<2.4.0dev0
-    - numpy >=1.23,<3.0a0
-    - pyarrow>=14.0.0,<20.0.0a0
     - libcudf =${{ version }}
     - ${{ pin_compatible("rmm", upper_bound="x.x") }}
     - fsspec >=0.6.0
     - cuda-python >=12.9.1,<13.0a0
     - nvtx >=0.2.1
     - packaging
+  run_constraints:
+    - numpy >=1.23,<3.0a0
+    - pyarrow>=14.0.0,<20.0.0a0
   ignore_run_exports:
     from_package:
       - cuda-cudart-dev

From cef896de223d6c9d13faca0c45fe191e63d6eb9b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 15 Aug 2025 17:52:50 -0700
Subject: [PATCH 143/366] Move test_concat/test_reductions.py to new cudf
 classic directory structure (#19626)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Includes some reduction-themed tests in test_string.py and test_timedelta.py

Some tests were consolidated or removed because they were similar to
existing tests

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19626
---
 python/cudf/cudf/tests/conftest.py            |   28 +
 .../cudf/tests/dataframe/test_reductions.py   |   75 +
 python/cudf/cudf/tests/reshape/test_concat.py | 2035 ++++++++++++++++
 python/cudf/cudf/tests/test_concat.py         | 2060 -----------------
 4 files changed, 2138 insertions(+), 2060 deletions(-)
 create mode 100644 python/cudf/cudf/tests/dataframe/test_reductions.py
 delete mode 100644 python/cudf/cudf/tests/test_concat.py

diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 88940a3ec47..772abdfba32 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -532,6 +532,34 @@ def ascending(request):
     return request.param
 
 
+axis_0s = [0, "index"]
+axis_1s = [1, "columns"]
+
+
+@pytest.fixture(params=axis_0s)
+def axis_0(request):
+    """Param for `axis=0` argument"""
+    return request.param
+
+
+@pytest.fixture(params=axis_1s)
+def axis_1(request):
+    """Param for `axis=1` argument"""
+    return request.param
+
+
+@pytest.fixture(params=axis_0s + axis_1s)
+def axis(request):
+    """Param for `axis` argument"""
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def sort(request):
+    """Param for `sort` argument"""
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def numeric_only(request):
+    """Param for `numeric_only` argument"""
+    return request.param
+
+
diff --git a/python/cudf/cudf/tests/dataframe/test_reductions.py b/python/cudf/cudf/tests/dataframe/test_reductions.py
new file mode 100644
index 00000000000..167ba2bd427
--- /dev/null
+++ b/python/cudf/cudf/tests/dataframe/test_reductions.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+ + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_single_q(): + q = 0.5 + + pdf = pd.DataFrame({"a": [4, 24, 13, 8, 7]}) + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +def test_with_index(): + q = [0, 0.5, 1] + + pdf = pd.DataFrame({"a": [7, 4, 4, 9, 13]}, index=[0, 4, 3, 2, 7]) + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +def test_with_multiindex(): + q = [0, 0.5, 1] + + pdf = pd.DataFrame( + { + "index_1": [3, 1, 9, 7, 5], + "index_2": [2, 4, 3, 5, 1], + "a": [8, 4, 2, 3, 8], + } + ) + pdf.set_index(["index_1", "index_2"], inplace=True) + + gdf = cudf.from_pandas(pdf) + + pdf_q = pdf.quantile(q, interpolation="nearest") + gdf_q = gdf.quantile(q, interpolation="nearest", method="table") + + assert_eq(pdf_q, gdf_q, check_index_type=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [10, 11, 12]}, + {"a": [1, 0, 3], "b": [10, 11, 12]}, + {"a": [1, 2, 3], "b": [10, 11, None]}, + { + "a": [], + }, + {}, + ], +) +@pytest.mark.parametrize("op", ["all", "any"]) +def test_any_all_axis_none(data, op): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = getattr(pdf, op)(axis=None) + actual = getattr(gdf, op)(axis=None) + + assert expected == actual diff --git a/python/cudf/cudf/tests/reshape/test_concat.py b/python/cudf/cudf/tests/reshape/test_concat.py index 8da43038159..e533bfac9df 100644 --- a/python/cudf/cudf/tests/reshape/test_concat.py +++ b/python/cudf/cudf/tests/reshape/test_concat.py @@ -1,11 +1,2046 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import warnings +from contextlib import contextmanager +from decimal import Decimal + import numpy as np import pandas as pd import pytest import cudf +from cudf.core._compat import PANDAS_GE_220 +from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if + + +@pytest.fixture(params=["outer", "inner"]) +def join(request): + return request.param + + +@contextmanager +def _hide_concat_empty_dtype_warning(): + with warnings.catch_warnings(): + # Ignoring warnings in this test as warnings are + # being caught and validated in other tests. 
+ warnings.filterwarnings( + "ignore", + "The behavior of array concatenation with empty entries " + "is deprecated.", + category=FutureWarning, + ) + yield + + +def make_frames(index=None, nulls="none"): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": list("abcde") * 2, + } + ) + df.z = df.z.astype("category") + df2 = pd.DataFrame( + { + "x": range(10, 20), + "y": list(map(float, range(10, 20))), + "z": list("edcba") * 2, + } + ) + df2.z = df2.z.astype("category") + if nulls == "all": + df.y = np.full_like(df.y, np.nan) + df2.y = np.full_like(df2.y, np.nan) + if nulls == "some": + mask = np.arange(10) + rng.shuffle(mask) + mask = mask[:5] + df.loc[mask, "y"] = np.nan + df2.loc[mask, "y"] = np.nan + gdf = cudf.DataFrame.from_pandas(df) + gdf2 = cudf.DataFrame.from_pandas(df2) + if index: + df = df.set_index(index) + df2 = df2.set_index(index) + gdf = gdf.set_index(index) + gdf2 = gdf2.set_index(index) + return df, df2, gdf, gdf2 + + +@pytest.mark.parametrize("nulls", ["none", "some", "all"]) +@pytest.mark.parametrize("index", [False, "z", "y"]) +def test_concat_dataframe(index, nulls, axis_0): + if index == "y" and nulls in ("some", "all"): + pytest.skip("nulls in columns, dont index") + df, df2, gdf, gdf2 = make_frames(index, nulls=nulls) + # Make empty frame + gdf_empty1 = gdf2[:0] + assert len(gdf_empty1) == 0 + df_empty1 = gdf_empty1.to_pandas() + + # DataFrame + with _hide_concat_empty_dtype_warning(): + res = cudf.concat( + [gdf, gdf2, gdf, gdf_empty1], axis=axis_0 + ).to_pandas() + sol = pd.concat([df, df2, df, df_empty1], axis=axis_0) + assert_eq( + res, + sol, + check_names=False, + check_categorical=False, + check_index_type=True, + ) + + # Series + for c in [i for i in ("x", "y", "z") if i != index]: + res = cudf.concat([gdf[c], gdf2[c], gdf[c]], axis=axis_0).to_pandas() + sol = pd.concat([df[c], df2[c], df[c]], axis=axis_0) + assert_eq( + res, + sol, + check_names=False, + check_categorical=False, + check_index_type=True, + ) + + # Index + res = cudf.concat([gdf.index, gdf2.index], axis=axis_0).to_pandas() + sol = df.index.append(df2.index) + assert_eq(res, sol, check_names=False, check_categorical=False) + + +@pytest.mark.parametrize( + "values", + [["foo", "bar"], [1.0, 2.0], pd.Series(["one", "two"], dtype="category")], +) +def test_concat_all_nulls(values): + pa = pd.Series(values) + pb = pd.Series([None]) + ps = pd.concat([pa, pb]) + + ga = cudf.Series(values) + gb = cudf.Series([None]) + gs = cudf.concat([ga, gb]) + + assert_eq( + ps, + gs, + check_dtype=False, + check_categorical=False, + check_index_type=True, + ) + + +def test_concat_errors(): + df, df2, gdf, gdf2 = make_frames() + + # No objs + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=([], {"objs": []}), + rfunc_args_and_kwargs=([], {"objs": []}), + ) + + # All None + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=([], {"objs": [None, None]}), + rfunc_args_and_kwargs=([], {"objs": [None, None]}), + ) + + # Mismatched types + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=([], {"objs": [df, df.index, df.x]}), + rfunc_args_and_kwargs=([], {"objs": [gdf, gdf.index, gdf.x]}), + ) + + # Unknown type + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=([], {"objs": ["bar", "foo"]}), + rfunc_args_and_kwargs=([], {"objs": ["bar", "foo"]}), + ) + + # Mismatched index dtypes + gdf3 = 
gdf2.copy() + del gdf3["z"] + gdf4 = gdf2.set_index("z") + + with pytest.raises(ValueError, match="All columns must be the same type"): + cudf.concat([gdf3, gdf4]) + + # Bad axis value + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=( + [], + {"objs": [gdf.to_pandas(), gdf2.to_pandas()], "axis": "bad_value"}, + ), + rfunc_args_and_kwargs=([], {"objs": [gdf, gdf2], "axis": "bad_value"}), + ) + + +def test_concat_misordered_columns(): + df, df2, gdf, gdf2 = make_frames(False) + gdf2 = gdf2[["z", "x", "y"]] + df2 = df2[["z", "x", "y"]] + + res = cudf.concat([gdf, gdf2]).to_pandas() + sol = pd.concat([df, df2], sort=False) + + assert_eq( + res, + sol, + check_names=False, + check_categorical=False, + check_index_type=True, + ) + + +def test_concat_columns(axis_1): + rng = np.random.default_rng(seed=0) + pdf1 = pd.DataFrame(rng.integers(10, size=(5, 3)), columns=[1, 2, 3]) + pdf2 = pd.DataFrame(rng.integers(10, size=(5, 4)), columns=[4, 5, 6, 7]) + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) + + expect = pd.concat([pdf1, pdf2], axis=axis_1) + got = cudf.concat([gdf1, gdf2], axis=axis_1) + + assert_eq(expect, got, check_index_type=True) + + +def test_concat_multiindex_dataframe(axis): + gdf = cudf.DataFrame( + { + "w": np.arange(4), + "x": np.arange(4), + "y": np.arange(4), + "z": np.arange(4), + } + ) + gdg = gdf.groupby(["w", "x"]).min() + pdg = gdg.to_pandas() + pdg1 = pdg.iloc[:, :1] + pdg2 = pdg.iloc[:, 1:] + gdg1 = cudf.from_pandas(pdg1) + gdg2 = cudf.from_pandas(pdg2) + expected = pd.concat([pdg1, pdg2], axis=axis) + result = cudf.concat([gdg1, gdg2], axis=axis) + assert_eq( + expected, + result, + check_index_type=True, + ) + + +def test_concat_multiindex_series(): + gdf = cudf.DataFrame( + { + "w": np.arange(4), + "x": np.arange(4), + "y": np.arange(4), + "z": np.arange(4), + } + ) + gdg = gdf.groupby(["w", "x"]).min() + pdg = gdg.to_pandas() + pdg1 = pdg["y"] + pdg2 = pdg["z"] + gdg1 = cudf.from_pandas(pdg1) + gdg2 = cudf.from_pandas(pdg2) + assert_eq( + cudf.concat([gdg1, gdg2]), + pd.concat([pdg1, pdg2]), + check_index_type=True, + ) + assert_eq( + cudf.concat([gdg1, gdg2], axis=1), pd.concat([pdg1, pdg2], axis=1) + ) + + +def test_concat_multiindex_dataframe_and_series(): + gdf = cudf.DataFrame( + { + "w": np.arange(4), + "x": np.arange(4), + "y": np.arange(4), + "z": np.arange(4), + } + ) + gdg = gdf.groupby(["w", "x"]).min() + pdg = gdg.to_pandas() + pdg1 = pdg[["y", "z"]] + pdg2 = pdg["z"] + pdg2.name = "a" + gdg1 = cudf.from_pandas(pdg1) + gdg2 = cudf.from_pandas(pdg2) + assert_eq( + cudf.concat([gdg1, gdg2], axis=1), + pd.concat([pdg1, pdg2], axis=1), + check_index_type=True, + ) + + +def test_concat_multiindex_series_and_dataframe(): + gdf = cudf.DataFrame( + { + "w": np.arange(4), + "x": np.arange(4), + "y": np.arange(4), + "z": np.arange(4), + } + ) + gdg = gdf.groupby(["w", "x"]).min() + pdg = gdg.to_pandas() + pdg1 = pdg["z"] + pdg2 = pdg[["y", "z"]] + pdg1.name = "a" + gdg1 = cudf.from_pandas(pdg1) + gdg2 = cudf.from_pandas(pdg2) + assert_eq( + cudf.concat([gdg1, gdg2], axis=1), + pd.concat([pdg1, pdg2], axis=1), + check_index_type=True, + ) + + +@pytest.mark.parametrize("myindex", ["a", "b"]) +def test_concat_string_index_name(myindex): + # GH-Issue #3420 + data = {"a": [123, 456], "b": ["s1", "s2"]} + df1 = cudf.DataFrame(data).set_index(myindex) + df2 = df1.copy() + df3 = cudf.concat([df1, df2]) + + assert df3.index.name == myindex + + +def test_pandas_concat_compatibility_axis1(): + d1 = 
cudf.datasets.randomdata( + 3, dtypes={"a": float, "ind": float} + ).set_index("ind") + d2 = cudf.datasets.randomdata( + 3, dtypes={"b": float, "ind": float} + ).set_index("ind") + d3 = cudf.datasets.randomdata( + 3, dtypes={"c": float, "ind": float} + ).set_index("ind") + d4 = cudf.datasets.randomdata( + 3, dtypes={"d": float, "ind": float} + ).set_index("ind") + d5 = cudf.datasets.randomdata( + 3, dtypes={"e": float, "ind": float} + ).set_index("ind") + + pd1 = d1.to_pandas() + pd2 = d2.to_pandas() + pd3 = d3.to_pandas() + pd4 = d4.to_pandas() + pd5 = d5.to_pandas() + + expect = pd.concat([pd1, pd2, pd3, pd4, pd5], axis=1) + got = cudf.concat([d1, d2, d3, d4, d5], axis=1) + + assert_eq( + got.sort_index(), + expect.sort_index(), + check_index_type=True, + ) + + +@pytest.mark.parametrize("index", [[0, 1, 2], [2, 1, 0], [5, 9, 10]]) +@pytest.mark.parametrize("names", [False, (0, 1)]) +@pytest.mark.parametrize( + "data", + [ + (["a", "b", "c"], ["a", "b", "c"]), + (["a", "b", "c"], ["XX", "YY", "ZZ"]), + ], +) +def test_pandas_concat_compatibility_axis1_overlap(index, names, data): + s1 = cudf.Series(data[0], index=[0, 1, 2]) + s2 = cudf.Series(data[1], index=index) + if names: + s1.name = names[0] + s2.name = names[1] + ps1 = s1.to_pandas() + ps2 = s2.to_pandas() + got = cudf.concat([s1, s2], axis=1) + expect = pd.concat([ps1, ps2], axis=1) + assert_eq(got, expect, check_index_type=True) + + +def test_pandas_concat_compatibility_axis1_eq_index(): + s1 = cudf.Series(["a", "b", "c"], index=[0, 1, 2]) + s2 = cudf.Series(["a", "b", "c"], index=[1, 1, 1]) + ps1 = s1.to_pandas() + ps2 = s2.to_pandas() + + assert_exceptions_equal( + lfunc=pd.concat, + rfunc=cudf.concat, + lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}), + rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}), + ) + + +@pytest.mark.parametrize("name", [None, "a"]) +def test_pandas_concat_compatibility_axis1_single_column(name): + # Pandas renames series name `None` to 0 + # and preserves anything else + s = cudf.Series([1, 2, 3], name=name) + got = cudf.concat([s], axis=1) + expected = pd.concat([s.to_pandas()], axis=1) + assert_eq(expected, got) + + +def test_concat_duplicate_columns(): + cdf = cudf.DataFrame( + { + "id4": 4 * list(range(6)), + "id5": 4 * list(reversed(range(6))), + "v3": 6 * list(range(4)), + } + ) + cdf_std = cdf.groupby(["id4", "id5"])[["v3"]].std() + cdf_med = cdf.groupby(["id4", "id5"])[["v3"]].quantile(q=0.5) + with pytest.raises(NotImplementedError): + cudf.concat([cdf_med, cdf_std], axis=1) + + +def test_concat_mixed_input(): + pdf1 = pd.DataFrame({"a": [10, 20, 30]}) + pdf2 = pd.DataFrame({"a": [11, 22, 33]}) + + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) + + assert_eq( + pd.concat([pdf1, None, pdf2, None]), + cudf.concat([gdf1, None, gdf2, None]), + check_index_type=True, + ) + assert_eq( + pd.concat([pdf1, None]), + cudf.concat([gdf1, None]), + check_index_type=True, + ) + assert_eq( + pd.concat([None, pdf2]), + cudf.concat([None, gdf2]), + check_index_type=True, + ) + assert_eq( + pd.concat([None, pdf2, pdf1]), + cudf.concat([None, gdf2, gdf1]), + check_index_type=True, + ) + + +@pytest.mark.parametrize( + "objs", + [ + [pd.Series([1, 2, 3]), pd.DataFrame({"a": [1, 2]})], + [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})], + [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], + [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], + pytest.param( + [ + pd.Series([1, 2, 3.0, 1.2], name="abc"), + pd.DataFrame({"a": [1, 2]}), + ], + 
marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] + ), + pd.DataFrame({"a": [1, 2]}), + ], + marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] + ), + pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), + ], + marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ], + marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), + pytest.param( + [ + pd.Series( + [1, 2, 3.0, 1.2, 8, 100], + name="New name", + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame( + {"a": [1, 2, 4, 10, 11, 12]}, + index=["a", "b", "c", "d", "e", "f"], + ), + ] + * 7, + marks=pytest.mark.skipif( + not PANDAS_GE_220, + reason="https://github.com/pandas-dev/pandas/pull/56365", + ), + ), + ], +) +def test_concat_series_dataframe_input(objs): + pd_objs = objs + gd_objs = [cudf.from_pandas(obj) for obj in objs] + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat(pd_objs) + actual = cudf.concat(gd_objs) + + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "objs", + [ + [ + pd.Series(["a", "b", "c", "d"]), + pd.Series(["1", "2", "3", "4"]), + pd.DataFrame({"first col": ["10", "11", "12", "13"]}), + ], + [ + pd.Series(["a", "b", "c", "d"]), + pd.Series(["1", "2", "3", "4"]), + pd.DataFrame( + { + "first col": ["10", "11", "12", "13"], + "second col": ["a", "b", "c", "d"], + } + ), + ], + [ + pd.Series(["a", "b", "c"]), + pd.Series(["1", "2", "3", "4"]), + pd.DataFrame( + { + "first col": ["10", "11", "12", "13"], + "second col": ["a", "b", "c", "d"], + } + ), + ], + ], +) +def test_concat_series_dataframe_input_str(objs): + pd_objs = objs + gd_objs = [cudf.from_pandas(obj) for obj in objs] + + expected = pd.concat(pd_objs) + actual = cudf.concat(gd_objs) + assert_eq(expected, actual, check_dtype=False, check_index_type=False) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[200]), + pd.DataFrame([], index=[100]), + pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), + ], +) +@pytest.mark.parametrize( + "other", + [ + [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], + [ + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame(), + pd.DataFrame(), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + ], + [ + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[200]), + pd.DataFrame( + {"cat": pd.Series(["two", "three"], dtype="category")} + ), + ], + [ + pd.DataFrame([]), + pd.DataFrame([], index=[100]), + 
pd.DataFrame( + {"cat": pd.Series(["two", "three"], dtype="category")} + ), + ], + ], +) +def test_concat_empty_dataframes(df, other, ignore_index): + other_pd = [df, *other] + + gdf = cudf.from_pandas(df) + other_gd = [gdf] + [cudf.from_pandas(o) for o in other] + + expected = pd.concat(other_pd, ignore_index=ignore_index) + actual = cudf.concat(other_gd, ignore_index=ignore_index) + if expected.shape != df.shape: + for key, col in actual[actual.columns].items(): + if isinstance(col.dtype, cudf.CategoricalDtype): + if not isinstance(expected[key].dtype, pd.CategoricalDtype): + # TODO: Pandas bug: + # https://github.com/pandas-dev/pandas/issues/42840 + expected[key] = expected[key].fillna("-1").astype("str") + else: + expected[key] = ( + expected[key] + .cat.add_categories(["-1"]) + .fillna("-1") + .astype("str") + ) + actual[key] = col.astype("str").fillna("-1") + else: + expected[key] = expected[key].fillna(-1) + actual[key] = col.fillna(-1) + assert_eq(expected, actual, check_dtype=False, check_index_type=True) + else: + assert_eq( + expected, + actual, + check_index_type=not gdf.empty, + check_column_type=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + (["a", "b", "c"], ["a", "b", "c"]), + (["a", "b", "c"], ["XX", "YY", "ZZ"]), + ], +) +def test_concat_empty_and_nonempty_series(ignore_index, data, axis_0): + s1 = cudf.Series() + s2 = cudf.Series(data[0]) + ps1 = s1.to_pandas() + ps2 = s2.to_pandas() + got = cudf.concat([s1, s2], axis=axis_0, ignore_index=ignore_index) + expect = pd.concat([ps1, ps2], axis=axis_0, ignore_index=ignore_index) + + assert_eq(got, expect, check_index_type=True) + + +def test_concat_two_empty_series(ignore_index, axis_0): + s1 = cudf.Series() + s2 = cudf.Series() + ps1 = s1.to_pandas() + ps2 = s2.to_pandas() + got = cudf.concat([s1, s2], axis=axis_0, ignore_index=ignore_index) + expect = pd.concat([ps1, ps2], axis=axis_0, ignore_index=ignore_index) + + assert_eq(got, expect, check_index_type=True) + + +@pytest.mark.parametrize( + "key2", [[0, 1], [1, 0]], ids=["matching", "different"] +) +def test_concat_dataframe_with_multiindex(key2): + gdf1 = cudf.DataFrame({"k1": [0, 1], "k2": [2, 3], "v1": [4, 5]}) + gdf1 = gdf1.set_index(["k1", "k2"]) + + gdf2 = cudf.DataFrame({"k1": key2, "k2": [3, 2], "v2": [6, 7]}) + gdf2 = gdf2.set_index(["k1", "k2"]) + + pdf1 = gdf1.to_pandas() + pdf2 = gdf2.to_pandas() + + actual = cudf.concat([gdf1, gdf2], axis=1) + expected = pd.concat([pdf1, pdf2], axis=1) + + # Will need to sort_index before comparing as + # ordering is not deterministic in case of pandas + # multiIndex with concat. 
+ assert_eq( + expected.sort_index(), + actual.sort_index(), + check_index_type=True, + ) + + +@pytest.mark.parametrize( + "objs", + [ + [ + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ), + pd.DataFrame( + {"x": range(10, 20), "y": list(map(float, range(10, 20)))} + ), + ], + [ + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"x": range(10, 20), "y": list(map(float, range(10, 20)))}, + index=["k", "l", "m", "n", "o", "p", "q", "r", "s", "t"], + ), + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"x": range(10, 20), "y": list(map(float, range(10, 20)))}, + index=["a", "b", "c", "d", "z", "f", "g", "h", "i", "w"], + ), + ], + ], +) +def test_concat_join(objs, ignore_index, sort, join, axis): + axis = 0 + gpu_objs = [cudf.from_pandas(o) for o in objs] + + assert_eq( + pd.concat( + objs, sort=sort, join=join, ignore_index=ignore_index, axis=axis + ), + cudf.concat( + gpu_objs, + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ), + check_index_type=True, + ) + + +@pytest.mark.parametrize( + "objs", + [ + [ + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ), + pd.DataFrame( + {"x": range(10, 20), "y": list(map(float, range(10, 20)))} + ), + ], + ], +) +def test_concat_join_axis_1_dup_error(objs): + gpu_objs = [cudf.from_pandas(o) for o in objs] + # we do not support duplicate columns + with pytest.raises(NotImplementedError): + assert_eq( + pd.concat( + objs, + axis=1, + ), + cudf.concat( + gpu_objs, + axis=1, + ), + ) + + +@pytest.mark.parametrize( + "objs", + [ + [ + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ), + pd.DataFrame( + {"l": range(10, 20), "m": list(map(float, range(10, 20)))} + ), + ], + ], +) +def test_concat_join_axis_1(objs, ignore_index, sort, join, axis): + # no duplicate columns + axis = 1 + gpu_objs = [cudf.from_pandas(o) for o in objs] + expected = pd.concat( + objs, sort=sort, join=join, ignore_index=ignore_index, axis=axis + ) + actual = cudf.concat( + gpu_objs, + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + + assert_eq(expected, actual, check_index_type=True) + + +def test_concat_join_many_df_and_empty_df(ignore_index, sort, join, axis): + # no duplicate columns + pdf1 = pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ) + pdf2 = pd.DataFrame( + {"l": range(10, 20), "m": list(map(float, range(10, 20)))} + ) + pdf3 = pd.DataFrame({"j": [1, 2], "k": [1, 2], "s": [1, 2], "t": [1, 2]}) + pdf_empty1 = pd.DataFrame() + + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) + gdf3 = cudf.from_pandas(pdf3) + gdf_empty1 = cudf.from_pandas(pdf_empty1) + + with _hide_concat_empty_dtype_warning(): + assert_eq( + pd.concat( + [pdf1, pdf2, pdf3, pdf_empty1], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ), + cudf.concat( + [gdf1, gdf2, gdf3, gdf_empty1], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ), + check_index_type=False, + ) + + +def test_concat_join_one_df(ignore_index, sort, join, axis): 
+ pdf1 = pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ) + + gdf1 = cudf.from_pandas(pdf1) + expected = pd.concat( + [pdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis + ) + actual = cudf.concat( + [gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis + ) + + assert_eq(expected, actual, check_index_type=True) + + +@pytest.mark.parametrize( + "pdf1,pdf2", + [ + ( + pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), + pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}), + ), + ( + pd.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6]}, index=["p", "q", "r"] + ), + pd.DataFrame( + {"c": [7, 8, 9], "d": [10, 11, 12]}, index=["r", "p", "z"] + ), + ), + ], +) +def test_concat_join_no_overlapping_columns( + pdf1, pdf2, ignore_index, sort, join, axis +): + gdf1 = cudf.from_pandas(pdf1) + gdf2 = cudf.from_pandas(pdf2) + + expected = pd.concat( + [pdf1, pdf2], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + actual = cudf.concat( + [gdf1, gdf2], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + + assert_eq(expected, actual, check_index_type=True) + + +def test_concat_join_no_overlapping_columns_many_and_empty( + ignore_index, sort, join, axis +): + pdf4 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + pdf5 = pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}) + pdf6 = pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ) + pdf_empty = pd.DataFrame() + + gdf4 = cudf.from_pandas(pdf4) + gdf5 = cudf.from_pandas(pdf5) + gdf6 = cudf.from_pandas(pdf6) + gdf_empty = cudf.from_pandas(pdf_empty) + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + [pdf4, pdf5, pdf6, pdf_empty], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + actual = cudf.concat( + [gdf4, gdf5, gdf6, gdf_empty], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + assert_eq( + expected, + actual, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "objs", + [ + [ + pd.DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6]}, index=["z", "t", "k"] + ), + pd.DataFrame( + {"c": [7, 8, 9], "d": [10, 11, 12]}, index=["z", "t", "k"] + ), + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }, + index=["z", "t", "k", "a", "b", "c", "d", "e", "f", "g"], + ), + pd.DataFrame(index=pd.Index([], dtype="str")), + ], + [ + pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), + pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}), + pd.DataFrame( + { + "x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ), + pd.DataFrame(index=pd.Index([], dtype="str")), + ], + pytest.param( + [ + pd.DataFrame( + {"a": [1, 2, 3], "nb": [10, 11, 12]}, index=["Q", "W", "R"] + ), + None, + ], + ), + ], +) +def test_concat_join_no_overlapping_columns_many_and_empty2( + objs, ignore_index, sort, join, axis +): + objs_gd = [cudf.from_pandas(o) if o is not None else o for o in objs] + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + objs, + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + actual = cudf.concat( + objs_gd, + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + assert_eq(expected, actual, check_index_type=False) + + +def test_concat_join_no_overlapping_columns_empty_df_basic( + ignore_index, sort, join, axis +): + pdf6 = pd.DataFrame( + { + 
"x": range(10), + "y": list(map(float, range(10))), + "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + } + ) + pdf_empty = pd.DataFrame() + + gdf6 = cudf.from_pandas(pdf6) + gdf_empty = cudf.from_pandas(pdf_empty) + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + [pdf6, pdf_empty], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + actual = cudf.concat( + [gdf6, gdf_empty], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + assert_eq( + expected, + actual, + check_index_type=True, + check_column_type=False, + ) + + +def test_concat_join_series(ignore_index, sort, join, axis): + s1 = cudf.Series(["a", "b", "c"]) + s2 = cudf.Series(["a", "b"]) + s3 = cudf.Series(["a", "b", "c", "d"]) + s4 = cudf.Series(dtype="str") + + ps1 = s1.to_pandas() + ps2 = s2.to_pandas() + ps3 = s3.to_pandas() + ps4 = s4.to_pandas() + + expected = pd.concat( + [ps1, ps2, ps3, ps4], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + with expect_warning_if(axis in {1, "columns"}): + actual = cudf.concat( + [s1, s2, s3, s4], + sort=sort, + join=join, + ignore_index=ignore_index, + axis=axis, + ) + + assert_eq( + expected, + actual, + check_index_type=True, + ) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[200]), + pd.DataFrame([], index=[100]), + pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), + ], +) +@pytest.mark.parametrize( + "other", + [ + [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], + [ + pd.DataFrame( + {"b": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame(), + pd.DataFrame(), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + ], + [ + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"k": [10]}, index=[200]), + pd.DataFrame( + {"cat": pd.Series(["two", "three"], dtype="category")} + ), + ], + [ + pd.DataFrame([]), + pd.DataFrame([], index=[100]), + pd.DataFrame( + {"cat": pd.Series(["two", "three"], dtype="category")} + ), + ], + ], +) +def test_concat_join_empty_dataframes( + request, df, other, ignore_index, join, sort +): + axis = 0 + other_pd = [df, *other] + gdf = cudf.from_pandas(df) + other_gd = [gdf] + [cudf.from_pandas(o) for o in other] + + expected = pd.concat( + other_pd, ignore_index=ignore_index, axis=axis, join=join, sort=sort + ) + actual = cudf.concat( + other_gd, ignore_index=ignore_index, axis=axis, join=join, sort=sort + ) + if ( + join == "outer" + and any( + isinstance(dtype, pd.CategoricalDtype) + for dtype in df.dtypes.tolist() + ) + and any( + isinstance(dtype, pd.CategoricalDtype) + for other_df in other + for dtype in other_df.dtypes.tolist() + ) + ): + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/42840" + ) + ) + assert_eq( + expected, + actual, + check_dtype=False, + check_column_type=False, + ) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": 
[10]}), + pd.DataFrame({"m": [10]}, index=[200]), + pd.DataFrame([], index=[100]), + pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), + ], +) +@pytest.mark.parametrize( + "other", + [ + [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], + [ + pd.DataFrame( + {"b": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame(), + pd.DataFrame(), + pd.DataFrame([[5, 6], [7, 8]], columns=list("CD")), + ], + [ + pd.DataFrame({"g": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"h": [10]}), + pd.DataFrame({"k": [10]}, index=[200]), + pd.DataFrame( + {"dog": pd.Series(["two", "three"], dtype="category")} + ), + ], + [ + pd.DataFrame([]), + pd.DataFrame([], index=[100]), + pd.DataFrame( + {"bird": pd.Series(["two", "three"], dtype="category")} + ), + ], + ], +) +def test_concat_join_empty_dataframes_axis_1( + df, other, ignore_index, axis, join, sort +): + # no duplicate columns + axis = 1 + other_pd = [df, *other] + gdf = cudf.from_pandas(df) + other_gd = [gdf] + [cudf.from_pandas(o) for o in other] + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + other_pd, + ignore_index=ignore_index, + axis=axis, + join=join, + sort=sort, + ) + actual = cudf.concat( + other_gd, + ignore_index=ignore_index, + axis=axis, + join=join, + sort=sort, + ) + if expected.shape != df.shape: + if axis == 0: + for key, col in actual[actual.columns].items(): + if isinstance(expected[key].dtype, pd.CategoricalDtype): + expected[key] = expected[key].fillna("-1") + actual[key] = col.astype("str").fillna("-1") + # if not expected.empty: + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_index_type=False + if len(expected) == 0 or actual.empty + else True, + check_column_type=False, + ) + else: + # no need to fill in if axis=1 + assert_eq( + expected, + actual, + check_index_type=False, + check_column_type=False, + ) + assert_eq( + expected, actual, check_index_type=False, check_column_type=False + ) + + +def test_concat_preserve_order(): + """Ensure that order is preserved on 'inner' concatenations.""" + df = pd.DataFrame([["d", 3, 4.0], ["c", 4, 5.0]], columns=["c", "b", "a"]) + dfs = [df, df] + + assert_eq( + pd.concat(dfs, join="inner"), + cudf.concat([cudf.DataFrame(df) for df in dfs], join="inner"), + check_index_type=True, + ) + + +@pytest.mark.parametrize("typ", [cudf.DataFrame, cudf.Series]) +def test_concat_single_object(ignore_index, typ): + """Ensure that concat on a single object does not change it.""" + obj = typ([1, 2, 3]) + assert_eq( + cudf.concat([obj], ignore_index=ignore_index, axis=0), + obj, + check_index_type=True, + ) + + +@pytest.mark.parametrize( + "ltype", + [Decimal64Dtype(3, 1), Decimal64Dtype(7, 2), Decimal64Dtype(8, 4)], +) +@pytest.mark.parametrize( + "rtype", + [ + Decimal64Dtype(3, 2), + Decimal64Dtype(8, 4), + cudf.Decimal128Dtype(3, 2), + cudf.Decimal32Dtype(8, 4), + ], +) +def test_concat_decimal_dataframe(ltype, rtype): + rng = np.random.default_rng(seed=0) + gdf1 = cudf.DataFrame( + {"id": rng.integers(0, 10, 3), "val": ["22.3", "59.5", "81.1"]} + ) + gdf2 = cudf.DataFrame( + {"id": rng.integers(0, 10, 3), "val": ["2.35", "5.59", "8.14"]} + ) + + gdf1["val"] = gdf1["val"].astype(ltype) + gdf2["val"] = gdf2["val"].astype(rtype) + + pdf1 = gdf1.to_pandas() + pdf2 = gdf2.to_pandas() + + got = cudf.concat([gdf1, gdf2]) + expected = pd.concat([pdf1, pdf2]) + + assert_eq(expected, got, check_index_type=True) + + +@pytest.mark.parametrize("ltype", [Decimal64Dtype(4, 1), 
Decimal64Dtype(8, 2)]) +@pytest.mark.parametrize( + "rtype", + [ + Decimal64Dtype(4, 3), + Decimal64Dtype(10, 4), + Decimal32Dtype(8, 3), + Decimal128Dtype(18, 3), + ], +) +def test_concat_decimal_series(ltype, rtype): + gs1 = cudf.Series(["228.3", "559.5", "281.1"]).astype(ltype) + gs2 = cudf.Series(["2.345", "5.259", "8.154"]).astype(rtype) + + ps1 = gs1.to_pandas() + ps2 = gs2.to_pandas() + + got = cudf.concat([gs1, gs2]) + expected = pd.concat([ps1, ps2]) + + assert_eq(expected, got, check_index_type=True) + + +@pytest.mark.parametrize( + "data1, dtype1, index1, data2, dtype2, index2, data3, dtype3, index3, expected_data, expected_dtype, expected_index", + [ + [ + {"val": [Decimal("42.5"), Decimal("8.7")]}, + Decimal64Dtype(5, 2), + None, + {"val": [Decimal("9.23"), Decimal("-67.49")]}, + Decimal64Dtype(6, 4), + None, + {"val": [8, -5]}, + "int32", + None, + { + "val": [ + Decimal("42.5"), + Decimal("8.7"), + Decimal("9.23"), + Decimal("-67.49"), + Decimal("8"), + Decimal("-5"), + ] + }, + Decimal32Dtype(7, 4), + [0, 1, 0, 1, 0, 1], + ], + [ + {"val": [Decimal("95.2"), Decimal("23.4")]}, + Decimal64Dtype(5, 2), + None, + {"val": [54, 509]}, + "uint16", + None, + {"val": [24, -48]}, + "int32", + None, + { + "val": [ + Decimal("95.2"), + Decimal("23.4"), + Decimal("54"), + Decimal("509"), + Decimal("24"), + Decimal("-48"), + ] + }, + Decimal32Dtype(5, 2), + [0, 1, 0, 1, 0, 1], + ], + [ + {"val": [Decimal("36.56"), Decimal("-59.24")]}, + Decimal64Dtype(9, 4), + None, + {"val": [403.21, 45.13]}, + "float32", + None, + {"val": [52.262, -49.25]}, + "float64", + None, + { + "val": [ + Decimal("36.56"), + Decimal("-59.24"), + Decimal("403.21"), + Decimal("45.13"), + Decimal("52.262"), + Decimal("-49.25"), + ] + }, + Decimal32Dtype(9, 4), + [0, 1, 0, 1, 0, 1], + ], + [ + {"val": [Decimal("9563.24"), Decimal("236.633")]}, + Decimal64Dtype(9, 4), + None, + {"val": [5393, -95832]}, + "int64", + None, + {"val": [-29.234, -31.945]}, + "float64", + None, + { + "val": [ + Decimal("9563.24"), + Decimal("236.633"), + Decimal("5393"), + Decimal("-95832"), + Decimal("-29.234"), + Decimal("-31.945"), + ] + }, + Decimal32Dtype(9, 4), + [0, 1, 0, 1, 0, 1], + ], + [ + {"val": [Decimal("95633.24"), Decimal("236.633")]}, + Decimal128Dtype(19, 4), + None, + {"val": [5393, -95832]}, + "int64", + None, + {"val": [-29.234, -31.945]}, + "float64", + None, + { + "val": [ + Decimal("95633.24"), + Decimal("236.633"), + Decimal("5393"), + Decimal("-95832"), + Decimal("-29.234"), + Decimal("-31.945"), + ] + }, + Decimal128Dtype(19, 4), + [0, 1, 0, 1, 0, 1], + ], + ], +) +def test_concat_decimal_numeric_dataframe( + data1, + dtype1, + index1, + data2, + dtype2, + index2, + data3, + dtype3, + index3, + expected_data, + expected_dtype, + expected_index, +): + df1 = cudf.DataFrame(data1, dtype=dtype1, index=index1) + df2 = cudf.DataFrame(data2, dtype=dtype2, index=index2) + df3 = cudf.DataFrame(data3, dtype=dtype3, index=index3) + expected = cudf.DataFrame( + expected_data, dtype=expected_dtype, index=expected_index + ) + df = cudf.concat([df1, df2, df3]) + assert_eq(df, expected, check_index_type=True) + assert_eq(df.val.dtype, expected.val.dtype) + + +@pytest.mark.parametrize( + "data1, dtype1, index1, data2, dtype2, index2, data3, dtype3, index3, expected_data, expected_dtype, expected_index", + [ + [ + [Decimal("32.8"), Decimal("-87.7")], + Decimal64Dtype(6, 2), + None, + [Decimal("101.243"), Decimal("-92.449")], + Decimal64Dtype(9, 6), + None, + [94, -22], + "int32", + None, + [ + Decimal("32.8"), + Decimal("-87.7"), + 
Decimal("101.243"), + Decimal("-92.449"), + Decimal("94"), + Decimal("-22"), + ], + Decimal64Dtype(10, 6), + [0, 1, 0, 1, 0, 1], + ], + [ + [Decimal("7.2"), Decimal("122.1")], + Decimal64Dtype(5, 2), + None, + [33, 984], + "uint32", + None, + [593, -702], + "int32", + None, + [ + Decimal("7.2"), + Decimal("122.1"), + Decimal("33"), + Decimal("984"), + Decimal("593"), + Decimal("-702"), + ], + Decimal32Dtype(5, 2), + [0, 1, 0, 1, 0, 1], + ], + [ + [Decimal("982.94"), Decimal("-493.626")], + Decimal64Dtype(9, 4), + None, + [847.98, 254.442], + "float32", + None, + [5299.262, -2049.25], + "float64", + None, + [ + Decimal("982.94"), + Decimal("-493.626"), + Decimal("847.98"), + Decimal("254.442"), + Decimal("5299.262"), + Decimal("-2049.25"), + ], + Decimal32Dtype(9, 4), + [0, 1, 0, 1, 0, 1], + ], + [ + [Decimal("492.204"), Decimal("-72824.455")], + Decimal64Dtype(9, 4), + None, + [8438, -27462], + "int64", + None, + [-40.292, 49202.953], + "float64", + None, + [ + Decimal("492.204"), + Decimal("-72824.455"), + Decimal("8438"), + Decimal("-27462"), + Decimal("-40.292"), + Decimal("49202.953"), + ], + Decimal32Dtype(9, 4), + [0, 1, 0, 1, 0, 1], + ], + [ + [Decimal("492.204"), Decimal("-72824.455")], + Decimal64Dtype(10, 4), + None, + [Decimal("8438"), Decimal("-27462")], + Decimal32Dtype(9, 4), + None, + [Decimal("-40.292"), Decimal("49202.953")], + Decimal128Dtype(19, 4), + None, + [ + Decimal("492.204"), + Decimal("-72824.455"), + Decimal("8438"), + Decimal("-27462"), + Decimal("-40.292"), + Decimal("49202.953"), + ], + Decimal128Dtype(19, 4), + [0, 1, 0, 1, 0, 1], + ], + ], +) +def test_concat_decimal_numeric_series( + data1, + dtype1, + index1, + data2, + dtype2, + index2, + data3, + dtype3, + index3, + expected_data, + expected_dtype, + expected_index, +): + s1 = cudf.Series(data1, dtype=dtype1, index=index1) + s2 = cudf.Series(data2, dtype=dtype2, index=index2) + s3 = cudf.Series(data3, dtype=dtype3, index=index3) + expected = cudf.Series( + expected_data, dtype=expected_dtype, index=expected_index + ) + s = cudf.concat([s1, s2, s3]) + assert_eq(s, expected, check_index_type=True) + + +@pytest.mark.parametrize( + "data1, dtype1, index1, data2, dtype2, index2, expected_data, expected_dtype, expected_index", + [ + [ + [Decimal("955.22"), Decimal("8.2")], + Decimal64Dtype(5, 2), + None, + ["2007-06-12", "2006-03-14"], + "datetime64[s]", + None, + [ + "955.22", + "8.20", + "2007-06-12 00:00:00", + "2006-03-14 00:00:00", + ], + None, + [0, 1, 0, 1], + ], + [ + [Decimal("-52.44"), Decimal("365.22")], + Decimal64Dtype(5, 2), + None, + np.arange( + "2005-02-01T12", "2005-02-01T15", dtype="datetime64[h]" + ).astype("datetime64[s]"), + "datetime64[s]", + None, + [ + "-52.44", + "365.22", + "2005-02-01 12:00:00", + "2005-02-01 13:00:00", + "2005-02-01 14:00:00", + ], + None, + [0, 1, 0, 1, 2], + ], + [ + [Decimal("753.0"), Decimal("94.22")], + Decimal64Dtype(5, 2), + None, + [np.timedelta64(111, "s"), np.timedelta64(509, "s")], + None, + None, + [ + "753.00", + "94.22", + "0 days 00:01:51", + "0 days 00:08:29", + ], + None, + [0, 1, 0, 1], + ], + [ + [Decimal("753.0"), Decimal("94.22")], + Decimal64Dtype(5, 2), + None, + [np.timedelta64(940252, "s"), np.timedelta64(758385, "s")], + None, + None, + [ + "753.00", + "94.22", + "10 days 21:10:52", + "8 days 18:39:45", + ], + None, + [0, 1, 0, 1], + ], + ], +) +def test_concat_decimal_non_numeric( + data1, + dtype1, + index1, + data2, + dtype2, + index2, + expected_data, + expected_dtype, + expected_index, +): + s1 = cudf.Series(data1, dtype=dtype1, 
index=index1) + s2 = cudf.Series(data2, dtype=dtype2, index=index2) + expected = cudf.Series( + expected_data, dtype=expected_dtype, index=expected_index + ) + s = cudf.concat([s1, s2]) + assert_eq(s, expected, check_index_type=True) + + +def test_concat_struct_column(): + s1 = cudf.Series([{"a": 5}, {"c": "hello"}, {"b": 7}]) + s2 = cudf.Series([{"a": 5, "c": "hello", "b": 7}]) + expected = cudf.Series( + [ + {"a": 5, "b": None, "c": None}, + {"a": None, "b": None, "c": "hello"}, + {"a": None, "b": 7, "c": None}, + {"a": 5, "b": 7, "c": "hello"}, + ], + index=[0, 1, 2, 0], + ) + s = cudf.concat([s1, s2]) + assert_eq(s, expected, check_index_type=True) + + +@pytest.mark.parametrize( + "frame1_cls, frame1_data, frame2_cls, frame2_data, expected_cls, expected_data", + [ + ( + cudf.Series, + {"data": [[{"b": 0}], [{"b": 1}], [{"b": 3}]]}, + cudf.Series, + {"data": [[{"b": 10}], [{"b": 12}], None]}, + cudf.Series, + { + "data": [ + [{"b": 0}], + [{"b": 1}], + [{"b": 3}], + [{"b": 10}], + [{"b": 12}], + None, + ], + "index": [0, 1, 2, 0, 1, 2], + }, + ), + ( + cudf.DataFrame, + {"data": {"a": [[{"b": 0}], [{"b": 1}], [{"b": 3}]]}}, + cudf.DataFrame, + {"data": {"a": [[{"b": 10}], [{"b": 12}], None]}}, + cudf.DataFrame, + { + "data": { + "a": [ + [{"b": 0}], + [{"b": 1}], + [{"b": 3}], + [{"b": 10}], + [{"b": 12}], + None, + ] + }, + "index": [0, 1, 2, 0, 1, 2], + }, + ), + ], +) +def test_concat_list_column( + frame1_cls, + frame1_data, + frame2_cls, + frame2_data, + expected_cls, + expected_data, +): + frame1 = frame1_cls(**frame1_data) + frame2 = frame2_cls(**frame2_data) + expected = expected_cls(**expected_data) + actual = cudf.concat([frame1, frame2]) + assert_eq(actual, expected, check_index_type=True) + + +def test_concat_categorical_ordering(): + # https://github.com/rapidsai/cudf/issues/11486 + sr = pd.Series( + ["a", "b", "c", "d", "e", "a", "b", "c", "d", "e"], dtype="category" + ) + sr = sr.cat.set_categories(["d", "a", "b", "c", "e"]) + + df = pd.DataFrame({"a": sr}) + gdf = cudf.from_pandas(df) + + expect = pd.concat([df, df, df]) + got = cudf.concat([gdf, gdf, gdf]) + + assert_eq(expect, got) + + +@pytest.fixture(params=["rangeindex", "index"]) +def singleton_concat_index(request): + if request.param == "rangeindex": + return pd.RangeIndex(0, 4) + else: + return pd.Index(["a", "h", "g", "f"]) + + +@pytest.fixture(params=["dataframe", "series"]) +def singleton_concat_obj(request, singleton_concat_index): + if request.param == "dataframe": + return pd.DataFrame( + { + "b": [1, 2, 3, 4], + "d": [7, 8, 9, 10], + "a": [4, 5, 6, 7], + "c": [10, 11, 12, 13], + }, + index=singleton_concat_index, + ) + else: + return pd.Series([4, 5, 5, 6], index=singleton_concat_index) + + +def test_concat_singleton_sorting( + axis, sort, ignore_index, singleton_concat_obj +): + gobj = cudf.from_pandas(singleton_concat_obj) + gconcat = cudf.concat( + [gobj], axis=axis, sort=sort, ignore_index=ignore_index + ) + pconcat = pd.concat( + [singleton_concat_obj], axis=axis, sort=sort, ignore_index=ignore_index + ) + assert_eq(pconcat, gconcat) + + +@pytest.mark.parametrize("axis", [2, "invalid"]) +def test_concat_invalid_axis(axis): + s = cudf.Series([1, 2, 3]) + with pytest.raises(ValueError): + cudf.concat([s], axis=axis) + + +@pytest.mark.parametrize( + "s1,s2", + [ + ([1, 2], [[1, 2], [3, 4]]), + ], +) +def test_concat_mixed_list_types_error(s1, s2): + s1, s2 = cudf.Series(s1), cudf.Series(s2) + + with pytest.raises(NotImplementedError): + cudf.concat([s1, s2], ignore_index=True) + + 
+@pytest.mark.parametrize( + "axis", + [ + pytest.param( + 0, + marks=pytest.mark.xfail( + reason="concat dictionaries with axis=0 not implemented" + ), + ), + 1, + "columns", + ], +) +@pytest.mark.parametrize( + "d", + [ + {"first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}})}, + { + "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), + "second": (cudf.DataFrame, {"data": {"A": [5, 6], "B": [7, 8]}}), + "third": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), + }, + { + "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), + "second": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), + "third": (cudf.DataFrame, {"data": {"A": [5, 6], "B": [7, 8]}}), + }, + { + "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), + "second": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), + "third": (cudf.DataFrame, {"data": {"A": [5, 6], "C": [7, 8]}}), + "fourth": (cudf.DataFrame, {"data": {"B": [9, 10]}}), + }, + pytest.param( + { + "first": (cudf.DataFrame, {"data": {2.0: [1, 1]}}), + "second": (cudf.DataFrame, {"data": {"test": ["abc", "def"]}}), + }, + marks=pytest.mark.xfail( + reason=( + "Cannot construct a MultiIndex column with multiple " + "label types in cuDF at this time. You must convert " + "the labels to the same type." + ) + ), + ), + { + "first": (cudf.Series, {"data": [1, 2, 3]}), + "second": (cudf.Series, {"data": [4, 5, 6]}), + }, + { + "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), + "second": (cudf.Series, {"data": [5, 6], "name": "C"}), + }, + pytest.param( + { + "first": ( + cudf.DataFrame, + {"data": {("A", "B"): [1, 2], "C": [3, 4]}}, + ), + "second": ( + cudf.DataFrame, + {"data": {"D": [5, 6], ("A", "B"): [7, 8]}}, + ), + }, + marks=pytest.mark.xfail( + reason=( + "Cannot construct a MultiIndex column with multiple " + "label types in cuDF at this time. You must convert " + "the labels to the same type." + ) + ), + ), + pytest.param( + { + "first": ( + cudf.DataFrame, + {"data": {("A", "B"): [3, 4], 2.0: [1, 1]}}, + ), + "second": ( + cudf.DataFrame, + {"data": {("C", "D"): [3, 4], 3.0: [5, 6]}}, + ), + }, + marks=pytest.mark.xfail( + reason=( + "Cannot construct a MultiIndex column with multiple " + "label types in cuDF at this time. You must convert " + "the labels to the same type." + ) + ), + ), + { + "first": ( + cudf.DataFrame, + {"data": {(1, 2): [1, 2], (3, 4): [3, 4]}}, + ), + "second": ( + cudf.DataFrame, + {"data": {(1, 2): [5, 6], (5, 6): [7, 8]}}, + ), + }, + ], +) +def test_concat_dictionary(d, axis): + _dict = {k: c(**v) for k, (c, v) in d.items()} + result = cudf.concat(_dict, axis=axis) + expected = cudf.from_pandas( + pd.concat({k: df.to_pandas() for k, df in _dict.items()}, axis=axis) + ) + assert_eq(expected, result) + + +@pytest.mark.parametrize( + "idx_cls, idx_data", + [ + [cudf.Index, {"data": [1, 2, 3]}], + [ + cudf.MultiIndex, + { + "levels": [[1, 2], ["blue", "red"]], + "codes": [[0, 0, 1, 1], [1, 0, 1, 0]], + }, + ], + [cudf.CategoricalIndex, {"data": [1, 2, 3]}], + ], +) +def test_concat_dict_incorrect_type_index(idx_cls, idx_data): + idx = idx_cls(**idx_data) + with pytest.raises( + TypeError, + match="cannot concatenate a dictionary containing indices", + ): + cudf.concat({"first": idx}, axis=1) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py deleted file mode 100644 index a2657228b94..00000000000 --- a/python/cudf/cudf/tests/test_concat.py +++ /dev/null @@ -1,2060 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -import warnings -from contextlib import contextmanager -from decimal import Decimal - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core._compat import PANDAS_GE_220 -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if - - -@pytest.fixture(params=[True, False]) -def ignore_index(request): - return request.param - - -@pytest.fixture(params=[True, False]) -def sort(request): - return request.param - - -@pytest.fixture(params=["outer", "inner"]) -def join(request): - return request.param - - -@pytest.fixture(params=[0, "index", 1, "columns"]) -def axis(request): - return request.param - - -@contextmanager -def _hide_concat_empty_dtype_warning(): - with warnings.catch_warnings(): - # Ignoring warnings in this test as warnings are - # being caught and validated in other tests. - warnings.filterwarnings( - "ignore", - "The behavior of array concatenation with empty entries " - "is deprecated.", - category=FutureWarning, - ) - yield - - -def make_frames(index=None, nulls="none"): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": list("abcde") * 2, - } - ) - df.z = df.z.astype("category") - df2 = pd.DataFrame( - { - "x": range(10, 20), - "y": list(map(float, range(10, 20))), - "z": list("edcba") * 2, - } - ) - df2.z = df2.z.astype("category") - if nulls == "all": - df.y = np.full_like(df.y, np.nan) - df2.y = np.full_like(df2.y, np.nan) - if nulls == "some": - mask = np.arange(10) - rng.shuffle(mask) - mask = mask[:5] - df.loc[mask, "y"] = np.nan - df2.loc[mask, "y"] = np.nan - gdf = cudf.DataFrame.from_pandas(df) - gdf2 = cudf.DataFrame.from_pandas(df2) - if index: - df = df.set_index(index) - df2 = df2.set_index(index) - gdf = gdf.set_index(index) - gdf2 = gdf2.set_index(index) - return df, df2, gdf, gdf2 - - -@pytest.mark.parametrize("nulls", ["none", "some", "all"]) -@pytest.mark.parametrize("index", [False, "z", "y"]) -@pytest.mark.parametrize("axis", [0, "index"]) -def test_concat_dataframe(index, nulls, axis): - if index == "y" and nulls in ("some", "all"): - pytest.skip("nulls in columns, dont index") - df, df2, gdf, gdf2 = make_frames(index, nulls=nulls) - # Make empty frame - gdf_empty1 = gdf2[:0] - assert len(gdf_empty1) == 0 - df_empty1 = gdf_empty1.to_pandas() - - # DataFrame - with _hide_concat_empty_dtype_warning(): - res = cudf.concat([gdf, gdf2, gdf, gdf_empty1], axis=axis).to_pandas() - sol = pd.concat([df, df2, df, df_empty1], axis=axis) - assert_eq( - res, - sol, - check_names=False, - check_categorical=False, - check_index_type=True, - ) - - # Series - for c in [i for i in ("x", "y", "z") if i != index]: - res = cudf.concat([gdf[c], gdf2[c], gdf[c]], axis=axis).to_pandas() - sol = pd.concat([df[c], df2[c], df[c]], axis=axis) - assert_eq( - res, - sol, - check_names=False, - check_categorical=False, - check_index_type=True, - ) - - # Index - res = cudf.concat([gdf.index, gdf2.index], axis=axis).to_pandas() - sol = df.index.append(df2.index) - assert_eq(res, sol, check_names=False, check_categorical=False) - - -@pytest.mark.parametrize( - "values", - [["foo", "bar"], [1.0, 2.0], pd.Series(["one", "two"], dtype="category")], -) -def test_concat_all_nulls(values): - pa = pd.Series(values) - pb = pd.Series([None]) - ps = pd.concat([pa, pb]) - - ga = cudf.Series(values) - gb = cudf.Series([None]) - gs = cudf.concat([ga, gb]) - - 
assert_eq( - ps, - gs, - check_dtype=False, - check_categorical=False, - check_index_type=True, - ) - - -def test_concat_errors(): - df, df2, gdf, gdf2 = make_frames() - - # No objs - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=([], {"objs": []}), - rfunc_args_and_kwargs=([], {"objs": []}), - ) - - # All None - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=([], {"objs": [None, None]}), - rfunc_args_and_kwargs=([], {"objs": [None, None]}), - ) - - # Mismatched types - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=([], {"objs": [df, df.index, df.x]}), - rfunc_args_and_kwargs=([], {"objs": [gdf, gdf.index, gdf.x]}), - ) - - # Unknown type - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=([], {"objs": ["bar", "foo"]}), - rfunc_args_and_kwargs=([], {"objs": ["bar", "foo"]}), - ) - - # Mismatched index dtypes - gdf3 = gdf2.copy() - del gdf3["z"] - gdf4 = gdf2.set_index("z") - - with pytest.raises(ValueError, match="All columns must be the same type"): - cudf.concat([gdf3, gdf4]) - - # Bad axis value - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=( - [], - {"objs": [gdf.to_pandas(), gdf2.to_pandas()], "axis": "bad_value"}, - ), - rfunc_args_and_kwargs=([], {"objs": [gdf, gdf2], "axis": "bad_value"}), - ) - - -def test_concat_misordered_columns(): - df, df2, gdf, gdf2 = make_frames(False) - gdf2 = gdf2[["z", "x", "y"]] - df2 = df2[["z", "x", "y"]] - - res = cudf.concat([gdf, gdf2]).to_pandas() - sol = pd.concat([df, df2], sort=False) - - assert_eq( - res, - sol, - check_names=False, - check_categorical=False, - check_index_type=True, - ) - - -@pytest.mark.parametrize("axis", [1, "columns"]) -def test_concat_columns(axis): - rng = np.random.default_rng(seed=0) - pdf1 = pd.DataFrame(rng.integers(10, size=(5, 3)), columns=[1, 2, 3]) - pdf2 = pd.DataFrame(rng.integers(10, size=(5, 4)), columns=[4, 5, 6, 7]) - gdf1 = cudf.from_pandas(pdf1) - gdf2 = cudf.from_pandas(pdf2) - - expect = pd.concat([pdf1, pdf2], axis=axis) - got = cudf.concat([gdf1, gdf2], axis=axis) - - assert_eq(expect, got, check_index_type=True) - - -def test_concat_multiindex_dataframe(axis): - gdf = cudf.DataFrame( - { - "w": np.arange(4), - "x": np.arange(4), - "y": np.arange(4), - "z": np.arange(4), - } - ) - gdg = gdf.groupby(["w", "x"]).min() - pdg = gdg.to_pandas() - pdg1 = pdg.iloc[:, :1] - pdg2 = pdg.iloc[:, 1:] - gdg1 = cudf.from_pandas(pdg1) - gdg2 = cudf.from_pandas(pdg2) - expected = pd.concat([pdg1, pdg2], axis=axis) - result = cudf.concat([gdg1, gdg2], axis=axis) - assert_eq( - expected, - result, - check_index_type=True, - ) - - -def test_concat_multiindex_series(): - gdf = cudf.DataFrame( - { - "w": np.arange(4), - "x": np.arange(4), - "y": np.arange(4), - "z": np.arange(4), - } - ) - gdg = gdf.groupby(["w", "x"]).min() - pdg = gdg.to_pandas() - pdg1 = pdg["y"] - pdg2 = pdg["z"] - gdg1 = cudf.from_pandas(pdg1) - gdg2 = cudf.from_pandas(pdg2) - assert_eq( - cudf.concat([gdg1, gdg2]), - pd.concat([pdg1, pdg2]), - check_index_type=True, - ) - assert_eq( - cudf.concat([gdg1, gdg2], axis=1), pd.concat([pdg1, pdg2], axis=1) - ) - - -def test_concat_multiindex_dataframe_and_series(): - gdf = cudf.DataFrame( - { - "w": np.arange(4), - "x": np.arange(4), - "y": np.arange(4), - "z": np.arange(4), - } - ) - gdg = gdf.groupby(["w", "x"]).min() - pdg = gdg.to_pandas() - pdg1 = pdg[["y", "z"]] - pdg2 = pdg["z"] - 
pdg2.name = "a" - gdg1 = cudf.from_pandas(pdg1) - gdg2 = cudf.from_pandas(pdg2) - assert_eq( - cudf.concat([gdg1, gdg2], axis=1), - pd.concat([pdg1, pdg2], axis=1), - check_index_type=True, - ) - - -def test_concat_multiindex_series_and_dataframe(): - gdf = cudf.DataFrame( - { - "w": np.arange(4), - "x": np.arange(4), - "y": np.arange(4), - "z": np.arange(4), - } - ) - gdg = gdf.groupby(["w", "x"]).min() - pdg = gdg.to_pandas() - pdg1 = pdg["z"] - pdg2 = pdg[["y", "z"]] - pdg1.name = "a" - gdg1 = cudf.from_pandas(pdg1) - gdg2 = cudf.from_pandas(pdg2) - assert_eq( - cudf.concat([gdg1, gdg2], axis=1), - pd.concat([pdg1, pdg2], axis=1), - check_index_type=True, - ) - - -@pytest.mark.parametrize("myindex", ["a", "b"]) -def test_concat_string_index_name(myindex): - # GH-Issue #3420 - data = {"a": [123, 456], "b": ["s1", "s2"]} - df1 = cudf.DataFrame(data).set_index(myindex) - df2 = df1.copy() - df3 = cudf.concat([df1, df2]) - - assert df3.index.name == myindex - - -def test_pandas_concat_compatibility_axis1(): - d1 = cudf.datasets.randomdata( - 3, dtypes={"a": float, "ind": float} - ).set_index("ind") - d2 = cudf.datasets.randomdata( - 3, dtypes={"b": float, "ind": float} - ).set_index("ind") - d3 = cudf.datasets.randomdata( - 3, dtypes={"c": float, "ind": float} - ).set_index("ind") - d4 = cudf.datasets.randomdata( - 3, dtypes={"d": float, "ind": float} - ).set_index("ind") - d5 = cudf.datasets.randomdata( - 3, dtypes={"e": float, "ind": float} - ).set_index("ind") - - pd1 = d1.to_pandas() - pd2 = d2.to_pandas() - pd3 = d3.to_pandas() - pd4 = d4.to_pandas() - pd5 = d5.to_pandas() - - expect = pd.concat([pd1, pd2, pd3, pd4, pd5], axis=1) - got = cudf.concat([d1, d2, d3, d4, d5], axis=1) - - assert_eq( - got.sort_index(), - expect.sort_index(), - check_index_type=True, - ) - - -@pytest.mark.parametrize("index", [[0, 1, 2], [2, 1, 0], [5, 9, 10]]) -@pytest.mark.parametrize("names", [False, (0, 1)]) -@pytest.mark.parametrize( - "data", - [ - (["a", "b", "c"], ["a", "b", "c"]), - (["a", "b", "c"], ["XX", "YY", "ZZ"]), - ], -) -def test_pandas_concat_compatibility_axis1_overlap(index, names, data): - s1 = cudf.Series(data[0], index=[0, 1, 2]) - s2 = cudf.Series(data[1], index=index) - if names: - s1.name = names[0] - s2.name = names[1] - ps1 = s1.to_pandas() - ps2 = s2.to_pandas() - got = cudf.concat([s1, s2], axis=1) - expect = pd.concat([ps1, ps2], axis=1) - assert_eq(got, expect, check_index_type=True) - - -def test_pandas_concat_compatibility_axis1_eq_index(): - s1 = cudf.Series(["a", "b", "c"], index=[0, 1, 2]) - s2 = cudf.Series(["a", "b", "c"], index=[1, 1, 1]) - ps1 = s1.to_pandas() - ps2 = s2.to_pandas() - - assert_exceptions_equal( - lfunc=pd.concat, - rfunc=cudf.concat, - lfunc_args_and_kwargs=([], {"objs": [ps1, ps2], "axis": 1}), - rfunc_args_and_kwargs=([], {"objs": [s1, s2], "axis": 1}), - ) - - -@pytest.mark.parametrize("name", [None, "a"]) -def test_pandas_concat_compatibility_axis1_single_column(name): - # Pandas renames series name `None` to 0 - # and preserves anything else - s = cudf.Series([1, 2, 3], name=name) - got = cudf.concat([s], axis=1) - expected = pd.concat([s.to_pandas()], axis=1) - assert_eq(expected, got) - - -def test_concat_duplicate_columns(): - cdf = cudf.DataFrame( - { - "id4": 4 * list(range(6)), - "id5": 4 * list(reversed(range(6))), - "v3": 6 * list(range(4)), - } - ) - cdf_std = cdf.groupby(["id4", "id5"])[["v3"]].std() - cdf_med = cdf.groupby(["id4", "id5"])[["v3"]].quantile(q=0.5) - with pytest.raises(NotImplementedError): - cudf.concat([cdf_med, 
cdf_std], axis=1) - - -def test_concat_mixed_input(): - pdf1 = pd.DataFrame({"a": [10, 20, 30]}) - pdf2 = pd.DataFrame({"a": [11, 22, 33]}) - - gdf1 = cudf.from_pandas(pdf1) - gdf2 = cudf.from_pandas(pdf2) - - assert_eq( - pd.concat([pdf1, None, pdf2, None]), - cudf.concat([gdf1, None, gdf2, None]), - check_index_type=True, - ) - assert_eq( - pd.concat([pdf1, None]), - cudf.concat([gdf1, None]), - check_index_type=True, - ) - assert_eq( - pd.concat([None, pdf2]), - cudf.concat([None, gdf2]), - check_index_type=True, - ) - assert_eq( - pd.concat([None, pdf2, pdf1]), - cudf.concat([None, gdf2, gdf1]), - check_index_type=True, - ) - - -@pytest.mark.parametrize( - "objs", - [ - [pd.Series([1, 2, 3]), pd.DataFrame({"a": [1, 2]})], - [pd.Series([1, 2, 3]), pd.DataFrame({"a": []})], - [pd.Series([], dtype="float64"), pd.DataFrame({"a": []})], - [pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})], - pytest.param( - [ - pd.Series([1, 2, 3.0, 1.2], name="abc"), - pd.DataFrame({"a": [1, 2]}), - ], - marks=pytest.mark.skipif( - not PANDAS_GE_220, - reason="https://github.com/pandas-dev/pandas/pull/56365", - ), - ), - pytest.param( - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130] - ), - pd.DataFrame({"a": [1, 2]}), - ], - marks=pytest.mark.skipif( - not PANDAS_GE_220, - reason="https://github.com/pandas-dev/pandas/pull/56365", - ), - ), - pytest.param( - [ - pd.Series( - [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"] - ), - pd.DataFrame({"a": [1, 2]}, index=["a", "b"]), - ], - marks=pytest.mark.skipif( - not PANDAS_GE_220, - reason="https://github.com/pandas-dev/pandas/pull/56365", - ), - ), - pytest.param( - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], - ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], - ), - ], - marks=pytest.mark.skipif( - not PANDAS_GE_220, - reason="https://github.com/pandas-dev/pandas/pull/56365", - ), - ), - pytest.param( - [ - pd.Series( - [1, 2, 3.0, 1.2, 8, 100], - name="New name", - index=["a", "b", "c", "d", "e", "f"], - ), - pd.DataFrame( - {"a": [1, 2, 4, 10, 11, 12]}, - index=["a", "b", "c", "d", "e", "f"], - ), - ] - * 7, - marks=pytest.mark.skipif( - not PANDAS_GE_220, - reason="https://github.com/pandas-dev/pandas/pull/56365", - ), - ), - ], -) -def test_concat_series_dataframe_input(objs): - pd_objs = objs - gd_objs = [cudf.from_pandas(obj) for obj in objs] - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat(pd_objs) - actual = cudf.concat(gd_objs) - - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "objs", - [ - [ - pd.Series(["a", "b", "c", "d"]), - pd.Series(["1", "2", "3", "4"]), - pd.DataFrame({"first col": ["10", "11", "12", "13"]}), - ], - [ - pd.Series(["a", "b", "c", "d"]), - pd.Series(["1", "2", "3", "4"]), - pd.DataFrame( - { - "first col": ["10", "11", "12", "13"], - "second col": ["a", "b", "c", "d"], - } - ), - ], - [ - pd.Series(["a", "b", "c"]), - pd.Series(["1", "2", "3", "4"]), - pd.DataFrame( - { - "first col": ["10", "11", "12", "13"], - "second col": ["a", "b", "c", "d"], - } - ), - ], - ], -) -def test_concat_series_dataframe_input_str(objs): - pd_objs = objs - gd_objs = [cudf.from_pandas(obj) for obj in objs] - - expected = pd.concat(pd_objs) - actual = cudf.concat(gd_objs) - assert_eq(expected, actual, check_dtype=False, check_index_type=False) - - -@pytest.mark.parametrize( - "df", - [ - 
pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[200]), - pd.DataFrame([], index=[100]), - pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), - ], -) -@pytest.mark.parametrize( - "other", - [ - [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], - [ - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame(), - pd.DataFrame(), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - ], - [ - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[200]), - pd.DataFrame( - {"cat": pd.Series(["two", "three"], dtype="category")} - ), - ], - [ - pd.DataFrame([]), - pd.DataFrame([], index=[100]), - pd.DataFrame( - {"cat": pd.Series(["two", "three"], dtype="category")} - ), - ], - ], -) -def test_concat_empty_dataframes(df, other, ignore_index): - other_pd = [df, *other] - - gdf = cudf.from_pandas(df) - other_gd = [gdf] + [cudf.from_pandas(o) for o in other] - - expected = pd.concat(other_pd, ignore_index=ignore_index) - actual = cudf.concat(other_gd, ignore_index=ignore_index) - if expected.shape != df.shape: - for key, col in actual[actual.columns].items(): - if isinstance(col.dtype, cudf.CategoricalDtype): - if not isinstance(expected[key].dtype, pd.CategoricalDtype): - # TODO: Pandas bug: - # https://github.com/pandas-dev/pandas/issues/42840 - expected[key] = expected[key].fillna("-1").astype("str") - else: - expected[key] = ( - expected[key] - .cat.add_categories(["-1"]) - .fillna("-1") - .astype("str") - ) - actual[key] = col.astype("str").fillna("-1") - else: - expected[key] = expected[key].fillna(-1) - actual[key] = col.fillna(-1) - assert_eq(expected, actual, check_dtype=False, check_index_type=True) - else: - assert_eq( - expected, - actual, - check_index_type=not gdf.empty, - check_column_type=False, - ) - - -@pytest.mark.parametrize("axis", [0, "index"]) -@pytest.mark.parametrize( - "data", - [ - (["a", "b", "c"], ["a", "b", "c"]), - (["a", "b", "c"], ["XX", "YY", "ZZ"]), - ], -) -def test_concat_empty_and_nonempty_series(ignore_index, data, axis): - s1 = cudf.Series() - s2 = cudf.Series(data[0]) - ps1 = s1.to_pandas() - ps2 = s2.to_pandas() - got = cudf.concat([s1, s2], axis=axis, ignore_index=ignore_index) - expect = pd.concat([ps1, ps2], axis=axis, ignore_index=ignore_index) - - assert_eq(got, expect, check_index_type=True) - - -@pytest.mark.parametrize("axis", [0, "index"]) -def test_concat_two_empty_series(ignore_index, axis): - s1 = cudf.Series() - s2 = cudf.Series() - ps1 = s1.to_pandas() - ps2 = s2.to_pandas() - got = cudf.concat([s1, s2], axis=axis, ignore_index=ignore_index) - expect = pd.concat([ps1, ps2], axis=axis, ignore_index=ignore_index) - - assert_eq(got, expect, check_index_type=True) - - -@pytest.mark.parametrize( - "key2", [[0, 1], [1, 0]], ids=["matching", "different"] -) -def test_concat_dataframe_with_multiindex(key2): - gdf1 = cudf.DataFrame({"k1": [0, 1], "k2": [2, 3], "v1": [4, 5]}) - gdf1 = gdf1.set_index(["k1", "k2"]) - - gdf2 = cudf.DataFrame({"k1": key2, "k2": [3, 2], "v2": [6, 7]}) - gdf2 = gdf2.set_index(["k1", "k2"]) - - pdf1 = gdf1.to_pandas() - pdf2 = gdf2.to_pandas() - - actual = cudf.concat([gdf1, gdf2], axis=1) - expected = pd.concat([pdf1, 
pdf2], axis=1) - - # Will need to sort_index before comparing as - # ordering is not deterministic in case of pandas - # multiIndex with concat. - assert_eq( - expected.sort_index(), - actual.sort_index(), - check_index_type=True, - ) - - -@pytest.mark.parametrize( - "objs", - [ - [ - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ), - pd.DataFrame( - {"x": range(10, 20), "y": list(map(float, range(10, 20)))} - ), - ], - [ - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - }, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], - ), - pd.DataFrame( - {"x": range(10, 20), "y": list(map(float, range(10, 20)))}, - index=["k", "l", "m", "n", "o", "p", "q", "r", "s", "t"], - ), - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - }, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], - ), - pd.DataFrame( - {"x": range(10, 20), "y": list(map(float, range(10, 20)))}, - index=["a", "b", "c", "d", "z", "f", "g", "h", "i", "w"], - ), - ], - ], -) -def test_concat_join(objs, ignore_index, sort, join, axis): - axis = 0 - gpu_objs = [cudf.from_pandas(o) for o in objs] - - assert_eq( - pd.concat( - objs, sort=sort, join=join, ignore_index=ignore_index, axis=axis - ), - cudf.concat( - gpu_objs, - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ), - check_index_type=True, - ) - - -@pytest.mark.parametrize( - "objs", - [ - [ - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ), - pd.DataFrame( - {"x": range(10, 20), "y": list(map(float, range(10, 20)))} - ), - ], - ], -) -def test_concat_join_axis_1_dup_error(objs): - gpu_objs = [cudf.from_pandas(o) for o in objs] - # we do not support duplicate columns - with pytest.raises(NotImplementedError): - assert_eq( - pd.concat( - objs, - axis=1, - ), - cudf.concat( - gpu_objs, - axis=1, - ), - ) - - -@pytest.mark.parametrize( - "objs", - [ - [ - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ), - pd.DataFrame( - {"l": range(10, 20), "m": list(map(float, range(10, 20)))} - ), - ], - ], -) -def test_concat_join_axis_1(objs, ignore_index, sort, join, axis): - # no duplicate columns - axis = 1 - gpu_objs = [cudf.from_pandas(o) for o in objs] - expected = pd.concat( - objs, sort=sort, join=join, ignore_index=ignore_index, axis=axis - ) - actual = cudf.concat( - gpu_objs, - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - - assert_eq(expected, actual, check_index_type=True) - - -def test_concat_join_many_df_and_empty_df(ignore_index, sort, join, axis): - # no duplicate columns - pdf1 = pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ) - pdf2 = pd.DataFrame( - {"l": range(10, 20), "m": list(map(float, range(10, 20)))} - ) - pdf3 = pd.DataFrame({"j": [1, 2], "k": [1, 2], "s": [1, 2], "t": [1, 2]}) - pdf_empty1 = pd.DataFrame() - - gdf1 = cudf.from_pandas(pdf1) - gdf2 = cudf.from_pandas(pdf2) - gdf3 = cudf.from_pandas(pdf3) - gdf_empty1 = cudf.from_pandas(pdf_empty1) - - with _hide_concat_empty_dtype_warning(): - assert_eq( - pd.concat( - [pdf1, pdf2, pdf3, pdf_empty1], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ), - cudf.concat( - [gdf1, gdf2, gdf3, gdf_empty1], - sort=sort, - 
join=join, - ignore_index=ignore_index, - axis=axis, - ), - check_index_type=False, - ) - - -def test_concat_join_one_df(ignore_index, sort, join, axis): - pdf1 = pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ) - - gdf1 = cudf.from_pandas(pdf1) - expected = pd.concat( - [pdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis - ) - actual = cudf.concat( - [gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis - ) - - assert_eq(expected, actual, check_index_type=True) - - -@pytest.mark.parametrize( - "pdf1,pdf2", - [ - ( - pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), - pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}), - ), - ( - pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6]}, index=["p", "q", "r"] - ), - pd.DataFrame( - {"c": [7, 8, 9], "d": [10, 11, 12]}, index=["r", "p", "z"] - ), - ), - ], -) -def test_concat_join_no_overlapping_columns( - pdf1, pdf2, ignore_index, sort, join, axis -): - gdf1 = cudf.from_pandas(pdf1) - gdf2 = cudf.from_pandas(pdf2) - - expected = pd.concat( - [pdf1, pdf2], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - actual = cudf.concat( - [gdf1, gdf2], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - - assert_eq(expected, actual, check_index_type=True) - - -def test_concat_join_no_overlapping_columns_many_and_empty( - ignore_index, sort, join, axis -): - pdf4 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pdf5 = pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}) - pdf6 = pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ) - pdf_empty = pd.DataFrame() - - gdf4 = cudf.from_pandas(pdf4) - gdf5 = cudf.from_pandas(pdf5) - gdf6 = cudf.from_pandas(pdf6) - gdf_empty = cudf.from_pandas(pdf_empty) - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - [pdf4, pdf5, pdf6, pdf_empty], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - actual = cudf.concat( - [gdf4, gdf5, gdf6, gdf_empty], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - assert_eq( - expected, - actual, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "objs", - [ - [ - pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6]}, index=["z", "t", "k"] - ), - pd.DataFrame( - {"c": [7, 8, 9], "d": [10, 11, 12]}, index=["z", "t", "k"] - ), - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - }, - index=["z", "t", "k", "a", "b", "c", "d", "e", "f", "g"], - ), - pd.DataFrame(index=pd.Index([], dtype="str")), - ], - [ - pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), - pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]}), - pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ), - pd.DataFrame(index=pd.Index([], dtype="str")), - ], - pytest.param( - [ - pd.DataFrame( - {"a": [1, 2, 3], "nb": [10, 11, 12]}, index=["Q", "W", "R"] - ), - None, - ], - ), - ], -) -def test_concat_join_no_overlapping_columns_many_and_empty2( - objs, ignore_index, sort, join, axis -): - objs_gd = [cudf.from_pandas(o) if o is not None else o for o in objs] - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - objs, - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - actual = cudf.concat( - objs_gd, - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - assert_eq(expected, actual, 
check_index_type=False) - - -def test_concat_join_no_overlapping_columns_empty_df_basic( - ignore_index, sort, join, axis -): - pdf6 = pd.DataFrame( - { - "x": range(10), - "y": list(map(float, range(10))), - "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - } - ) - pdf_empty = pd.DataFrame() - - gdf6 = cudf.from_pandas(pdf6) - gdf_empty = cudf.from_pandas(pdf_empty) - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - [pdf6, pdf_empty], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - actual = cudf.concat( - [gdf6, gdf_empty], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - assert_eq( - expected, - actual, - check_index_type=True, - check_column_type=False, - ) - - -def test_concat_join_series(ignore_index, sort, join, axis): - s1 = cudf.Series(["a", "b", "c"]) - s2 = cudf.Series(["a", "b"]) - s3 = cudf.Series(["a", "b", "c", "d"]) - s4 = cudf.Series(dtype="str") - - ps1 = s1.to_pandas() - ps2 = s2.to_pandas() - ps3 = s3.to_pandas() - ps4 = s4.to_pandas() - - expected = pd.concat( - [ps1, ps2, ps3, ps4], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - with expect_warning_if(axis in {1, "columns"}): - actual = cudf.concat( - [s1, s2, s3, s4], - sort=sort, - join=join, - ignore_index=ignore_index, - axis=axis, - ) - - assert_eq( - expected, - actual, - check_index_type=True, - ) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[200]), - pd.DataFrame([], index=[100]), - pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), - ], -) -@pytest.mark.parametrize( - "other", - [ - [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], - [ - pd.DataFrame( - {"b": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame(), - pd.DataFrame(), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - ], - [ - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"k": [10]}, index=[200]), - pd.DataFrame( - {"cat": pd.Series(["two", "three"], dtype="category")} - ), - ], - [ - pd.DataFrame([]), - pd.DataFrame([], index=[100]), - pd.DataFrame( - {"cat": pd.Series(["two", "three"], dtype="category")} - ), - ], - ], -) -def test_concat_join_empty_dataframes( - request, df, other, ignore_index, join, sort -): - axis = 0 - other_pd = [df, *other] - gdf = cudf.from_pandas(df) - other_gd = [gdf] + [cudf.from_pandas(o) for o in other] - - expected = pd.concat( - other_pd, ignore_index=ignore_index, axis=axis, join=join, sort=sort - ) - actual = cudf.concat( - other_gd, ignore_index=ignore_index, axis=axis, join=join, sort=sort - ) - if ( - join == "outer" - and any( - isinstance(dtype, pd.CategoricalDtype) - for dtype in df.dtypes.tolist() - ) - and any( - isinstance(dtype, pd.CategoricalDtype) - for other_df in other - for dtype in other_df.dtypes.tolist() - ) - ): - request.applymarker( - pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/42840" - ) - ) - assert_eq( - expected, - actual, - check_dtype=False, - check_column_type=False, - ) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 
11, 20] - ), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"m": [10]}, index=[200]), - pd.DataFrame([], index=[100]), - pd.DataFrame({"cat": pd.Series(["one", "two"], dtype="category")}), - ], -) -@pytest.mark.parametrize( - "other", - [ - [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], - [ - pd.DataFrame( - {"b": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame(), - pd.DataFrame(), - pd.DataFrame([[5, 6], [7, 8]], columns=list("CD")), - ], - [ - pd.DataFrame({"g": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"h": [10]}), - pd.DataFrame({"k": [10]}, index=[200]), - pd.DataFrame( - {"dog": pd.Series(["two", "three"], dtype="category")} - ), - ], - [ - pd.DataFrame([]), - pd.DataFrame([], index=[100]), - pd.DataFrame( - {"bird": pd.Series(["two", "three"], dtype="category")} - ), - ], - ], -) -def test_concat_join_empty_dataframes_axis_1( - df, other, ignore_index, axis, join, sort -): - # no duplicate columns - axis = 1 - other_pd = [df, *other] - gdf = cudf.from_pandas(df) - other_gd = [gdf] + [cudf.from_pandas(o) for o in other] - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - other_pd, - ignore_index=ignore_index, - axis=axis, - join=join, - sort=sort, - ) - actual = cudf.concat( - other_gd, - ignore_index=ignore_index, - axis=axis, - join=join, - sort=sort, - ) - if expected.shape != df.shape: - if axis == 0: - for key, col in actual[actual.columns].items(): - if isinstance(expected[key].dtype, pd.CategoricalDtype): - expected[key] = expected[key].fillna("-1") - actual[key] = col.astype("str").fillna("-1") - # if not expected.empty: - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_index_type=False - if len(expected) == 0 or actual.empty - else True, - check_column_type=False, - ) - else: - # no need to fill in if axis=1 - assert_eq( - expected, - actual, - check_index_type=False, - check_column_type=False, - ) - assert_eq( - expected, actual, check_index_type=False, check_column_type=False - ) - - -def test_concat_preserve_order(): - """Ensure that order is preserved on 'inner' concatenations.""" - df = pd.DataFrame([["d", 3, 4.0], ["c", 4, 5.0]], columns=["c", "b", "a"]) - dfs = [df, df] - - assert_eq( - pd.concat(dfs, join="inner"), - cudf.concat([cudf.DataFrame(df) for df in dfs], join="inner"), - check_index_type=True, - ) - - -@pytest.mark.parametrize("typ", [cudf.DataFrame, cudf.Series]) -def test_concat_single_object(ignore_index, typ): - """Ensure that concat on a single object does not change it.""" - obj = typ([1, 2, 3]) - assert_eq( - cudf.concat([obj], ignore_index=ignore_index, axis=0), - obj, - check_index_type=True, - ) - - -@pytest.mark.parametrize( - "ltype", - [Decimal64Dtype(3, 1), Decimal64Dtype(7, 2), Decimal64Dtype(8, 4)], -) -@pytest.mark.parametrize( - "rtype", - [ - Decimal64Dtype(3, 2), - Decimal64Dtype(8, 4), - cudf.Decimal128Dtype(3, 2), - cudf.Decimal32Dtype(8, 4), - ], -) -def test_concat_decimal_dataframe(ltype, rtype): - rng = np.random.default_rng(seed=0) - gdf1 = cudf.DataFrame( - {"id": rng.integers(0, 10, 3), "val": ["22.3", "59.5", "81.1"]} - ) - gdf2 = cudf.DataFrame( - {"id": rng.integers(0, 10, 3), "val": ["2.35", "5.59", "8.14"]} - ) - - gdf1["val"] = gdf1["val"].astype(ltype) - gdf2["val"] = gdf2["val"].astype(rtype) - - pdf1 = gdf1.to_pandas() - pdf2 = gdf2.to_pandas() - - got = cudf.concat([gdf1, gdf2]) - 
expected = pd.concat([pdf1, pdf2]) - - assert_eq(expected, got, check_index_type=True) - - -@pytest.mark.parametrize("ltype", [Decimal64Dtype(4, 1), Decimal64Dtype(8, 2)]) -@pytest.mark.parametrize( - "rtype", - [ - Decimal64Dtype(4, 3), - Decimal64Dtype(10, 4), - Decimal32Dtype(8, 3), - Decimal128Dtype(18, 3), - ], -) -def test_concat_decimal_series(ltype, rtype): - gs1 = cudf.Series(["228.3", "559.5", "281.1"]).astype(ltype) - gs2 = cudf.Series(["2.345", "5.259", "8.154"]).astype(rtype) - - ps1 = gs1.to_pandas() - ps2 = gs2.to_pandas() - - got = cudf.concat([gs1, gs2]) - expected = pd.concat([ps1, ps2]) - - assert_eq(expected, got, check_index_type=True) - - -@pytest.mark.parametrize( - "data1, dtype1, index1, data2, dtype2, index2, data3, dtype3, index3, expected_data, expected_dtype, expected_index", - [ - [ - {"val": [Decimal("42.5"), Decimal("8.7")]}, - Decimal64Dtype(5, 2), - None, - {"val": [Decimal("9.23"), Decimal("-67.49")]}, - Decimal64Dtype(6, 4), - None, - {"val": [8, -5]}, - "int32", - None, - { - "val": [ - Decimal("42.5"), - Decimal("8.7"), - Decimal("9.23"), - Decimal("-67.49"), - Decimal("8"), - Decimal("-5"), - ] - }, - Decimal32Dtype(7, 4), - [0, 1, 0, 1, 0, 1], - ], - [ - {"val": [Decimal("95.2"), Decimal("23.4")]}, - Decimal64Dtype(5, 2), - None, - {"val": [54, 509]}, - "uint16", - None, - {"val": [24, -48]}, - "int32", - None, - { - "val": [ - Decimal("95.2"), - Decimal("23.4"), - Decimal("54"), - Decimal("509"), - Decimal("24"), - Decimal("-48"), - ] - }, - Decimal32Dtype(5, 2), - [0, 1, 0, 1, 0, 1], - ], - [ - {"val": [Decimal("36.56"), Decimal("-59.24")]}, - Decimal64Dtype(9, 4), - None, - {"val": [403.21, 45.13]}, - "float32", - None, - {"val": [52.262, -49.25]}, - "float64", - None, - { - "val": [ - Decimal("36.56"), - Decimal("-59.24"), - Decimal("403.21"), - Decimal("45.13"), - Decimal("52.262"), - Decimal("-49.25"), - ] - }, - Decimal32Dtype(9, 4), - [0, 1, 0, 1, 0, 1], - ], - [ - {"val": [Decimal("9563.24"), Decimal("236.633")]}, - Decimal64Dtype(9, 4), - None, - {"val": [5393, -95832]}, - "int64", - None, - {"val": [-29.234, -31.945]}, - "float64", - None, - { - "val": [ - Decimal("9563.24"), - Decimal("236.633"), - Decimal("5393"), - Decimal("-95832"), - Decimal("-29.234"), - Decimal("-31.945"), - ] - }, - Decimal32Dtype(9, 4), - [0, 1, 0, 1, 0, 1], - ], - [ - {"val": [Decimal("95633.24"), Decimal("236.633")]}, - Decimal128Dtype(19, 4), - None, - {"val": [5393, -95832]}, - "int64", - None, - {"val": [-29.234, -31.945]}, - "float64", - None, - { - "val": [ - Decimal("95633.24"), - Decimal("236.633"), - Decimal("5393"), - Decimal("-95832"), - Decimal("-29.234"), - Decimal("-31.945"), - ] - }, - Decimal128Dtype(19, 4), - [0, 1, 0, 1, 0, 1], - ], - ], -) -def test_concat_decimal_numeric_dataframe( - data1, - dtype1, - index1, - data2, - dtype2, - index2, - data3, - dtype3, - index3, - expected_data, - expected_dtype, - expected_index, -): - df1 = cudf.DataFrame(data1, dtype=dtype1, index=index1) - df2 = cudf.DataFrame(data2, dtype=dtype2, index=index2) - df3 = cudf.DataFrame(data3, dtype=dtype3, index=index3) - expected = cudf.DataFrame( - expected_data, dtype=expected_dtype, index=expected_index - ) - df = cudf.concat([df1, df2, df3]) - assert_eq(df, expected, check_index_type=True) - assert_eq(df.val.dtype, expected.val.dtype) - - -@pytest.mark.parametrize( - "data1, dtype1, index1, data2, dtype2, index2, data3, dtype3, index3, expected_data, expected_dtype, expected_index", - [ - [ - [Decimal("32.8"), Decimal("-87.7")], - Decimal64Dtype(6, 2), - None, - 
[Decimal("101.243"), Decimal("-92.449")], - Decimal64Dtype(9, 6), - None, - [94, -22], - "int32", - None, - [ - Decimal("32.8"), - Decimal("-87.7"), - Decimal("101.243"), - Decimal("-92.449"), - Decimal("94"), - Decimal("-22"), - ], - Decimal64Dtype(10, 6), - [0, 1, 0, 1, 0, 1], - ], - [ - [Decimal("7.2"), Decimal("122.1")], - Decimal64Dtype(5, 2), - None, - [33, 984], - "uint32", - None, - [593, -702], - "int32", - None, - [ - Decimal("7.2"), - Decimal("122.1"), - Decimal("33"), - Decimal("984"), - Decimal("593"), - Decimal("-702"), - ], - Decimal32Dtype(5, 2), - [0, 1, 0, 1, 0, 1], - ], - [ - [Decimal("982.94"), Decimal("-493.626")], - Decimal64Dtype(9, 4), - None, - [847.98, 254.442], - "float32", - None, - [5299.262, -2049.25], - "float64", - None, - [ - Decimal("982.94"), - Decimal("-493.626"), - Decimal("847.98"), - Decimal("254.442"), - Decimal("5299.262"), - Decimal("-2049.25"), - ], - Decimal32Dtype(9, 4), - [0, 1, 0, 1, 0, 1], - ], - [ - [Decimal("492.204"), Decimal("-72824.455")], - Decimal64Dtype(9, 4), - None, - [8438, -27462], - "int64", - None, - [-40.292, 49202.953], - "float64", - None, - [ - Decimal("492.204"), - Decimal("-72824.455"), - Decimal("8438"), - Decimal("-27462"), - Decimal("-40.292"), - Decimal("49202.953"), - ], - Decimal32Dtype(9, 4), - [0, 1, 0, 1, 0, 1], - ], - [ - [Decimal("492.204"), Decimal("-72824.455")], - Decimal64Dtype(10, 4), - None, - [Decimal("8438"), Decimal("-27462")], - Decimal32Dtype(9, 4), - None, - [Decimal("-40.292"), Decimal("49202.953")], - Decimal128Dtype(19, 4), - None, - [ - Decimal("492.204"), - Decimal("-72824.455"), - Decimal("8438"), - Decimal("-27462"), - Decimal("-40.292"), - Decimal("49202.953"), - ], - Decimal128Dtype(19, 4), - [0, 1, 0, 1, 0, 1], - ], - ], -) -def test_concat_decimal_numeric_series( - data1, - dtype1, - index1, - data2, - dtype2, - index2, - data3, - dtype3, - index3, - expected_data, - expected_dtype, - expected_index, -): - s1 = cudf.Series(data1, dtype=dtype1, index=index1) - s2 = cudf.Series(data2, dtype=dtype2, index=index2) - s3 = cudf.Series(data3, dtype=dtype3, index=index3) - expected = cudf.Series( - expected_data, dtype=expected_dtype, index=expected_index - ) - s = cudf.concat([s1, s2, s3]) - assert_eq(s, expected, check_index_type=True) - - -@pytest.mark.parametrize( - "data1, dtype1, index1, data2, dtype2, index2, expected_data, expected_dtype, expected_index", - [ - [ - [Decimal("955.22"), Decimal("8.2")], - Decimal64Dtype(5, 2), - None, - ["2007-06-12", "2006-03-14"], - "datetime64[s]", - None, - [ - "955.22", - "8.20", - "2007-06-12 00:00:00", - "2006-03-14 00:00:00", - ], - None, - [0, 1, 0, 1], - ], - [ - [Decimal("-52.44"), Decimal("365.22")], - Decimal64Dtype(5, 2), - None, - np.arange( - "2005-02-01T12", "2005-02-01T15", dtype="datetime64[h]" - ).astype("datetime64[s]"), - "datetime64[s]", - None, - [ - "-52.44", - "365.22", - "2005-02-01 12:00:00", - "2005-02-01 13:00:00", - "2005-02-01 14:00:00", - ], - None, - [0, 1, 0, 1, 2], - ], - [ - [Decimal("753.0"), Decimal("94.22")], - Decimal64Dtype(5, 2), - None, - [np.timedelta64(111, "s"), np.timedelta64(509, "s")], - None, - None, - [ - "753.00", - "94.22", - "0 days 00:01:51", - "0 days 00:08:29", - ], - None, - [0, 1, 0, 1], - ], - [ - [Decimal("753.0"), Decimal("94.22")], - Decimal64Dtype(5, 2), - None, - [np.timedelta64(940252, "s"), np.timedelta64(758385, "s")], - None, - None, - [ - "753.00", - "94.22", - "10 days 21:10:52", - "8 days 18:39:45", - ], - None, - [0, 1, 0, 1], - ], - ], -) -def test_concat_decimal_non_numeric( - data1, 
- dtype1, - index1, - data2, - dtype2, - index2, - expected_data, - expected_dtype, - expected_index, -): - s1 = cudf.Series(data1, dtype=dtype1, index=index1) - s2 = cudf.Series(data2, dtype=dtype2, index=index2) - expected = cudf.Series( - expected_data, dtype=expected_dtype, index=expected_index - ) - s = cudf.concat([s1, s2]) - assert_eq(s, expected, check_index_type=True) - - -def test_concat_struct_column(): - s1 = cudf.Series([{"a": 5}, {"c": "hello"}, {"b": 7}]) - s2 = cudf.Series([{"a": 5, "c": "hello", "b": 7}]) - expected = cudf.Series( - [ - {"a": 5, "b": None, "c": None}, - {"a": None, "b": None, "c": "hello"}, - {"a": None, "b": 7, "c": None}, - {"a": 5, "b": 7, "c": "hello"}, - ], - index=[0, 1, 2, 0], - ) - s = cudf.concat([s1, s2]) - assert_eq(s, expected, check_index_type=True) - - -@pytest.mark.parametrize( - "frame1_cls, frame1_data, frame2_cls, frame2_data, expected_cls, expected_data", - [ - ( - cudf.Series, - {"data": [[{"b": 0}], [{"b": 1}], [{"b": 3}]]}, - cudf.Series, - {"data": [[{"b": 10}], [{"b": 12}], None]}, - cudf.Series, - { - "data": [ - [{"b": 0}], - [{"b": 1}], - [{"b": 3}], - [{"b": 10}], - [{"b": 12}], - None, - ], - "index": [0, 1, 2, 0, 1, 2], - }, - ), - ( - cudf.DataFrame, - {"data": {"a": [[{"b": 0}], [{"b": 1}], [{"b": 3}]]}}, - cudf.DataFrame, - {"data": {"a": [[{"b": 10}], [{"b": 12}], None]}}, - cudf.DataFrame, - { - "data": { - "a": [ - [{"b": 0}], - [{"b": 1}], - [{"b": 3}], - [{"b": 10}], - [{"b": 12}], - None, - ] - }, - "index": [0, 1, 2, 0, 1, 2], - }, - ), - ], -) -def test_concat_list_column( - frame1_cls, - frame1_data, - frame2_cls, - frame2_data, - expected_cls, - expected_data, -): - frame1 = frame1_cls(**frame1_data) - frame2 = frame2_cls(**frame2_data) - expected = expected_cls(**expected_data) - actual = cudf.concat([frame1, frame2]) - assert_eq(actual, expected, check_index_type=True) - - -def test_concat_categorical_ordering(): - # https://github.com/rapidsai/cudf/issues/11486 - sr = pd.Series( - ["a", "b", "c", "d", "e", "a", "b", "c", "d", "e"], dtype="category" - ) - sr = sr.cat.set_categories(["d", "a", "b", "c", "e"]) - - df = pd.DataFrame({"a": sr}) - gdf = cudf.from_pandas(df) - - expect = pd.concat([df, df, df]) - got = cudf.concat([gdf, gdf, gdf]) - - assert_eq(expect, got) - - -@pytest.fixture(params=["rangeindex", "index"]) -def singleton_concat_index(request): - if request.param == "rangeindex": - return pd.RangeIndex(0, 4) - else: - return pd.Index(["a", "h", "g", "f"]) - - -@pytest.fixture(params=["dataframe", "series"]) -def singleton_concat_obj(request, singleton_concat_index): - if request.param == "dataframe": - return pd.DataFrame( - { - "b": [1, 2, 3, 4], - "d": [7, 8, 9, 10], - "a": [4, 5, 6, 7], - "c": [10, 11, 12, 13], - }, - index=singleton_concat_index, - ) - else: - return pd.Series([4, 5, 5, 6], index=singleton_concat_index) - - -def test_concat_singleton_sorting( - axis, sort, ignore_index, singleton_concat_obj -): - gobj = cudf.from_pandas(singleton_concat_obj) - gconcat = cudf.concat( - [gobj], axis=axis, sort=sort, ignore_index=ignore_index - ) - pconcat = pd.concat( - [singleton_concat_obj], axis=axis, sort=sort, ignore_index=ignore_index - ) - assert_eq(pconcat, gconcat) - - -@pytest.mark.parametrize("axis", [2, "invalid"]) -def test_concat_invalid_axis(axis): - s = cudf.Series([1, 2, 3]) - with pytest.raises(ValueError): - cudf.concat([s], axis=axis) - - -@pytest.mark.parametrize( - "s1,s2", - [ - ([1, 2], [[1, 2], [3, 4]]), - ], -) -def test_concat_mixed_list_types_error(s1, s2): - s1, s2 = 
cudf.Series(s1), cudf.Series(s2) - - with pytest.raises(NotImplementedError): - cudf.concat([s1, s2], ignore_index=True) - - -@pytest.mark.parametrize( - "axis", - [ - pytest.param( - 0, - marks=pytest.mark.xfail( - reason="concat dictionaries with axis=0 not implemented" - ), - ), - 1, - "columns", - ], -) -@pytest.mark.parametrize( - "d", - [ - {"first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}})}, - { - "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), - "second": (cudf.DataFrame, {"data": {"A": [5, 6], "B": [7, 8]}}), - "third": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), - }, - { - "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), - "second": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), - "third": (cudf.DataFrame, {"data": {"A": [5, 6], "B": [7, 8]}}), - }, - { - "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), - "second": (cudf.DataFrame, {"data": {"C": [1, 2, 3]}}), - "third": (cudf.DataFrame, {"data": {"A": [5, 6], "C": [7, 8]}}), - "fourth": (cudf.DataFrame, {"data": {"B": [9, 10]}}), - }, - pytest.param( - { - "first": (cudf.DataFrame, {"data": {2.0: [1, 1]}}), - "second": (cudf.DataFrame, {"data": {"test": ["abc", "def"]}}), - }, - marks=pytest.mark.xfail( - reason=( - "Cannot construct a MultiIndex column with multiple " - "label types in cuDF at this time. You must convert " - "the labels to the same type." - ) - ), - ), - { - "first": (cudf.Series, {"data": [1, 2, 3]}), - "second": (cudf.Series, {"data": [4, 5, 6]}), - }, - { - "first": (cudf.DataFrame, {"data": {"A": [1, 2], "B": [3, 4]}}), - "second": (cudf.Series, {"data": [5, 6], "name": "C"}), - }, - pytest.param( - { - "first": ( - cudf.DataFrame, - {"data": {("A", "B"): [1, 2], "C": [3, 4]}}, - ), - "second": ( - cudf.DataFrame, - {"data": {"D": [5, 6], ("A", "B"): [7, 8]}}, - ), - }, - marks=pytest.mark.xfail( - reason=( - "Cannot construct a MultiIndex column with multiple " - "label types in cuDF at this time. You must convert " - "the labels to the same type." - ) - ), - ), - pytest.param( - { - "first": ( - cudf.DataFrame, - {"data": {("A", "B"): [3, 4], 2.0: [1, 1]}}, - ), - "second": ( - cudf.DataFrame, - {"data": {("C", "D"): [3, 4], 3.0: [5, 6]}}, - ), - }, - marks=pytest.mark.xfail( - reason=( - "Cannot construct a MultiIndex column with multiple " - "label types in cuDF at this time. You must convert " - "the labels to the same type." 
- ) - ), - ), - { - "first": ( - cudf.DataFrame, - {"data": {(1, 2): [1, 2], (3, 4): [3, 4]}}, - ), - "second": ( - cudf.DataFrame, - {"data": {(1, 2): [5, 6], (5, 6): [7, 8]}}, - ), - }, - ], -) -def test_concat_dictionary(d, axis): - _dict = {k: c(**v) for k, (c, v) in d.items()} - result = cudf.concat(_dict, axis=axis) - expected = cudf.from_pandas( - pd.concat({k: df.to_pandas() for k, df in _dict.items()}, axis=axis) - ) - assert_eq(expected, result) - - -@pytest.mark.parametrize( - "idx_cls, idx_data", - [ - [cudf.Index, {"data": [1, 2, 3]}], - [ - cudf.MultiIndex, - { - "levels": [[1, 2], ["blue", "red"]], - "codes": [[0, 0, 1, 1], [1, 0, 1, 0]], - }, - ], - [cudf.CategoricalIndex, {"data": [1, 2, 3]}], - ], -) -def test_concat_dict_incorrect_type_index(idx_cls, idx_data): - idx = idx_cls(**idx_data) - with pytest.raises( - TypeError, - match="cannot concatenate a dictionary containing indices", - ): - cudf.concat({"first": idx}, axis=1) From d71e00debdcd91e0eb7a5d258a84fb09c0d49cbc Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Sat, 16 Aug 2025 02:05:31 +0100 Subject: [PATCH 144/366] [FEA] Switch to NVIDIA's JITIFY2 (#19561) This merge request switches CUDF's JITIFY2 upstream from the RAPIDSAI fork (https://github.com/rapidsai/jitify/tree/jitify2) to the NVIDIA fork (https://github.com/NVIDIA/jitify/tree/jitify2). The NVIDIA fork has newer features & improvements, including NVTX ranges, which are needed to properly benchmark each compilation/cache step of a specific JIT workload. This was previously impossible to track accurately and concisely. Follows up: https://github.com/rapidsai/cudf/issues/18023 Authors: - Basit Ayantunde (https://github.com/lamarrr) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19561 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 1 + conda/environments/all_cuda-129_arch-x86_64.yaml | 1 + conda/recipes/libcudf/recipe.yaml | 1 + cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 5 +++-- cpp/cmake/thirdparty/get_jitify.cmake | 6 +++--- cpp/src/binaryop/jit/kernel.cu | 2 +- cpp/src/jit/accessors.cuh | 4 ++-- cpp/src/jit/cache.hpp | 2 ++ cpp/src/jit/helpers.hpp | 3 +-- cpp/src/rolling/jit/kernel.cu | 6 +++--- cpp/src/rolling/jit/operation.hpp | 6 +++--- cpp/src/stream_compaction/filter/filter.cu | 11 +++++------ cpp/src/stream_compaction/filter/jit/kernel.cu | 6 +++--- cpp/src/transform/jit/kernel.cu | 6 +++--- cpp/src/transform/transform.cpp | 15 +++++++-------- dependencies.yaml | 1 + 16 files changed, 40 insertions(+), 36 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index b53ce1f224e..ce67a5abbca 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -41,6 +41,7 @@ dependencies: - libcurand-dev - libkvikio==25.10.*,>=0.0.0a0 - libnvcomp-dev==4.2.0.11 +- libnvjitlink-dev - librdkafka>=2.8.0,<2.9.0a0 - librmm==25.10.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 4d1af2746ac..04178f88b83 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -42,6 +42,7 @@ dependencies: - libcurand-dev - libkvikio==25.10.*,>=0.0.0a0 - libnvcomp-dev==4.2.0.11 +- 
libnvjitlink-dev - librdkafka>=2.8.0,<2.9.0a0 - librmm==25.10.*,>=0.0.0a0 - make diff --git a/conda/recipes/libcudf/recipe.yaml b/conda/recipes/libcudf/recipe.yaml index 814a304ebef..3e97dc84a7d 100644 --- a/conda/recipes/libcudf/recipe.yaml +++ b/conda/recipes/libcudf/recipe.yaml @@ -68,6 +68,7 @@ cache: - cuda-nvrtc-dev - cuda-nvtx-dev - libcurand-dev + - libnvjitlink-dev - if: linux and x86_64 then: - libcufile-dev diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 388c748c694..7c27920ee28 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -33,6 +33,8 @@ function(jit_preprocess_files) get_filename_component(jit_output_directory "${ARG_OUTPUT}" DIRECTORY) list(APPEND JIT_PREPROCESSED_FILES "${ARG_OUTPUT}") + get_filename_component(ARG_OUTPUT_DIR "${ARG_OUTPUT}" DIRECTORY) + # Note: need to pass _FILE_OFFSET_BITS=64 in COMMAND due to a limitation in how conda builds # glibc add_custom_command( @@ -43,8 +45,7 @@ function(jit_preprocess_files) COMMAND ${CMAKE_COMMAND} -E make_directory "${jit_output_directory}" COMMAND "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH=${CUDAToolkit_LIBRARY_DIR} - $ ${ARG_FILE} -o - ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -std=c++20 + $ ${ARG_FILE} -o ${ARG_OUTPUT_DIR} -i -std=c++20 -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -DCUDF_RUNTIME_JIT -I${CUDF_SOURCE_DIR}/include -I${CUDF_SOURCE_DIR}/src ${includes} --no-preinclude-workarounds --no-replace-pragma-once diff --git a/cpp/cmake/thirdparty/get_jitify.cmake b/cpp/cmake/thirdparty/get_jitify.cmake index d98abdf8824..b6f11e30d28 100644 --- a/cpp/cmake/thirdparty/get_jitify.cmake +++ b/cpp/cmake/thirdparty/get_jitify.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -18,8 +18,8 @@ function(find_and_configure_jitify) rapids_cpm_find( jitify 2.0.0 - GIT_REPOSITORY https://github.com/rapidsai/jitify.git - GIT_TAG jitify2 + GIT_REPOSITORY https://github.com/NVIDIA/jitify.git + GIT_TAG 70783a3ad7b0cad2992a26a1ebf8fbe3d6b44e25 # jitify2 branch as of 5th Aug 2025 GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE ) diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index dc021ea99a6..2951178af97 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -28,7 +28,7 @@ #include // clang-format off -#include "binaryop/jit/operation-udf.hpp" +#include // clang-format on namespace cudf { diff --git a/cpp/src/jit/accessors.cuh b/cpp/src/jit/accessors.cuh index 705028ddedd..adfb9ea7328 100644 --- a/cpp/src/jit/accessors.cuh +++ b/cpp/src/jit/accessors.cuh @@ -15,13 +15,13 @@ */ #pragma once -#include "jit/span.cuh" - #include #include #include +#include + #include namespace cudf { diff --git a/cpp/src/jit/cache.hpp b/cpp/src/jit/cache.hpp index 1772134bb90..00ad23ace5a 100644 --- a/cpp/src/jit/cache.hpp +++ b/cpp/src/jit/cache.hpp @@ -15,6 +15,8 @@ */ #pragma once +#pragma GCC diagnostic ignored "-Wignored-attributes" // Work-around for JITIFY2's false-positive + // warnings when compiled with GCC13 #include diff --git a/cpp/src/jit/helpers.hpp b/cpp/src/jit/helpers.hpp index 5b2427575b0..65e28e9c39a 100644 --- a/cpp/src/jit/helpers.hpp +++ b/cpp/src/jit/helpers.hpp @@ -15,12 +15,11 @@ */ #pragma once -#include "jit/span.cuh" - #include #include #include +#include #include namespace cudf { diff --git a/cpp/src/rolling/jit/kernel.cu b/cpp/src/rolling/jit/kernel.cu index 12219a286f7..9b4f11ebdb6 100644 --- a/cpp/src/rolling/jit/kernel.cu +++ b/cpp/src/rolling/jit/kernel.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "rolling/detail/rolling_jit.hpp" -#include "rolling/jit/operation.hpp" - #include #include #include +#include +#include + namespace cudf { namespace rolling { namespace jit { diff --git a/cpp/src/rolling/jit/operation.hpp b/cpp/src/rolling/jit/operation.hpp index 3be739ec5bf..51cd0a6b121 100644 --- a/cpp/src/rolling/jit/operation.hpp +++ b/cpp/src/rolling/jit/operation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ #pragma once -#include "rolling/jit/operation-udf.hpp" - #include +#include + struct rolling_udf_ptx { template static OutType operate(InType const* in_col, cudf::size_type start, cudf::size_type count) diff --git a/cpp/src/stream_compaction/filter/filter.cu b/cpp/src/stream_compaction/filter/filter.cu index 9369eee30d4..a518b566c97 100644 --- a/cpp/src/stream_compaction/filter/filter.cu +++ b/cpp/src/stream_compaction/filter/filter.cu @@ -14,11 +14,6 @@ * limitations under the License. 
*/
-#include "jit/cache.hpp"
-#include "jit/helpers.hpp"
-#include "jit/parser.hpp"
-#include "jit/span.cuh"
-
 #include
 #include
 #include
@@ -37,6 +32,10 @@
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
@@ -175,7 +174,7 @@ void launch_filter_kernel(jitify2::ConfiguredKernel& kernel,
 std::array args{&outputs_ptr, &inputs_ptr, &p_user_data};
- kernel->launch(args.data());
+ kernel->launch_raw(args.data());
 }
 void perform_checks(column_view base_column,
diff --git a/cpp/src/stream_compaction/filter/jit/kernel.cu b/cpp/src/stream_compaction/filter/jit/kernel.cu
index 524c389aaae..07aeebee688 100644
--- a/cpp/src/stream_compaction/filter/jit/kernel.cu
+++ b/cpp/src/stream_compaction/filter/jit/kernel.cu
@@ -14,9 +14,6 @@
 * limitations under the License.
 */
-#include "jit/accessors.cuh"
-#include "jit/span.cuh"
-
 #include
 #include
 #include
@@ -26,6 +23,9 @@
 #include
+#include
+#include
+
 // clang-format off
 // This header is an inlined header that defines the GENERIC_FILTER_OP function. It is placed here
 // so the symbols in the headers above can be used by it.
diff --git a/cpp/src/transform/jit/kernel.cu b/cpp/src/transform/jit/kernel.cu
index 5c8068f0bc7..b3647e374c5 100644
--- a/cpp/src/transform/jit/kernel.cu
+++ b/cpp/src/transform/jit/kernel.cu
@@ -14,9 +14,6 @@
 * limitations under the License.
 */
-#include "jit/accessors.cuh"
-#include "jit/span.cuh"
-
 #include
 #include
 #include
@@ -26,6 +23,9 @@
 #include
+#include
+#include
+
 // clang-format off
 // This header is an inlined header that defines the GENERIC_FILTER_OP function. It is placed here
 // so the symbols in the headers above can be used by it.
diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp
index 9c7fc4772b4..ad3c6bf15c6 100644
--- a/cpp/src/transform/transform.cpp
+++ b/cpp/src/transform/transform.cpp
@@ -14,12 +14,6 @@
 * limitations under the License.
 */
-#include "jit/cache.hpp"
-#include "jit/helpers.hpp"
-#include "jit/parser.hpp"
-#include "jit/span.cuh"
-#include "jit/util.hpp"
-
 #include
 #include
 #include
@@ -32,6 +26,11 @@
 #include
+#include
+#include
+#include
+#include
+#include
 #include
 namespace cudf {
@@ -125,7 +124,7 @@ void launch_column_output_kernel(jitify2::ConfiguredKernel& kernel,
 std::array args{&outputs_ptr, &inputs_ptr, &p_user_data};
- kernel->launch(args.data());
+ kernel->launch_raw(args.data());
 }
 template
@@ -152,7 +151,7 @@ void launch_span_kernel(jitify2::ConfiguredKernel& kernel,
 std::array args{&outputs_ptr, &inputs_ptr, &p_user_data};
- kernel->launch(args.data());
+ kernel->launch_raw(args.data());
 }
 std::tuple make_transform_null_mask(
diff --git a/dependencies.yaml b/dependencies.yaml
index 37883b03b4f..8411fa111a7 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -569,6 +569,7 @@ dependencies:
 - cuda-nvrtc-dev
 - cuda-nvtx-dev
 - libcurand-dev
+ - libnvjitlink-dev
 - output_types: conda
 matrices:
 - matrix:

From 063697cc47f5a6427bd402e7f10d0dd2de346772 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Sun, 17 Aug 2025 13:34:35 -0400
Subject: [PATCH 145/366] Run cudf-polars tests with all supported polars versions (#19353)

Runs the cudf-polars tests against all supported polars minor versions. When a minor version has multiple patch releases, we test against the latest patch release of that minor version.
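For example, with the supported range `polars>=1.28,<1.32`, the new
`ci/utils/fetch_polars_versions.py` helper emits one entry per supported
minor version. The patch numbers below are illustrative only, since the
real list depends on what is published to PyPI at run time:

```shell
$ python ci/utils/fetch_polars_versions.py --latest-patch-only dependencies.yaml
1.28.1 1.29.0 1.30.0 1.31.0
```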
- Needs #19352 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19353 --- ci/test_wheel_cudf_polars.sh | 48 ++++++++-- ci/utils/fetch_polars_versions.py | 93 +++++++++++++++++++ dependencies.yaml | 1 + python/cudf_polars/pyproject.toml | 1 + .../tests/expressions/test_stringfunction.py | 8 +- 5 files changed, 141 insertions(+), 10 deletions(-) create mode 100644 ci/utils/fetch_polars_versions.py diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index 5ec9558e0de..b2683eeadbc 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -35,6 +35,8 @@ rapids-pip-retry install \ rapids-logger "Run cudf_polars tests" +POLARS_VERSIONS=$(python ci/utils/fetch_polars_versions.py --latest-patch-only dependencies.yaml) + # shellcheck disable=SC2317 function set_exitcode() { @@ -44,16 +46,50 @@ EXITCODE=0 trap set_exitcode ERR set +e -./ci/run_cudf_polars_pytests.sh \ - --cov=cudf_polars \ - --cov-fail-under=100 \ - --cov-report=term-missing:skip-covered \ - --cov-config=./pyproject.toml \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" +PASSED=() +FAILED=() + +read -r -a VERSIONS <<< "${POLARS_VERSIONS}" +LATEST_VERSION="${VERSIONS[-1]}" + +for version in "${VERSIONS[@]}"; do + rapids-logger "Installing polars==${version}" + pip install -U "polars==${version}" + + rapids-logger "Running tests for polars==${version}" + + if [ "${version}" == "${LATEST_VERSION}" ]; then + COVERAGE_ARGS=( + --cov=cudf_polars + --cov-fail-under=100 + --cov-report=term-missing:skip-covered + --cov-config=./pyproject.toml + ) + else + COVERAGE_ARGS=(--no-cov) + fi + + ./ci/run_cudf_polars_pytests.sh \ + "${COVERAGE_ARGS[@]}" \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars-${version}.xml" + + if [ $? -ne 0 ]; then + EXITCODE=1 + FAILED+=("${version}") + rapids-logger "Tests failed for polars==${version}" + else + PASSED+=("${version}") + rapids-logger "Tests passed for polars==${version}" + fi +done trap ERR set -e +rapids-logger "Polars test summary:" +rapids-logger "PASSED: ${PASSED[*]:-none}" +rapids-logger "FAILED: ${FAILED[*]:-none}" + if [ ${EXITCODE} != 0 ]; then rapids-logger "Testing FAILED: exitcode ${EXITCODE}" else diff --git a/ci/utils/fetch_polars_versions.py b/ci/utils/fetch_polars_versions.py new file mode 100644 index 00000000000..643081c5642 --- /dev/null +++ b/ci/utils/fetch_polars_versions.py @@ -0,0 +1,93 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import argparse +import json +import ssl +import urllib.request + +import certifi +import yaml +from packaging.specifiers import SpecifierSet +from packaging.version import Version + + +def get_polars_specifier(deps_yaml_path): + with open(deps_yaml_path, "r") as f: + deps = yaml.safe_load(f) + + try: + includes = deps["files"]["all"]["includes"] + if "run_cudf_polars" not in includes: + raise KeyError() + except KeyError: + raise RuntimeError("run_cudf_polars not found in dependencies.yaml") + + try: + pkgs = deps["dependencies"]["run_cudf_polars"]["common"] + for entry in pkgs: + for pkg in entry.get("packages", []): + if isinstance(pkg, str) and pkg.startswith("polars"): + spec = pkg.removeprefix("polars").strip() + if spec: + return spec + except KeyError: + pass + + raise RuntimeError("Polars specifier not found in dependencies.yaml") + + +def get_latest_versions_per_minor(versions): + latest = {} + for v in versions: + key = (v.major, v.minor) + if key not in latest or v > latest[key]: + latest[key] = v + return sorted(latest.values()) + + +def get_polars_versions(polars_range, latest_only=False): + url = "https://pypi.org/pypi/polars/json" + # Set a timeout for the request to avoid hanging + timeout = 10 # seconds + + try: + context = ssl.create_default_context(cafile=certifi.where()) + with urllib.request.urlopen( + url, timeout=timeout, context=context + ) as response: + data = json.loads(response.read()) + except Exception as e: + raise RuntimeError(f"Failed to fetch polars metadata from PyPI: {e}") + + all_versions = [Version(v) for v in data["releases"]] + specifier = SpecifierSet(polars_range) + matching = [v for v in all_versions if v in specifier] + + if latest_only: + matching = get_latest_versions_per_minor(matching) + + return [str(v) for v in sorted(matching)] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Filter polars versions by dependencies.yaml." 
+ ) + parser.add_argument( + "deps_yaml", + nargs="?", + default="./dependencies.yaml", + help="Path to dependencies.yaml", + ) + parser.add_argument( + "--latest-patch-only", + action="store_true", + help="Return only the latest patch per minor version", + ) + args = parser.parse_args() + + polars_range = get_polars_specifier(args.deps_yaml) + versions = get_polars_versions( + polars_range, latest_only=args.latest_patch_only + ) + print(" ".join(versions)) diff --git a/dependencies.yaml b/dependencies.yaml index 8411fa111a7..de83eae1276 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -726,6 +726,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - nvidia-ml-py + - packaging - polars>=1.28,<1.32 specific: - output_types: [requirements, pyproject] diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 472520df984..5fadd6b8656 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,6 +20,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "nvidia-ml-py", + "packaging", "polars>=1.28,<1.32", "pylibcudf==25.10.*,>=0.0.0a0", "typing-extensions; python_version < '3.11'", diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 9923f031300..3f1874df702 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -543,10 +543,10 @@ def test_string_zfill(fill, input_strings): "fill", [ 5 - if not POLARS_VERSION_LT_130 + if not POLARS_VERSION_LT_131 else pytest.param(5, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), 999 - if not POLARS_VERSION_LT_130 + if not POLARS_VERSION_LT_131 else pytest.param(999, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), ], ) @@ -563,10 +563,10 @@ def test_string_zfill_pl_129(fill): 1, 2, 5 - if not POLARS_VERSION_LT_130 + if not POLARS_VERSION_LT_131 else pytest.param(5, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), 999 - if not POLARS_VERSION_LT_130 + if not POLARS_VERSION_LT_131 else pytest.param(999, marks=pytest.mark.xfail(reason="fixed in Polars 1.30")), -1, pytest.param(None, marks=pytest.mark.xfail(reason="None dtype")), From 1bb30c5251522d4c042cc1cd6a41cb4fc7cf7353 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 18 Aug 2025 07:33:35 -0500 Subject: [PATCH 146/366] RapidsMPF "single" shuffle integration (#19530) Depends on https://github.com/rapidsai/rapidsmpf/pull/380 Enables the `"rapidsmpf"` shuffle options to work with cudf-polars when the `"synchronous"` scheduler is active. 
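For illustration, the new path can be requested through the regular
engine options, mirroring the unit tests added in this PR (the frame
contents below are placeholders):

```python
import polars as pl

engine = pl.GPUEngine(
    executor="streaming",
    executor_options={
        # "rapidsmpf" is translated to the private "rapidsmpf-single"
        # shuffle method when the synchronous scheduler is active.
        "shuffle_method": "rapidsmpf",
        "scheduler": "synchronous",
    },
)
left = pl.LazyFrame({"x": range(6), "y": [1, 2, 3] * 2})
right = pl.LazyFrame({"xx": range(6), "y": [2, 4, 3] * 2})
result = left.join(right, on="y", how="inner").collect(engine=engine)
```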
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19530 --- .../cudf_polars/experimental/shuffle.py | 30 ++++++-- .../cudf_polars/cudf_polars/utils/config.py | 61 ++++++++++++---- .../tests/experimental/test_rapidsmpf.py | 69 +++++++++++++++++++ python/cudf_polars/tests/test_config.py | 65 +++++++++++------ 4 files changed, 183 insertions(+), 42 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py index b6ecfc35fa8..9a4e63e4b76 100644 --- a/python/cudf_polars/cudf_polars/experimental/shuffle.py +++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py @@ -5,7 +5,7 @@ from __future__ import annotations import operator -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING, Any, Literal, TypedDict import pylibcudf as plc from rmm.pylibrmm.stream import DEFAULT_STREAM @@ -39,6 +39,7 @@ class ShuffleOptions(TypedDict): on: Sequence[str] column_names: Sequence[str] dtypes: Sequence[DataType] + cluster_kind: str # Experimental rapidsmpf shuffler integration @@ -57,7 +58,12 @@ def insert_partition( ) -> None: """Add cudf-polars DataFrame chunks to an RMP shuffler.""" from rapidsmpf.integrations.cudf.partition import partition_and_pack - from rapidsmpf.integrations.dask.core import get_worker_context + + if options["cluster_kind"] == "dask": + from rapidsmpf.integrations.dask import get_worker_context + + else: + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() @@ -90,7 +96,12 @@ def extract_partition( unpack_and_concat, unspill_partitions, ) - from rapidsmpf.integrations.dask.core import get_worker_context + + if options["cluster_kind"] == "dask": + from rapidsmpf.integrations.dask import get_worker_context + + else: + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() if context.br is None: # pragma: no cover @@ -286,10 +297,18 @@ def _( # Try using rapidsmpf shuffler if we have "simple" shuffle # keys, and the "shuffle_method" config is set to "rapidsmpf" _keys: list[Col] - if shuffle_method == "rapidsmpf" and len( + if shuffle_method in ("rapidsmpf", "rapidsmpf-single") and len( _keys := [ne.value for ne in ir.keys if isinstance(ne.value, Col)] ) == len(ir.keys): # pragma: no cover - from rapidsmpf.integrations.dask import rapidsmpf_shuffle_graph + cluster_kind: Literal["dask", "single"] + if shuffle_method == "rapidsmpf-single": + from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph + + cluster_kind = "single" + else: + from rapidsmpf.integrations.dask import rapidsmpf_shuffle_graph + + cluster_kind = "dask" shuffle_on = [k.name for k in _keys] @@ -304,6 +323,7 @@ def _( "on": shuffle_on, "column_names": list(ir.schema.keys()), "dtypes": list(ir.schema.values()), + "cluster_kind": cluster_kind, }, ) except ValueError as err: diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py index 578dfd0694e..e65cf877b4e 100644 --- a/python/cudf_polars/cudf_polars/utils/config.py +++ b/python/cudf_polars/cudf_polars/utils/config.py @@ -82,8 +82,17 @@ def get_total_device_memory() -> int | None: @functools.cache -def rapidsmpf_available() -> bool: # pragma: no cover - """Query whether rapidsmpf is available as a shuffle method.""" 
+def rapidsmpf_single_available() -> bool: # pragma: no cover + """Query whether rapidsmpf is available as a single-process shuffle method.""" + try: + return importlib.util.find_spec("rapidsmpf.integrations.single") is not None + except (ImportError, ValueError): + return False + + +@functools.cache +def rapidsmpf_distributed_available() -> bool: # pragma: no cover + """Query whether rapidsmpf is available as a distributed shuffle method.""" try: return importlib.util.find_spec("rapidsmpf.integrations.dask") is not None except (ImportError, ValueError): @@ -129,15 +138,21 @@ class ShuffleMethod(str, enum.Enum): The method to use for shuffling data between workers with the streaming executor. * ``ShuffleMethod.TASKS`` : Use the task-based shuffler. - * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf scheduler. + * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf shuffler. + * ``ShuffleMethod._RAPIDSMPF_SINGLE`` : Use the single-process rapidsmpf shuffler. - With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None`` will attempt to use - ``ShuffleMethod.RAPIDSMPF``, but will fall back to ``ShuffleMethod.TASKS`` - if rapidsmpf is not installed. + With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None`` + will attempt to use ``ShuffleMethod.RAPIDSMPF`` for the distributed scheduler, + but will fall back to ``ShuffleMethod.TASKS`` if rapidsmpf is not installed. + + The user should **not** specify ``ShuffleMethod._RAPIDSMPF_SINGLE`` directly. + A setting of ``ShuffleMethod.RAPIDSMPF`` will be converted to the single-process + shuffler automatically when the 'synchronous' scheduler is active. """ TASKS = "tasks" RAPIDSMPF = "rapidsmpf" + _RAPIDSMPF_SINGLE = "rapidsmpf-single" T = TypeVar("T") @@ -420,24 +435,32 @@ class StreamingExecutor: def __post_init__(self) -> None: # noqa: D105 # Handle shuffle_method defaults for streaming executor if self.shuffle_method is None: - if self.scheduler == "distributed" and rapidsmpf_available(): + if self.scheduler == "distributed" and rapidsmpf_distributed_available(): # For distributed scheduler, prefer rapidsmpf if available object.__setattr__(self, "shuffle_method", "rapidsmpf") else: + # Otherwise, use task-based shuffle for now. + # TODO: Evaluate single-process shuffle by default. object.__setattr__(self, "shuffle_method", "tasks") - else: + elif self.shuffle_method == "rapidsmpf-single": + # The user should NOT specify "rapidsmpf-single" directly. + raise ValueError("rapidsmpf-single is not a supported shuffle method.") + elif self.shuffle_method == "rapidsmpf": + # Check that we have rapidsmpf installed if ( self.scheduler == "distributed" - and self.shuffle_method == "rapidsmpf" - and not rapidsmpf_available() + and not rapidsmpf_distributed_available() ): raise ValueError( - "rapidsmpf shuffle method requested, but rapidsmpf is not installed" + "rapidsmpf shuffle method requested, but rapidsmpf.integrations.dask is not installed." ) - if self.scheduler == "synchronous" and self.shuffle_method == "rapidsmpf": - raise ValueError( - "rapidsmpf shuffle method is not supported for synchronous scheduler" - ) + elif self.scheduler == "synchronous" and not rapidsmpf_single_available(): + raise ValueError( + "rapidsmpf shuffle method requested, but rapidsmpf is not installed." 
+ )
+ # Select "rapidsmpf-single" for the synchronous scheduler
+ if self.scheduler == "synchronous":
+ object.__setattr__(self, "shuffle_method", "rapidsmpf-single")

 # frozen dataclass, so use object.__setattr__
 object.__setattr__(
@@ -482,6 +505,14 @@ def __post_init__(self) -> None: # noqa: D105
 if not isinstance(self.sink_to_directory, bool):
 raise TypeError("sink_to_directory must be bool")

+ # RapidsMPF spill is only supported for the distributed scheduler for now.
+ # This is because the spilling API is still within the RMPF-Dask integration.
+ # (See https://github.com/rapidsai/rapidsmpf/issues/439)
+ if self.scheduler == "synchronous" and self.rapidsmpf_spill: # pragma: no cover
+ raise ValueError(
+ "rapidsmpf_spill is not supported for the synchronous scheduler."
+ )
+
 def __hash__(self) -> int: # noqa: D105
 # cardinality factory, a dict, isn't natively hashable. We'll dump it
 # to json and hash that.
diff --git a/python/cudf_polars/tests/experimental/test_rapidsmpf.py b/python/cudf_polars/tests/experimental/test_rapidsmpf.py
index 4b611355b12..c550a5e2d9f 100644
--- a/python/cudf_polars/tests/experimental/test_rapidsmpf.py
+++ b/python/cudf_polars/tests/experimental/test_rapidsmpf.py
@@ -8,6 +8,7 @@
 import polars as pl

 from cudf_polars.testing.asserts import assert_gpu_result_equal
+from cudf_polars.utils.config import ConfigOptions


 @pytest.mark.parametrize("rapidsmpf_spill", [False, True])
@@ -68,3 +69,71 @@ def test_join_rapidsmpf(
 q = left.join(right, on="y", how="inner")

 assert_gpu_result_equal(q, engine=engine, check_row_order=False)
+
+
+@pytest.mark.parametrize("max_rows_per_partition", [1, 5])
+def test_join_rapidsmpf_single(max_rows_per_partition: int) -> None:
+ # skip when rapidsmpf is not installed
+ pytest.importorskip("rapidsmpf")
+
+ # Setup the GPUEngine config
+ engine = pl.GPUEngine(
+ raise_on_fail=True,
+ executor="streaming",
+ executor_options={
+ "max_rows_per_partition": max_rows_per_partition,
+ "broadcast_join_limit": 2,
+ "shuffle_method": "rapidsmpf",
+ "scheduler": "synchronous",
+ },
+ )
+
+ left = pl.LazyFrame(
+ {
+ "x": range(15),
+ "y": [1, 2, 3] * 5,
+ "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3,
+ }
+ )
+ right = pl.LazyFrame(
+ {
+ "xx": range(6),
+ "y": [2, 4, 3] * 2,
+ "zz": [1, 2] * 3,
+ }
+ )
+ q = left.join(right, on="y", how="inner")
+
+ assert_gpu_result_equal(q, engine=engine, check_row_order=False)
+
+
+def test_join_rapidsmpf_single_private_config() -> None:
+ # The user may not specify "rapidsmpf-single" directly
+ engine = pl.GPUEngine(
+ raise_on_fail=True,
+ executor="streaming",
+ executor_options={
+ "shuffle_method": "rapidsmpf-single",
+ "scheduler": "synchronous",
+ },
+ )
+ with pytest.raises(ValueError, match="not a supported shuffle method"):
+ ConfigOptions.from_polars_engine(engine)
+
+
+def test_rapidsmpf_spill_synchronous_unsupported() -> None:
+ # skip when rapidsmpf is not installed
+ pytest.importorskip("rapidsmpf")
+
+ # rapidsmpf_spill=True is not yet supported with the synchronous scheduler.
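+ # (The spill hooks currently live in the RapidsMPF-Dask integration;
+ # see https://github.com/rapidsai/rapidsmpf/issues/439.)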
+ engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "shuffle_method": "rapidsmpf", + "scheduler": "synchronous", + "rapidsmpf_spill": True, + }, + ) + with pytest.raises(ValueError, match="rapidsmpf_spill.*not supported.*synchronous"): + ConfigOptions.from_polars_engine(engine) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 3de18e0a4e4..795f35c5f8e 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -24,10 +24,22 @@ from cudf_polars.utils.versions import POLARS_VERSION_LT_130 -@pytest.fixture(params=[False, True], ids=["norapidsmpf", "rapidsmpf"]) -def rapidsmpf_available(request, monkeypatch): +@pytest.fixture(params=[False, True], ids=["norapidsmpf.single", "rapidsmpf.single"]) +def rapidsmpf_single_available(request, monkeypatch): monkeypatch.setattr( - cudf_polars.utils.config, "rapidsmpf_available", lambda: request.param + cudf_polars.utils.config, + "rapidsmpf_single_available", + lambda: request.param, + ) + return request.param + + +@pytest.fixture(params=[False, True], ids=["norapidsmpf.dask", "rapidsmpf.dask"]) +def rapidsmpf_distributed_available(request, monkeypatch): + monkeypatch.setattr( + cudf_polars.utils.config, + "rapidsmpf_distributed_available", + lambda: request.param, ) return request.param @@ -151,7 +163,9 @@ def test_parquet_options(executor: str) -> None: assert config.parquet_options.n_output_chunks == 16 -def test_validate_streaming_executor_shuffle_method(rapidsmpf_available) -> None: +def test_validate_streaming_executor_shuffle_method( + *, rapidsmpf_distributed_available: bool, rapidsmpf_single_available: bool +) -> None: config = ConfigOptions.from_polars_engine( pl.GPUEngine( executor="streaming", @@ -161,30 +175,34 @@ def test_validate_streaming_executor_shuffle_method(rapidsmpf_available) -> None assert config.executor.name == "streaming" assert config.executor.shuffle_method == "tasks" + # rapidsmpf with distributed scheduler engine = pl.GPUEngine( executor="streaming", executor_options={"shuffle_method": "rapidsmpf", "scheduler": "distributed"}, ) - if rapidsmpf_available: + if rapidsmpf_distributed_available: config = ConfigOptions.from_polars_engine(engine) assert config.executor.name == "streaming" assert config.executor.shuffle_method == "rapidsmpf" else: - with pytest.raises(ValueError, match="rapidsmpf is not installed"): + with pytest.raises( + ValueError, match="rapidsmpf.integrations.dask is not installed" + ): ConfigOptions.from_polars_engine(engine) - # rapidsmpf with sync is not allowed + # rapidsmpf with sync scheduler + engine = pl.GPUEngine( + executor="streaming", + executor_options={"shuffle_method": "rapidsmpf", "scheduler": "synchronous"}, + ) - with pytest.raises(ValueError, match="rapidsmpf shuffle method"): - ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={ - "shuffle_method": "rapidsmpf", - "scheduler": "synchronous", - }, - ) - ) + if rapidsmpf_single_available: + config = ConfigOptions.from_polars_engine(engine) + assert config.executor.name == "streaming" + assert config.executor.shuffle_method == "rapidsmpf-single" + else: + with pytest.raises(ValueError, match="rapidsmpf is not installed"): + ConfigOptions.from_polars_engine(engine) @pytest.mark.parametrize("executor", ["in-memory", "streaming"]) @@ -233,7 +251,10 @@ def test_validate_scheduler() -> None: ) -def test_validate_shuffle_method_defaults(rapidsmpf_available) -> None: +def 
test_validate_shuffle_method_defaults( + *, + rapidsmpf_distributed_available: bool, +) -> None: config = ConfigOptions.from_polars_engine( pl.GPUEngine( executor="streaming", @@ -252,7 +273,7 @@ def test_validate_shuffle_method_defaults(rapidsmpf_available) -> None: ) ) assert config.executor.name == "streaming" - if rapidsmpf_available: + if rapidsmpf_distributed_available: # Should be "rapidsmpf" if available, otherwise "tasks" assert config.executor.shuffle_method == "rapidsmpf" else: @@ -330,7 +351,7 @@ def test_parquet_options_from_env(monkeypatch: pytest.MonkeyPatch) -> None: def test_config_option_from_env( - monkeypatch: pytest.MonkeyPatch, *, rapidsmpf_available: bool + monkeypatch: pytest.MonkeyPatch, *, rapidsmpf_distributed_available: bool ) -> None: with monkeypatch.context() as m: m.setenv("CUDF_POLARS__EXECUTOR__SCHEDULER", "distributed") @@ -343,7 +364,7 @@ def test_config_option_from_env( m.setenv("CUDF_POLARS__EXECUTOR__RAPIDSMPF_SPILL", "1") m.setenv("CUDF_POLARS__EXECUTOR__SINK_TO_DIRECTORY", "1") - if rapidsmpf_available: + if rapidsmpf_distributed_available: m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "rapidsmpf") else: m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks") @@ -361,7 +382,7 @@ def test_config_option_from_env( assert config.executor.rapidsmpf_spill is True assert config.executor.sink_to_directory is True - if rapidsmpf_available: + if rapidsmpf_distributed_available: assert config.executor.shuffle_method == "rapidsmpf" else: assert config.executor.shuffle_method == "tasks" From 17d1837ef83e25ea4026ea5183bdefe4bd805174 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 18 Aug 2025 10:49:41 -0400 Subject: [PATCH 147/366] Handle `TIMESTAMP_DAYS` in rolling window offsets (#19689) Contributes to #18633 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19689 --- python/cudf_polars/cudf_polars/dsl/utils/windows.py | 8 +++++++- python/cudf_polars/tests/expressions/test_rolling.py | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/windows.py b/python/cudf_polars/cudf_polars/dsl/utils/windows.py index 91ea566f683..49aebc57fdf 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/windows.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/windows.py @@ -110,8 +110,14 @@ def duration_to_scalar(dtype: plc.DataType, value: int) -> plc.Scalar: return plc.Scalar.from_py( value // 1_000_000, plc.DataType(plc.TypeId.DURATION_MILLISECONDS) ) + elif tid == plc.TypeId.TIMESTAMP_DAYS: + return plc.Scalar.from_py( + value // 86_400_000_000_000, plc.DataType(plc.TypeId.DURATION_DAYS) + ) else: - raise NotImplementedError("Unsupported data type in rolling window offset") + raise NotImplementedError( + "Unsupported data type in rolling window offset" + ) # pragma: no cover; polars raises first def offsets_to_windows( diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py index 0253f14c416..373e4903cef 100644 --- a/python/cudf_polars/tests/expressions/test_rolling.py +++ b/python/cudf_polars/tests/expressions/test_rolling.py @@ -38,7 +38,7 @@ def test_rolling_datetime(time_unit): assert_gpu_result_equal(q) -def test_rolling_date_raises(): +def test_rolling_date(): dates = [ "2020-01-01", "2020-01-01", @@ -56,7 +56,7 @@ def test_rolling_date_raises(): 
max_a=pl.max("a").rolling(index_column="dt", period="10d", offset="2d"), ) - assert_ir_translation_raises(q, NotImplementedError) + assert_gpu_result_equal(q) @pytest.mark.parametrize("dtype", [pl.Int32, pl.UInt32, pl.Int64, pl.UInt64]) From 916c64aeac23525352b58b480261adc4715a2cea Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Mon, 18 Aug 2025 10:56:25 -0500 Subject: [PATCH 148/366] Enable casting `pl.Datetime` to integer types in `cudf-polars` (#19647) Part of https://github.com/rapidsai/cudf/issues/17060 Authors: - https://github.com/brandon-b-miller Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19647 --- .../cudf_polars/containers/column.py | 15 ++++++++ .../cudf_polars/cudf_polars/utils/dtypes.py | 5 +++ .../tests/expressions/test_casting.py | 6 ++-- .../tests/expressions/test_datetime_basic.py | 36 +++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 31e4c66a02c..a278ba32b43 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -307,6 +307,21 @@ def astype(self, dtype: DataType) -> Column: upcasted.children(), ) return Column(result, dtype=dtype).sorted_like(self) + elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp( + self.obj.type() + ): + result = plc.column.Column( + plc.DataType(plc.TypeId.INT64), + self.obj.size(), + self.obj.data(), + self.obj.null_mask(), + self.obj.null_count(), + self.obj.offset(), + self.obj.children(), + ) + return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like( + self + ) else: result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype) if is_order_preserving_cast(self.obj.type(), plc_dtype): diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 23a94cf2a67..4756b2a0692 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -69,6 +69,11 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool: and not to_is_empty and plc.traits.is_timestamp(to) ) + or ( + plc.traits.is_integral_not_bool(to) + and not to_is_empty + and plc.traits.is_timestamp(from_) + ) ) diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py index 0722a0f198a..a8438509ec6 100644 --- a/python/cudf_polars/tests/expressions/test_casting.py +++ b/python/cudf_polars/tests/expressions/test_casting.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 
# SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -14,7 +14,7 @@ _supported_dtypes = [(pl.Int8(), pl.Int64())] _unsupported_dtypes = [ - (pl.Datetime("ns"), pl.Int64()), + (pl.Boolean(), pl.Datetime("ns")), ] @@ -28,6 +28,8 @@ def tests(dtypes): fromtype, totype = dtypes if fromtype == pl.String(): data = ["a", "b", "c"] + elif fromtype == pl.Boolean(): + data = [True, False, True] else: data = [1, 2, 3] return pl.DataFrame( diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py index 5d98165a419..b9f13b9e9b1 100644 --- a/python/cudf_polars/tests/expressions/test_datetime_basic.py +++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py @@ -386,3 +386,39 @@ def test_datetime_from_integer(datetime_dtype, integer_dtype): ) else: assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "datetime_dtype", + [ + pl.Datetime("ms"), + pl.Datetime("us"), + pl.Datetime("ns"), + ], +) +@pytest.mark.parametrize( + "integer_dtype", + [ + pl.Int64(), + pytest.param( + pl.UInt64(), marks=pytest.mark.xfail(reason="INT64 can not fit max(UINT64)") + ), + pl.Int32(), + pl.UInt32(), + pl.Int16(), + pl.UInt16(), + pl.Int8(), + pl.UInt8(), + ], +) +def test_integer_from_datetime(datetime_dtype, integer_dtype): + values = [ + 0, + 1, + 100, + pl.select(integer_dtype.max()).item(), + pl.select(integer_dtype.min()).item(), + ] + df = pl.LazyFrame({"data": pl.Series(values, dtype=datetime_dtype)}) + q = df.select(pl.col("data").cast(integer_dtype).alias("int_from_datetime")) + assert_gpu_result_equal(q) From fd7e0821ae7852a5f88bfb185703ea2f708ebf36 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 18 Aug 2025 12:03:43 -0500 Subject: [PATCH 149/366] Add API to "initialize" column statistics (#19447) Closes https://github.com/rapidsai/cudf/issues/19390 - Adds simple `StatsCollector` API - Adds `collect_base_stats` API (tested in this PR, but not *used* anywhere internally yet) - Adds `initialize_column_stats` dispatch functions and registers IR-specific logic for various IR sub-classes (this dispatch function is used by `collect_base_stats`). Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Matthew Murray (https://github.com/Matt711) - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19447 --- .../cudf_polars/experimental/base.py | 51 ++- .../cudf_polars/experimental/dispatch.py | 37 +- .../cudf_polars/experimental/statistics.py | 208 ++++++++++ python/cudf_polars/docs/overview.md | 85 +++++ .../tests/experimental/test_dataframescan.py | 45 --- .../tests/experimental/test_scan.py | 121 ------ .../tests/experimental/test_stats.py | 357 ++++++++++++++++++ 7 files changed, 735 insertions(+), 169 deletions(-) create mode 100644 python/cudf_polars/cudf_polars/experimental/statistics.py create mode 100644 python/cudf_polars/tests/experimental/test_stats.py diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index ec4d2109133..01fc4d1e1fc 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -11,6 +11,7 @@ from collections.abc import Generator, Iterator from cudf_polars.dsl.expr import NamedExpr + from cudf_polars.dsl.ir import IR from cudf_polars.dsl.nodebase import Node @@ -124,7 +125,9 @@ class ColumnStats: ---------- name Column name. - source + children + Child ColumnStats objects. 
+ source_info Datasource information. source_name Source-column name. @@ -132,9 +135,10 @@ class ColumnStats: Unique-value statistics. """ - __slots__ = ("name", "source_info", "source_name", "unique_stats") + __slots__ = ("children", "name", "source_info", "source_name", "unique_stats") name: str + children: tuple[ColumnStats, ...] source_info: DataSourceInfo source_name: str unique_stats: UniqueStats @@ -143,11 +147,54 @@ def __init__( self, name: str, *, + children: tuple[ColumnStats, ...] = (), source_info: DataSourceInfo | None = None, source_name: str | None = None, unique_stats: UniqueStats | None = None, ) -> None: self.name = name + self.children = children self.source_info = source_info or DataSourceInfo() self.source_name = source_name or name self.unique_stats = unique_stats or UniqueStats() + + def new_parent( + self, + *, + name: str | None = None, + ) -> ColumnStats: + """ + Initialize a new parent ColumnStats object. + + Parameters + ---------- + name + The new column name. + + Returns + ------- + A new ColumnStats object. + + Notes + ----- + This API preserves the original DataSourceInfo reference. + """ + return ColumnStats( + name=name or self.name, + children=(self,), + # Want to reference the same DataSourceInfo + source_info=self.source_info, + source_name=self.source_name, + # Want fresh UniqueStats so we can mutate in place + unique_stats=UniqueStats(), + ) + + +class StatsCollector: + """Column statistics collector.""" + + __slots__ = ("column_stats", "row_count") + + def __init__(self) -> None: + self.row_count: dict[IR, ColumnStat[int]] = {} + self.column_stats: dict[IR, dict[str, ColumnStats]] = {} diff --git a/python/cudf_polars/cudf_polars/experimental/dispatch.py b/python/cudf_polars/cudf_polars/experimental/dispatch.py index 4ea4460fcb3..b48ab73d671 100644 --- a/python/cudf_polars/cudf_polars/experimental/dispatch.py +++ b/python/cudf_polars/cudf_polars/experimental/dispatch.py @@ -14,7 +14,11 @@ from cudf_polars.dsl import ir from cudf_polars.dsl.ir import IR - from cudf_polars.experimental.base import PartitionInfo + from cudf_polars.experimental.base import ( + ColumnStats, + PartitionInfo, + StatsCollector, + ) from cudf_polars.utils.config import ConfigOptions @@ -97,3 +101,34 @@ def generate_ir_tasks( task_graph """ raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover + + +@singledispatch +def initialize_column_stats( + ir: IR, stats: StatsCollector, config_options: ConfigOptions +) -> dict[str, ColumnStats]: + """ + Initialize column statistics for an IR node. + + Parameters + ---------- + ir + The IR node to collect source statistics for. + stats + The `StatsCollector` object containing known source statistics. + config_options + GPUEngine configuration options. + + Returns + ------- + base_stats_mapping + Mapping between column names and base ``ColumnStats`` objects. + + Notes + ----- + Base column stats correspond to ``ColumnStats`` objects **without** + populated ``unique_stats`` information. The purpose of this function + is to propagate ``DataSourceInfo`` references and set ``children`` + attributes for each column of each IR node. 
+ """ + raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover diff --git a/python/cudf_polars/cudf_polars/experimental/statistics.py b/python/cudf_polars/cudf_polars/experimental/statistics.py new file mode 100644 index 00000000000..7c302fe59d7 --- /dev/null +++ b/python/cudf_polars/cudf_polars/experimental/statistics.py @@ -0,0 +1,208 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +"""Utilities for tracking column statistics.""" + +from __future__ import annotations + +import itertools +from typing import TYPE_CHECKING + +from cudf_polars.dsl.ir import ( + IR, + DataFrameScan, + Distinct, + GroupBy, + HConcat, + Join, + Scan, + Union, +) +from cudf_polars.dsl.traversal import post_traversal +from cudf_polars.experimental.base import ( + ColumnStats, + StatsCollector, +) +from cudf_polars.experimental.dispatch import initialize_column_stats + +if TYPE_CHECKING: + from collections.abc import Sequence + + from cudf_polars.utils.config import ConfigOptions + + +def collect_base_stats(root: IR, config_options: ConfigOptions) -> StatsCollector: + """ + Collect base datasource statistics. + + Parameters + ---------- + root + Root IR node for collecting base datasource statistics. + config_options + GPUEngine configuration options. + + Returns + ------- + A new StatsCollector object with populated datasource statistics. + """ + stats: StatsCollector = StatsCollector() + for node in post_traversal([root]): + stats.column_stats[node] = initialize_column_stats(node, stats, config_options) + return stats + + +def _update_unique_stats_columns( + child_column_stats: dict[str, ColumnStats], + key_names: Sequence[str], + config_options: ConfigOptions, +) -> None: + """Update set of unique-stats columns in datasource.""" + assert config_options.executor.name == "streaming", ( + "'in-memory' executor not supported in 'add_source_stats'" + ) + unique_fraction = config_options.executor.unique_fraction + for name in key_names: + if ( + name not in unique_fraction + and (column_stats := child_column_stats.get(name)) is not None + and (source_stats := column_stats.source_info) is not None + ): + source_stats.add_unique_stats_column(column_stats.source_name or name) + + +@initialize_column_stats.register(IR) +def _default_initialize_column_stats( + ir: IR, stats: StatsCollector, config_options: ConfigOptions +) -> dict[str, ColumnStats]: + # Default `initialize_column_stats` implementation. + if len(ir.children) == 1: + (child,) = ir.children + child_column_stats = stats.column_stats.get(child, {}) + return { + name: child_column_stats.get(name, ColumnStats(name=name)).new_parent() + for name in ir.schema + } + else: # pragma: no cover + # Multi-child nodes loose all information by default. + return {name: ColumnStats(name=name) for name in ir.schema} + + +@initialize_column_stats.register(Distinct) +def _( + ir: Distinct, stats: StatsCollector, config_options: ConfigOptions +) -> dict[str, ColumnStats]: + # Use default initialize_column_stats after updating + # the known unique-stats columns. 
+ (child,) = ir.children
+ child_column_stats = stats.column_stats.get(child, {})
+ key_names = ir.subset or ir.schema
+ _update_unique_stats_columns(child_column_stats, list(key_names), config_options)
+ return _default_initialize_column_stats(ir, stats, config_options)
+
+
+@initialize_column_stats.register(Join)
+def _(
+ ir: Join, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ # Copy column statistics from both the left and right children.
+ # Special cases to consider:
+ # - If a column name appears in both sides of the join,
+ # we take it from the "primary" column (right for "Right"
+ # joins, left for all other joins).
+ # - If a column name doesn't appear in either child, it
+ # corresponds to a non-"primary" column with a suffix.
+
+ children, on = ir.children, (ir.left_on, ir.right_on)
+ how = ir.options[0]
+ suffix = ir.options[3]
+ if how == "Right":
+ children, on = children[::-1], on[::-1]
+ primary, other = children
+ primary_child_stats = stats.column_stats.get(primary, {})
+ other_child_stats = stats.column_stats.get(other, {})
+
+ # Build output column statistics
+ column_stats: dict[str, ColumnStats] = {}
+ for name in ir.schema:
+ if name in primary.schema:
+ # "Primary" child stats take preference.
+ column_stats[name] = primary_child_stats[name].new_parent()
+ elif name in other.schema:
+ # "Other" column stats apply to everything else.
+ column_stats[name] = other_child_stats[name].new_parent()
+ else:
+ # If the column name was not in either child table,
+ # a suffix was added to a column in "other".
+ _name = name.removesuffix(suffix)
+ column_stats[name] = other_child_stats[_name].new_parent(name=name)
+
+ # Update children
+ for p_key, o_key in zip(*on, strict=True):
+ column_stats[p_key.name].children = (
+ primary_child_stats[p_key.name],
+ other_child_stats[o_key.name],
+ )
+
+ return column_stats
+
+
+@initialize_column_stats.register(GroupBy)
+def _(
+ ir: GroupBy, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ (child,) = ir.children
+ child_column_stats = stats.column_stats.get(child, {})
+
+ # Update set of source columns we may lazily sample
+ _update_unique_stats_columns(
+ child_column_stats, [n.name for n in ir.keys], config_options
+ )
+ return _default_initialize_column_stats(ir, stats, config_options)
+
+
+@initialize_column_stats.register(HConcat)
+def _(
+ ir: HConcat, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ child_column_stats = dict(
+ itertools.chain.from_iterable(
+ stats.column_stats.get(c, {}).items() for c in ir.children
+ )
+ )
+ return {
+ name: child_column_stats.get(name, ColumnStats(name=name)).new_parent()
+ for name in ir.schema
+ }
+
+
+@initialize_column_stats.register(Union)
+def _(
+ ir: IR, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ # Union loses source information for now.
+ return {
+ name: ColumnStats(
+ name=name,
+ children=tuple(stats.column_stats[child][name] for child in ir.children),
+ )
+ for name in ir.schema
+ }
+
+
+@initialize_column_stats.register(Scan)
+def _(
+ ir: Scan, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ from cudf_polars.experimental.io import _extract_scan_stats
+
+ return _extract_scan_stats(ir, config_options)
+
+
+@initialize_column_stats.register(DataFrameScan)
+def _(
+ ir: DataFrameScan, stats: StatsCollector, config_options: ConfigOptions
+) -> dict[str, ColumnStats]:
+ from cudf_polars.experimental.io import _extract_dataframescan_stats
+
+ return _extract_dataframescan_stats(ir)
diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md
index df6af87bbf0..0eaf182762b 100644
--- a/python/cudf_polars/docs/overview.md
+++ b/python/cudf_polars/docs/overview.md
@@ -383,6 +383,91 @@ def rename(e: Expr, mapping: Mapping[str, str]) -> Expr:
 return mapper(e)
 ```
+
+# Estimated column statistics
+
+:::{note}
+Column-statistics estimation is experimental and the details are
+likely to change in the future.
+:::
+
+The `cudf-polars` streaming executor (enabled by default) may use
+estimated column statistics to transform translated logical-plan
+IR nodes into the final "physical-plan" IR nodes.
+
+## Storing statistics
+
+The following classes are used to store column statistics (listed
+in order of decreasing granularity):
+
+- `ColumnStat`: This class is used to store an individual column
+statistic (e.g. row count or unique-value count). Each object
+has two important attributes:
+ - `ColumnStat.value`: Returns the actual column-statistic value
+ (e.g. an `int` if the statistic is a row-count) or `None` if no
+ estimate is available.
+ - `ColumnStat.exact`: Whether the statistic is known "exactly".
+- `UniqueStats`: Since we usually sample both the unique-value
+**count** and the unique-value **fraction** of a column at once,
+we use `UniqueStats` to group these `ColumnStat`s into one object.
+- `DataSourceInfo`: This class is used to sample and store
+`ColumnStat`/`UniqueStats` objects associated with a single
+datasource (e.g. a Parquet dataset or in-memory `DataFrame`).
+ - Since it can be expensive to sample datasource statistics,
+ this class is specifically designed to enable **lazy** and
+ **aggregated** column sampling via sub-classing. For example,
+ the `ParquetSourceInfo` sub-class uses caching to avoid
+ redundant file-system access.
+- `ColumnStats`: This class is used to group together the "base"
+`DataSourceInfo` reference and the current `UniqueStats` estimates
+for a specific IR + column combination. We bundle these references
+together to simplify the design and maintenance of `StatsCollector`.
+**NOTE:** The current `UniqueStats` estimates are not yet populated.
+- `StatsCollector`: This class is used to collect and store
+statistics for all IR nodes within a single query. The statistics
+attached to each IR node refer to the **output** columns of the
+IR node in question. The `StatsCollector` class is especially important,
+because it is used to organize **all** statistics within a logical plan.
+Each object has two important attributes:
+ - `StatsCollector.row_count`: Returns a mapping between each IR
+ node and the row-count `ColumnStat` estimate for that node.
+ **NOTE:** This attribute is not yet populated.
+ - `StatsCollector.column_stats`: Returns a mapping between each IR
+ node and the `dict[str, ColumnStats]` mapping for that node.
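+
+A rough sketch of how these containers nest, using the
+`collect_base_stats` helper described in the next section (the
+query `q`, the `engine` object, and the column name `"x"` below
+are placeholders, mirroring the project's unit tests):
+
+```python
+from cudf_polars import Translator
+from cudf_polars.experimental.statistics import collect_base_stats
+from cudf_polars.utils.config import ConfigOptions
+
+ir = Translator(q._ldf.visit(), engine).translate_ir()
+stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine))
+
+# Output-column statistics for the root IR node
+column_stats = stats.column_stats[ir]
+
+# Shared datasource info and (possibly inexact) statistics
+source_info = column_stats["x"].source_info
+row_count = source_info.row_count.value  # None if no estimate is available
+unique_count = source_info.unique_stats("x").count.value
+```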
+
+## Collecting and using statistics
+
+:::{note}
+Column-statistics collection is under active development.
+The existing APIs only support the sampling of base
+datasource statistics. The current row-count and unique-value
+statistics for each IR node are not populated by any traversal
+logic yet.
+:::
+
+### Collecting base statistics
+
+The top-level API for sampling base datasource statistics is
+`cudf_polars.experimental.statistics.collect_base_stats`. This
+function calls into the `initialize_column_stats` single-dispatch
+function to collect a `dict[str, ColumnStats]` mapping for each
+IR node in the logical plan.
+
+The IR-specific logic for each `initialize_column_stats` dispatch is
+relatively simple, because the only goal is to collect and propagate
+the underlying `DataSourceInfo` reference and child-`ColumnStats`
+references for each column. This means that `Scan` and `DataFrameScan`
+are the only IR classes needing specialized sampling logic. All other
+IR classes typically just propagate references from their child IR nodes.
+
+### Using base statistics
+
+Base `DataSourceInfo` references are currently used to calculate
+the partition count when a Parquet-based `Scan` node is lowered
+by the `cudf-polars` streaming executor.
+
+In the future, these statistics will also be used for
+parallel-algorithm selection and intermediate repartitioning.
+
 # Containers
 Containers should be constructed as relatively lightweight objects
diff --git a/python/cudf_polars/tests/experimental/test_dataframescan.py b/python/cudf_polars/tests/experimental/test_dataframescan.py
index 67348b2c30d..aea526a4459 100644
--- a/python/cudf_polars/tests/experimental/test_dataframescan.py
+++ b/python/cudf_polars/tests/experimental/test_dataframescan.py
@@ -3,8 +3,6 @@
 from __future__ import annotations

-import math
-
 import pytest

 import polars as pl
@@ -60,46 +58,3 @@ def test_dataframescan_concat(df):
 )
 df2 = pl.concat([df, df])
 assert_gpu_result_equal(df2, engine=engine)
-
-
-def test_source_statistics(df):
- from cudf_polars.experimental.io import _extract_dataframescan_stats
-
- row_count = df.collect().height
- engine = pl.GPUEngine(
- raise_on_fail=True,
- executor="streaming",
- executor_options={
- "max_rows_per_partition": 1_000,
- "scheduler": DEFAULT_SCHEDULER,
- },
- )
- ir = Translator(df._ldf.visit(), engine).translate_ir()
- column_stats = _extract_dataframescan_stats(ir)
-
- # Source info is the same for all columns
- source_info = column_stats["x"].source_info
- assert source_info is column_stats["y"].source_info
- assert source_info is column_stats["z"].source_info
- assert source_info.row_count.value == row_count
- assert source_info.row_count.exact
-
- # Storage stats should not be available
- assert source_info.storage_size("x").value is None
-
- # Check unique stats
- assert math.isclose(
- source_info.unique_stats("x").count.value, row_count, rel_tol=1e-2
- )
- assert math.isclose(source_info.unique_stats("x").fraction.value, 1.0, abs_tol=1e-2)
- assert not source_info.unique_stats("x").count.exact
- assert math.isclose(source_info.unique_stats("y").count.value, 3, rel_tol=1e-2)
- assert math.isclose(
- source_info.unique_stats("y").fraction.value, 3 / row_count, abs_tol=1e-2
- )
- assert not source_info.unique_stats("y").count.exact
- assert math.isclose(source_info.unique_stats("z").count.value, 5, rel_tol=1e-2)
- assert math.isclose(
- source_info.unique_stats("z").fraction.value, 5 / row_count, abs_tol=1e-2
- )
- assert not source_info.unique_stats("z").count.exact
diff --git
a/python/cudf_polars/tests/experimental/test_scan.py b/python/cudf_polars/tests/experimental/test_scan.py index 4123c47bce8..c9ede627ae6 100644 --- a/python/cudf_polars/tests/experimental/test_scan.py +++ b/python/cudf_polars/tests/experimental/test_scan.py @@ -84,124 +84,3 @@ def test_split_scan_predicate(tmp_path, df, mask): }, ) assert_gpu_result_equal(q, engine=engine) - - -@pytest.mark.parametrize("n_files", [1, 3]) -@pytest.mark.parametrize("row_group_size", [None, 10_000]) -@pytest.mark.parametrize("max_footer_samples", [3, 0]) -@pytest.mark.parametrize("max_row_group_samples", [1, 0]) -def test_source_statistics( - tmp_path, - df, - n_files, - row_group_size, - max_footer_samples, - max_row_group_samples, -): - from cudf_polars.experimental.io import ( - _clear_source_info_cache, - _extract_scan_stats, - ) - - _clear_source_info_cache() - make_partitioned_source( - df, - tmp_path, - "parquet", - n_files=n_files, - row_group_size=row_group_size, - ) - q = pl.scan_parquet(tmp_path) - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "target_partition_size": 10_000, - "scheduler": DEFAULT_SCHEDULER, - }, - parquet_options={ - "max_footer_samples": max_footer_samples, - "max_row_group_samples": max_row_group_samples, - }, - ) - ir = Translator(q._ldf.visit(), engine).translate_ir() - column_stats = _extract_scan_stats(ir, ConfigOptions.from_polars_engine(engine)) - - # Source info is the same for all columns - source_info = column_stats["x"].source_info - assert source_info is column_stats["y"].source_info - assert source_info is column_stats["z"].source_info - if max_footer_samples: - assert source_info.row_count.value == df.height - assert source_info.row_count.exact - else: - assert source_info.row_count.value is None - - # Storage stats should be available - if max_footer_samples: - assert source_info.storage_size("x").value > 0 - assert source_info.storage_size("y").value > 0 - else: - assert source_info.storage_size("x").value is None - assert source_info.storage_size("y").value is None - - # Check that we can query a missing column name - assert source_info.storage_size("foo").value is None - assert source_info.unique_stats("foo").count.value is None - assert source_info.unique_stats("foo").fraction.value is None - - # source._unique_stats should be empty - assert set(source_info._unique_stats) == set() - - if max_footer_samples and max_row_group_samples: - assert source_info.unique_stats("x").count.value == df.height - assert source_info.unique_stats("x").fraction.value == 1.0 - else: - assert source_info.unique_stats("x").count.value is None - assert source_info.unique_stats("x").fraction.value is None - - # source_info._unique_stats should only contain 'x' - if max_footer_samples and max_row_group_samples: - assert set(source_info._unique_stats) == {"x"} - else: - assert set(source_info._unique_stats) == set() - - # Check add_unique_stats_column behavior - if max_footer_samples and max_row_group_samples: - # Can add a "bad"/missing key column - source_info.add_unique_stats_column("foo") - assert set(source_info._unique_stats) == {"x"} - - # Mark 'z' as a key column, and query 'y' stats - source_info.add_unique_stats_column("z") - if n_files == 1 and row_group_size == 10_000: - assert source_info.unique_stats("y").count.value == 3 - else: - assert source_info.unique_stats("y").count.value is None - assert source_info.unique_stats("y").fraction.value < 1.0 - - # source_info._unique_stats should contain all columns now - assert 
set(source_info._unique_stats) == {"x", "y", "z"} - - -def test_source_statistics_csv(tmp_path, df): - from cudf_polars.experimental.io import _extract_scan_stats - - make_partitioned_source(df, tmp_path, "csv", n_files=3) - q = pl.scan_csv(tmp_path) - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "target_partition_size": 10_000, - "scheduler": DEFAULT_SCHEDULER, - }, - ) - ir = Translator(q._ldf.visit(), engine).translate_ir() - column_stats = _extract_scan_stats(ir, ConfigOptions.from_polars_engine(engine)) - - # Source info should be empty for CSV - source_info = column_stats["x"].source_info - assert source_info.row_count.value is None - assert source_info.unique_stats("x").count.value is None - assert source_info.unique_stats("x").fraction.value is None diff --git a/python/cudf_polars/tests/experimental/test_stats.py b/python/cudf_polars/tests/experimental/test_stats.py new file mode 100644 index 00000000000..147cfc98555 --- /dev/null +++ b/python/cudf_polars/tests/experimental/test_stats.py @@ -0,0 +1,357 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import math + +import pytest + +import polars as pl + +from cudf_polars import Translator +from cudf_polars.experimental.io import _clear_source_info_cache +from cudf_polars.experimental.statistics import collect_base_stats +from cudf_polars.testing.asserts import DEFAULT_SCHEDULER, assert_gpu_result_equal +from cudf_polars.testing.io import make_partitioned_source +from cudf_polars.utils.config import ConfigOptions + + +@pytest.fixture(scope="module") +def df(): + return pl.DataFrame( + { + "x": range(3_000), + "y": ["cat", "dog", "fish"] * 1_000, + "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 600, + } + ) + + +def test_base_stats_dataframescan(df): + row_count = df.height + q = pl.LazyFrame(df) + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "max_rows_per_partition": 1_000, + "scheduler": DEFAULT_SCHEDULER, + }, + ) + ir = Translator(q._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine)) + column_stats = stats.column_stats[ir] + + # Source info is the same for all columns + source_info = column_stats["x"].source_info + assert source_info is column_stats["y"].source_info + assert source_info is column_stats["z"].source_info + assert source_info.row_count.value == row_count + assert source_info.row_count.exact + + # Storage stats should not be available + assert source_info.storage_size("x").value is None + + # Check unique stats + assert math.isclose( + source_info.unique_stats("x").count.value, row_count, rel_tol=5e-2 + ) + assert math.isclose(source_info.unique_stats("x").fraction.value, 1.0, abs_tol=1e-2) + assert not source_info.unique_stats("x").count.exact + assert math.isclose(source_info.unique_stats("y").count.value, 3, rel_tol=5e-2) + assert math.isclose( + source_info.unique_stats("y").fraction.value, 3 / row_count, abs_tol=1e-2 + ) + assert not source_info.unique_stats("y").count.exact + assert math.isclose(source_info.unique_stats("z").count.value, 5, rel_tol=5e-2) + assert math.isclose( + source_info.unique_stats("z").fraction.value, 5 / row_count, abs_tol=1e-2 + ) + assert not source_info.unique_stats("z").count.exact + + +@pytest.mark.parametrize("n_files", [1, 3]) +@pytest.mark.parametrize("row_group_size", [None, 10_000]) 
+@pytest.mark.parametrize("max_footer_samples", [3, 0]) +@pytest.mark.parametrize("max_row_group_samples", [1, 0]) +def test_base_stats_parquet( + tmp_path, + df, + n_files, + row_group_size, + max_footer_samples, + max_row_group_samples, +): + _clear_source_info_cache() + make_partitioned_source( + df, + tmp_path, + "parquet", + n_files=n_files, + row_group_size=row_group_size, + ) + q = pl.scan_parquet(tmp_path) + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "target_partition_size": 10_000, + "scheduler": DEFAULT_SCHEDULER, + }, + parquet_options={ + "max_footer_samples": max_footer_samples, + "max_row_group_samples": max_row_group_samples, + }, + ) + ir = Translator(q._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine)) + column_stats = stats.column_stats[ir] + + # Source info is the same for all columns + source_info = column_stats["x"].source_info + assert source_info is column_stats["y"].source_info + assert source_info is column_stats["z"].source_info + if max_footer_samples: + assert source_info.row_count.value == df.height + assert source_info.row_count.exact + else: + assert source_info.row_count.value is None + + # Storage stats should be available + if max_footer_samples: + assert source_info.storage_size("x").value > 0 + assert source_info.storage_size("y").value > 0 + else: + assert source_info.storage_size("x").value is None + assert source_info.storage_size("y").value is None + + # Check that we can query a missing column name + assert source_info.storage_size("foo").value is None + assert source_info.unique_stats("foo").count.value is None + assert source_info.unique_stats("foo").fraction.value is None + + # source._unique_stats should be empty + assert set(source_info._unique_stats) == set() + + if max_footer_samples and max_row_group_samples: + assert source_info.unique_stats("x").count.value == df.height + assert source_info.unique_stats("x").fraction.value == 1.0 + else: + assert source_info.unique_stats("x").count.value is None + assert source_info.unique_stats("x").fraction.value is None + + # source_info._unique_stats should only contain 'x' + if max_footer_samples and max_row_group_samples: + assert set(source_info._unique_stats) == {"x"} + else: + assert set(source_info._unique_stats) == set() + + # Check add_unique_stats_column behavior + if max_footer_samples and max_row_group_samples: + # Can add a "bad"/missing key column + source_info.add_unique_stats_column("foo") + assert set(source_info._unique_stats) == {"x"} + + # Mark 'z' as a key column, and query 'y' stats + source_info.add_unique_stats_column("z") + if n_files == 1 and row_group_size == 10_000: + assert source_info.unique_stats("y").count.value == 3 + else: + assert source_info.unique_stats("y").count.value is None + assert source_info.unique_stats("y").fraction.value < 1.0 + + # source_info._unique_stats should contain all columns now + assert set(source_info._unique_stats) == {"x", "y", "z"} + + +def test_base_stats_csv(tmp_path, df): + make_partitioned_source(df, tmp_path, "csv", n_files=3) + q = pl.scan_csv(tmp_path) + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "target_partition_size": 10_000, + "scheduler": DEFAULT_SCHEDULER, + }, + ) + ir = Translator(q._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine)) + column_stats = stats.column_stats[ir] + + # Source info should be empty for 
CSV + source_info = column_stats["x"].source_info + assert source_info.row_count.value is None + assert source_info.unique_stats("x").count.value is None + assert source_info.unique_stats("x").fraction.value is None + + +@pytest.mark.parametrize("max_footer_samples", [1, 3]) +@pytest.mark.parametrize("max_row_group_samples", [1, 2]) +def test_base_stats_parquet_groupby( + tmp_path, + df, + max_footer_samples, + max_row_group_samples, +): + n_files = 3 + _clear_source_info_cache() + make_partitioned_source(df, tmp_path, "parquet", n_files=n_files) + q = pl.scan_parquet(tmp_path) + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "target_partition_size": 10_000, + "scheduler": DEFAULT_SCHEDULER, + }, + parquet_options={ + "max_footer_samples": max_footer_samples, + "max_row_group_samples": max_row_group_samples, + }, + ) + + # Check simple selection + q1 = q.select(pl.col("x"), pl.col("y")) + qir1 = Translator(q1._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(qir1, ConfigOptions.from_polars_engine(engine)) + source_info_y = stats.column_stats[qir1]["y"].source_info + unique_stats_y = source_info_y.unique_stats("y") + y_unique_fraction = unique_stats_y.fraction + y_row_count = source_info_y.row_count + assert y_unique_fraction.value < 1.0 + assert y_unique_fraction.value > 0.0 + assert unique_stats_y.count.value is None + if max_footer_samples >= n_files: + # We should have "exact" row-count statistics + assert y_row_count.value == df.height + assert y_row_count.exact + else: + # We should have "estimated" row-count statistics + assert y_row_count.value > 0 + assert not y_row_count.exact + assert_gpu_result_equal(q1.sort(pl.col("x")).slice(0, 2), engine=engine) + + # Source statistics of "y" should match after GroupBy/Select/HStack/etc + q2 = ( + pl.concat( + [ + q.select(pl.col("x")), + q.select(pl.col("y")), + ], + how="horizontal", + ) + .group_by(pl.col("y")) + .sum() + .select(pl.col("x").max(), pl.col("y")) + .with_columns((pl.col("x") * pl.col("x")).alias("x2")) + ) + qir2 = Translator(q2._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(qir2, ConfigOptions.from_polars_engine(engine)) + source_info_y = stats.column_stats[qir2]["y"].source_info + assert source_info_y.unique_stats("y").fraction == y_unique_fraction + assert y_row_count == source_info_y.row_count + assert_gpu_result_equal(q2.sort(pl.col("y")).slice(0, 2), engine=engine) + + +@pytest.mark.parametrize("how", ["inner", "left", "right"]) +def test_base_stats_join(how): + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "scheduler": DEFAULT_SCHEDULER, + "shuffle_method": "tasks", + }, + ) + left = pl.LazyFrame( + { + "x": range(15), + "y": [1, 2, 3] * 5, + "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3, + } + ) + right = pl.LazyFrame( + { + "xx": range(9), + "y": [2, 4, 3] * 3, + "z": [1, 2, 3] * 3, + } + ) + q = left.join(right, on="y", how=how) + ir = Translator(q._ldf.visit(), engine).translate_ir() + stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine)) + + ir_column_stats = stats.column_stats[ir] + left_count, right_count = 15, 9 + if how == "left": + assert ir_column_stats["x"].source_info.row_count.value == left_count + assert ir_column_stats["y"].source_info.row_count.value == left_count + assert ir_column_stats["z"].source_info.row_count.value == left_count + if how == "inner": + assert ir_column_stats["x"].source_info.row_count.value == left_count + assert 
ir_column_stats["y"].source_info.row_count.value == left_count
+        assert ir_column_stats["z"].source_info.row_count.value == left_count
+        assert ir_column_stats["xx"].source_info.row_count.value == right_count
+        assert ir_column_stats["z_right"].source_info.row_count.value == right_count
+    if how == "right":
+        assert ir_column_stats["xx"].source_info.row_count.value == right_count
+        assert ir_column_stats["y"].source_info.row_count.value == right_count
+        assert ir_column_stats["z"].source_info.row_count.value == right_count
+
+
+def test_base_stats_union():
+    engine = pl.GPUEngine(
+        raise_on_fail=True,
+        executor="streaming",
+        executor_options={
+            "scheduler": DEFAULT_SCHEDULER,
+            "shuffle_method": "tasks",
+        },
+    )
+    left = pl.LazyFrame(
+        {
+            "x": range(15),
+            "y": [1, 2, 3] * 5,
+            "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3,
+        }
+    )
+    right = pl.LazyFrame(
+        {
+            "x": range(9),
+            "y": [2, 4, 3] * 3,
+            "z": [1.0, 2.0, 3.0] * 3,
+        }
+    )
+
+    q = pl.concat([left, right])
+    ir = Translator(q._ldf.visit(), engine).translate_ir()
+    stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine))
+    column_stats = stats.column_stats[ir]
+
+    # We lose source info after a Union, but we will be able to set
+    # accurate row-count and unique-value estimates for the current
+    # IR in #19392
+    source_info = column_stats["x"].source_info
+    assert source_info.row_count.value is None
+
+
+def test_base_stats_distinct(df):
+    row_count = df.height
+    engine = pl.GPUEngine(
+        raise_on_fail=True,
+        executor="streaming",
+        executor_options={
+            "scheduler": DEFAULT_SCHEDULER,
+            "shuffle_method": "tasks",
+        },
+    )
+    q = pl.LazyFrame(df).unique(subset=["y"])
+    ir = Translator(q._ldf.visit(), engine).translate_ir()
+    stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine))
+    column_stats = stats.column_stats[ir]
+
+    source_info = column_stats["y"].source_info
+    assert source_info.row_count.value == row_count
+    assert source_info.row_count.exact

From a4d6e0566556631e73c3f064945bf17b01d67d09 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Mon, 18 Aug 2025 11:45:45 -0700
Subject: [PATCH 150/366] Use the nvCOMP 5.0 API to better estimate
 decompression memory requirements (#19616)

nvCOMP 5 adds synchronous APIs that more accurately determine the scratch
size requirements for decompression (ZSTD only for now). This PR integrates
the new API into `batched_decompress`, as well as into the chunked reader
(where it is used to determine the memory overhead from decompression).

The new API is expensive to call, as it launches a kernel. For this reason,
it is used differently than the old API in the chunked reader. The result
may be imprecise when blocks have very uneven compression ratios, but the
error should be minor given that the new API reduces the memory overhead
significantly*. Another change: the temp sizes in the chunked reader are
now stored between subpasses so we don't perform redundant computation.

*Measured up to 2.2x lower peak memory use in micro-benchmarks. No
performance overhead from the new API was observed in the non-chunked
read_parquet benchmarks.
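
For illustration only (this snippet is not part of the patch), the
selection logic added here boils down to the following, where
`compression`, `inputs` (a device span of compressed chunks), `di`
(an aggregated `decompression_info`), and the two size limits are
placeholder names:

    // Sketch: prefer the precise nvCOMP 5 sync API when it is available
    size_t temp_size = 0;
    if (cudf::io::detail::is_decompression_scratch_size_ex_supported(compression)) {
      // The sync API may launch a kernel, so it is only worth calling
      // when the tighter estimate is actually reused
      temp_size = cudf::io::detail::get_decompression_scratch_size_ex(
        compression, inputs, max_uncomp_chunk_size, max_total_uncomp_size, stream);
    } else {
      // Legacy path: conservative upper bound from decompression_info
      temp_size = cudf::io::detail::get_decompression_scratch_size(di);
    }
    rmm::device_buffer scratch(temp_size, stream);
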
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - https://github.com/nvdbaranec - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/19616 --- cpp/src/io/comp/common.cpp | 13 -- cpp/src/io/comp/common_internal.hpp | 14 +- cpp/src/io/comp/decompression.cpp | 50 +++++- cpp/src/io/comp/decompression.hpp | 23 ++- cpp/src/io/comp/nvcomp_adapter.cpp | 142 +++++++++++++++++- cpp/src/io/comp/nvcomp_adapter.cu | 17 +++ cpp/src/io/comp/nvcomp_adapter.cuh | 6 + cpp/src/io/comp/nvcomp_adapter.hpp | 25 +++ cpp/src/io/parquet/reader_impl_chunking.cu | 6 +- cpp/src/io/parquet/reader_impl_chunking.hpp | 1 + .../io/parquet/reader_impl_chunking_utils.cu | 116 ++++++++++++-- .../io/parquet/reader_impl_chunking_utils.cuh | 11 +- 12 files changed, 381 insertions(+), 43 deletions(-) diff --git a/cpp/src/io/comp/common.cpp b/cpp/src/io/comp/common.cpp index 1db3e48f823..99805f693ed 100644 --- a/cpp/src/io/comp/common.cpp +++ b/cpp/src/io/comp/common.cpp @@ -23,19 +23,6 @@ namespace cudf::io::detail { -[[nodiscard]] std::optional to_nvcomp_compression( - compression_type compression) -{ - switch (compression) { - case compression_type::GZIP: return nvcomp::compression_type::GZIP; - case compression_type::LZ4: return nvcomp::compression_type::LZ4; - case compression_type::SNAPPY: return nvcomp::compression_type::SNAPPY; - case compression_type::ZLIB: return nvcomp::compression_type::DEFLATE; - case compression_type::ZSTD: return nvcomp::compression_type::ZSTD; - default: return std::nullopt; - } -} - [[nodiscard]] std::string compression_type_name(compression_type compression) { switch (compression) { diff --git a/cpp/src/io/comp/common_internal.hpp b/cpp/src/io/comp/common_internal.hpp index 3bf430b9a1e..d6b45a18da3 100644 --- a/cpp/src/io/comp/common_internal.hpp +++ b/cpp/src/io/comp/common_internal.hpp @@ -49,8 +49,18 @@ constexpr double default_host_device_decompression_work_ratio = 100; // single GPU block; higher values lead to more host compression in HYBRID mode constexpr double default_host_device_compression_work_ratio = 100; -[[nodiscard]] std::optional to_nvcomp_compression( - compression_type compression); +[[nodiscard]] constexpr std::optional to_nvcomp_compression( + compression_type compression) +{ + switch (compression) { + case compression_type::GZIP: return nvcomp::compression_type::GZIP; + case compression_type::LZ4: return nvcomp::compression_type::LZ4; + case compression_type::SNAPPY: return nvcomp::compression_type::SNAPPY; + case compression_type::ZLIB: return nvcomp::compression_type::DEFLATE; + case compression_type::ZSTD: return nvcomp::compression_type::ZSTD; + default: return std::nullopt; + } +} struct sorted_codec_parameters { rmm::device_uvector> inputs; diff --git a/cpp/src/io/comp/decompression.cpp b/cpp/src/io/comp/decompression.cpp index 8cf49506d17..7362e41a5fa 100644 --- a/cpp/src/io/comp/decompression.cpp +++ b/cpp/src/io/comp/decompression.cpp @@ -37,7 +37,6 @@ #include #include // memset #include -#include #include namespace cudf::io::detail { @@ -537,10 +536,10 @@ void device_decompress(compression_type compression, CUDF_FUNC_RANGE(); if (compression == compression_type::NONE or inputs.empty()) { return; } - auto const nvcomp_type = to_nvcomp_compression(compression); - auto nvcomp_disabled_reason = nvcomp_type.has_value() - ? 
nvcomp::is_decompression_disabled(*nvcomp_type) - : "invalid compression type"; + auto const nvcomp_type = to_nvcomp_compression(compression); + auto const nvcomp_disabled_reason = nvcomp_type.has_value() + ? nvcomp::is_decompression_disabled(*nvcomp_type) + : "invalid compression type"; if (not nvcomp_disabled_reason) { return nvcomp::batched_decompress( *nvcomp_type, inputs, outputs, results, max_uncomp_chunk_size, max_total_uncomp_size, stream); @@ -638,9 +637,10 @@ size_t get_uncompressed_size(compression_type compression, host_span const> inputs, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size, + rmm::cuda_stream_view stream) +{ + if (compression == compression_type::NONE or + get_host_engine_state(compression) == host_engine_state::ON) { + return 0; + } + + auto const nvcomp_type = to_nvcomp_compression(compression); + auto const nvcomp_disabled = nvcomp_type.has_value() + ? nvcomp::is_decompression_disabled(*nvcomp_type) + : "invalid compression type"; + if (nvcomp_disabled) { + CUDF_FAIL("Cannot compute decompression scratch size for " + + compression_type_name(compression)); + } + return nvcomp::batched_decompress_temp_size_ex( + nvcomp_type.value(), inputs, max_uncomp_chunk_size, max_total_uncomp_size, stream); +} + +[[nodiscard]] bool is_decompression_scratch_size_ex_supported(compression_type compression) +{ + auto const nvcomp_type = to_nvcomp_compression(compression); + auto const nvcomp_disabled = nvcomp_type.has_value() + ? nvcomp::is_decompression_disabled(*nvcomp_type) + : "invalid compression type"; + if (nvcomp_disabled) { return false; } + return nvcomp::is_batched_decompress_temp_size_ex_supported(nvcomp_type.value()); +} + size_t decompress(compression_type compression, host_span src, host_span dst) diff --git a/cpp/src/io/comp/decompression.hpp b/cpp/src/io/comp/decompression.hpp index 932bf182936..08700d65663 100644 --- a/cpp/src/io/comp/decompression.hpp +++ b/cpp/src/io/comp/decompression.hpp @@ -48,12 +48,29 @@ struct decompression_info { }; /** - * @brief Functor which returns total scratch space required based on computed decompression_info - * data. - * + * @brief Returns total scratch space required based on computed decompression_info data. */ [[nodiscard]] size_t get_decompression_scratch_size(decompression_info const& di); +/** + * @brief Returns total scratch space required based on the compressed input data. + * + * Might launch a kernel. Should be used only if is_decompression_scratch_size_ex_supported returns + * true. + */ +[[nodiscard]] size_t get_decompression_scratch_size_ex( + compression_type compression, + device_span const> inputs, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size, + rmm::cuda_stream_view stream); + +/** + * @brief Checks if the decompression scratch size can be computed using the extended API of the + * nvcomp library. + */ +[[nodiscard]] bool is_decompression_scratch_size_ex_supported(compression_type compression); + /** * @brief Computes the uncompressed sizes of Snappy-compressed input data. 
* diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 2887072fce6..5dd78515bcf 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -16,9 +16,9 @@ #include "nvcomp_adapter.hpp" -#include "io/utilities/getenv_or.hpp" #include "nvcomp_adapter.cuh" +#include #include #include #include @@ -558,6 +558,115 @@ std::optional is_decompression_disabled_impl(compression_type compr return "Unsupported compression type"; } +#if NVCOMP_VER_MAJOR >= 5 +// Dispatcher for nvcompBatchedDecompressGetTempSizeSync +auto batched_decompress_get_temp_size_sync(compression_type compression, + void const* const* device_compressed_chunk_ptrs, + size_t const* device_compressed_chunk_bytes, + size_t num_chunks, + size_t max_uncompressed_chunk_bytes, + size_t* temp_bytes, + size_t max_total_uncompressed_bytes, + nvcompStatus_t* device_statuses, + cudaStream_t stream) +{ + switch (compression) { + case compression_type::SNAPPY: + return nvcompBatchedSnappyDecompressGetTempSizeSync(device_compressed_chunk_ptrs, + device_compressed_chunk_bytes, + num_chunks, + max_uncompressed_chunk_bytes, + temp_bytes, + max_total_uncompressed_bytes, + nvcompBatchedSnappyDecompressDefaultOpts, + device_statuses, + stream); + case compression_type::ZSTD: + return nvcompBatchedZstdDecompressGetTempSizeSync(device_compressed_chunk_ptrs, + device_compressed_chunk_bytes, + num_chunks, + max_uncompressed_chunk_bytes, + temp_bytes, + max_total_uncompressed_bytes, + nvcompBatchedZstdDecompressDefaultOpts, + device_statuses, + stream); + case compression_type::LZ4: + return nvcompBatchedLZ4DecompressGetTempSizeSync(device_compressed_chunk_ptrs, + device_compressed_chunk_bytes, + num_chunks, + max_uncompressed_chunk_bytes, + temp_bytes, + max_total_uncompressed_bytes, + nvcompBatchedLZ4DecompressDefaultOpts, + device_statuses, + stream); + case compression_type::DEFLATE: + return nvcompBatchedDeflateDecompressGetTempSizeSync( + device_compressed_chunk_ptrs, + device_compressed_chunk_bytes, + num_chunks, + max_uncompressed_chunk_bytes, + temp_bytes, + max_total_uncompressed_bytes, + nvcompBatchedDeflateDecompressDefaultOpts, + device_statuses, + stream); + case compression_type::GZIP: + return nvcompBatchedGzipDecompressGetTempSizeSync(device_compressed_chunk_ptrs, + device_compressed_chunk_bytes, + num_chunks, + max_uncompressed_chunk_bytes, + temp_bytes, + max_total_uncompressed_bytes, + nvcompBatchedGzipDecompressDefaultOpts, + device_statuses, + stream); + default: UNSUPPORTED_COMPRESSION(compression); + } +} + +#endif + +// Overload for internal use that takes device pointers and sizes directly +size_t batched_decompress_temp_size_ex(compression_type compression, + device_span input_data_ptrs, + device_span input_data_sizes, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size, + rmm::cuda_stream_view stream) +{ +#if NVCOMP_VER_MAJOR >= 5 + if (is_batched_decompress_temp_size_ex_supported(compression)) { + size_t temp_size = 0; + auto d_statuses = rmm::device_uvector(input_data_ptrs.size(), stream); + nvcompStatus_t const nvcomp_status = + batched_decompress_get_temp_size_sync(compression, + input_data_ptrs.data(), + input_data_sizes.data(), + input_data_ptrs.size(), + max_uncomp_chunk_size, + &temp_size, + max_total_uncomp_size, + d_statuses.data(), + stream.value()); + if (nvcomp_status == nvcompStatus_t::nvcompSuccess) { + auto const h_statuses = cudf::detail::make_host_vector(d_statuses, stream); + auto const are_all_success = + 
std::all_of(h_statuses.begin(), h_statuses.end(), [](nvcompStatus_t status) { + return status == nvcompStatus_t::nvcompSuccess; + }); + if (are_all_success) { return temp_size; } + } + CUDF_LOG_WARN( + "batched_decompress_get_temp_size_sync failed, falling back to batched_decompress_temp_size"); + } +#endif + // Fallback to the original batched decompress temp size calculation + return batched_decompress_temp_size( + compression, input_data_ptrs.size(), max_uncomp_chunk_size, max_total_uncomp_size); +} + } // namespace size_t batched_decompress_temp_size(compression_type compression, @@ -577,6 +686,27 @@ size_t batched_decompress_temp_size(compression_type compression, return temp_size; } +bool is_batched_decompress_temp_size_ex_supported(compression_type compression) +{ +#if NVCOMP_VER_MAJOR >= 5 + return compression == compression_type::ZSTD; +#else + return false; +#endif +} + +size_t batched_decompress_temp_size_ex(compression_type compression, + device_span const> inputs, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size, + rmm::cuda_stream_view stream) +{ + auto const [d_input_ptrs, d_input_sizes] = create_get_temp_size_args(inputs, stream); + + return batched_decompress_temp_size_ex( + compression, d_input_ptrs, d_input_sizes, max_uncomp_chunk_size, max_total_uncomp_size, stream); +} + void batched_decompress(compression_type compression, device_span const> inputs, device_span const> outputs, @@ -591,10 +721,16 @@ void batched_decompress(compression_type compression, auto const nvcomp_args = create_batched_nvcomp_args(inputs, outputs, stream); rmm::device_uvector actual_uncompressed_data_sizes(num_chunks, stream); rmm::device_uvector nvcomp_statuses(num_chunks, stream); + // Temporary space required for decompression - auto const temp_size = batched_decompress_temp_size( - compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); + auto const temp_size = batched_decompress_temp_size_ex(compression, + nvcomp_args.input_data_ptrs, + nvcomp_args.input_data_sizes, + max_uncomp_chunk_size, + max_total_uncomp_size, + stream); rmm::device_buffer scratch(temp_size, stream); + auto const nvcomp_status = batched_decompress_async(compression, #if NVCOMP_VER_MAJOR >= 5 use_hw_decompression(), diff --git a/cpp/src/io/comp/nvcomp_adapter.cu b/cpp/src/io/comp/nvcomp_adapter.cu index 8c32ffe6c77..f699b7214ba 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cu +++ b/cpp/src/io/comp/nvcomp_adapter.cu @@ -60,6 +60,23 @@ batched_args create_batched_nvcomp_args(device_span c std::move(output_data_sizes)}; } +std::pair, rmm::device_uvector> create_get_temp_size_args( + device_span const> inputs, rmm::cuda_stream_view stream) +{ + rmm::device_uvector input_data_ptrs(inputs.size(), stream); + rmm::device_uvector input_data_sizes(inputs.size(), stream); + + auto ins_it = thrust::make_zip_iterator(input_data_ptrs.begin(), input_data_sizes.begin()); + thrust::transform( + rmm::exec_policy_nosync(stream), + inputs.begin(), + inputs.end(), + ins_it, + [] __device__(auto const& in) { return thrust::make_tuple(in.data(), in.size()); }); + + return {std::move(input_data_ptrs), std::move(input_data_sizes)}; +} + void update_compression_results(device_span nvcomp_stats, device_span actual_output_sizes, device_span results, diff --git a/cpp/src/io/comp/nvcomp_adapter.cuh b/cpp/src/io/comp/nvcomp_adapter.cuh index dd0d9cf7843..a6222d2d4b4 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cuh +++ b/cpp/src/io/comp/nvcomp_adapter.cuh @@ -47,6 +47,12 @@ batched_args create_batched_nvcomp_args(device_span 
c device_span const> outputs, rmm::cuda_stream_view stream); +/** + * @brief Prepares device arrays of input pointers and sizes for use with nvCOMP temp size APIs. + */ +std::pair, rmm::device_uvector> create_get_temp_size_args( + device_span const> inputs, rmm::cuda_stream_view stream); + /** * @brief Convert nvcomp statuses and output sizes into cuIO compression results. */ diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 4e586234b94..bd50faee1de 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -19,6 +19,7 @@ #include "io/comp/compression.hpp" #include +#include #include #include @@ -63,6 +64,30 @@ size_t batched_decompress_temp_size(compression_type compression, size_t max_uncomp_chunk_size, size_t max_total_uncomp_size); +/** + * @brief Return the amount of temporary space required in bytes for a given decompression + * operation using synchronous nvcomp APIs. + * + * The size returned reflects the size of the scratch buffer to be passed to + * `batched_decompress_async`. This version uses the sync APIs which are more precise, but + * potentially require a kernel launch. + * + * @param[in] compression Compression type + * @param[in] inputs Device span of compressed data chunks + * @param[in] max_uncomp_chunk_size Maximum size of any single uncompressed chunk + * @param[in] max_total_uncomp_size Maximum total size of uncompressed data + * @param[in] stream CUDA stream to use + * @returns The total required size in bytes + */ +[[nodiscard]] size_t batched_decompress_temp_size_ex( + compression_type compression, + device_span const> inputs, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size, + rmm::cuda_stream_view stream); + +[[nodiscard]] bool is_batched_decompress_temp_size_ex_supported(compression_type compression); + /** * @brief Gets the maximum size any chunk could compress to in the batch. * diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 4b42ef36436..15e0ced9fa1 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -276,7 +276,11 @@ void reader_impl::setup_next_subpass(read_mode mode) // include scratch space needed for decompression. for certain codecs (eg ZSTD) this // can be considerable. 
- include_decompression_scratch_size(pass.chunks, pass.pages, c_info, _stream); + if (is_first_subpass) { + pass.decomp_scratch_sizes = + compute_decompression_scratch_sizes(pass.chunks, pass.pages, _stream); + } + include_decompression_scratch_size(pass.decomp_scratch_sizes, c_info, _stream); auto iter = thrust::make_counting_iterator(0); auto const pass_max_row = pass.skip_rows + pass.num_rows; diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp index 294eaf9ac16..660e51b38c8 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.hpp +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -133,6 +133,7 @@ struct pass_intermediate_data { rmm::device_uvector page_offsets{0, cudf::get_default_stream()}; rmm::device_buffer decomp_dict_data{0, cudf::get_default_stream()}; + rmm::device_uvector decomp_scratch_sizes{0, cudf::get_default_stream()}; rmm::device_uvector str_dict_index{0, cudf::get_default_stream()}; int level_type_size{0}; diff --git a/cpp/src/io/parquet/reader_impl_chunking_utils.cu b/cpp/src/io/parquet/reader_impl_chunking_utils.cu index 4ae82faf312..38fd132acf7 100644 --- a/cpp/src/io/parquet/reader_impl_chunking_utils.cu +++ b/cpp/src/io/parquet/reader_impl_chunking_utils.cu @@ -37,6 +37,8 @@ #include #include +#include +#include #include #include @@ -684,14 +686,11 @@ void detect_malformed_pages(device_span pages, } } -void include_decompression_scratch_size(device_span chunks, - device_span pages, - device_span c_info, - rmm::cuda_stream_view stream) +rmm::device_uvector compute_decompression_scratch_sizes( + device_span chunks, + device_span pages, + rmm::cuda_stream_view stream) { - CUDF_EXPECTS(pages.size() == c_info.size(), - "Encountered page/cumulative_page_info size mismatch"); - auto page_keys = make_page_key_iterator(pages); // per-codec page counts and decompression sizes @@ -712,17 +711,112 @@ void include_decompression_scratch_size(device_span chunk return cudf::io::detail::get_decompression_scratch_size(d); }); - // add to the cumulative_page_info data rmm::device_uvector d_temp_cost = cudf::detail::make_device_uvector_async( temp_cost, stream, cudf::get_current_device_resource_ref()); + + std::array codecs{compression_type::BROTLI, + compression_type::GZIP, + compression_type::LZ4, + compression_type::SNAPPY, + compression_type::ZSTD}; + for (auto const codec : codecs) { + if (cudf::io::detail::is_decompression_scratch_size_ex_supported(codec)) { + auto const total_decomp_info = thrust::transform_reduce( + rmm::exec_policy(stream), + decomp_iter, + decomp_iter + pages.size(), + cuda::proclaim_return_type( + [codec] __device__(decompression_info const& d) { + return d.type == codec ? 
d : decompression_info{codec, 0, 0, 0}; + }), + decompression_info{codec, 0, 0, 0}, + decomp_sum{}); + + // Collect pages with matching codecs + rmm::device_uvector> temp_spans(pages.size(), stream); + auto iter = thrust::make_counting_iterator(size_t{0}); + thrust::for_each( + rmm::exec_policy_nosync(stream), + iter, + iter + pages.size(), + [pages = pages.begin(), + chunks = chunks.begin(), + temp_spans = temp_spans.begin(), + codec] __device__(size_t i) { + auto const& page = pages[i]; + if (parquet_compression_support(chunks[page.chunk_idx].codec).first == codec) { + temp_spans[i] = {page.page_data, static_cast(page.compressed_page_size)}; + } else { + temp_spans[i] = {nullptr, 0}; // Mark pages with other codecs as empty + } + }); + // Copy only non-null spans + rmm::device_uvector> page_spans(pages.size(), stream); + auto end_iter = + thrust::copy_if(rmm::exec_policy_nosync(stream), + temp_spans.begin(), + temp_spans.end(), + page_spans.begin(), + [] __device__(auto const& span) { return span.data() != nullptr; }); + if (end_iter == page_spans.begin()) { + // No pages compressed with this codec, skip + continue; + } + page_spans.resize(end_iter - page_spans.begin(), stream); + + auto const total_temp_size = get_decompression_scratch_size(total_decomp_info); + auto const total_temp_size_ex = cudf::io::detail::get_decompression_scratch_size_ex( + total_decomp_info.type, + page_spans, + total_decomp_info.max_page_decompressed_size, + total_decomp_info.total_decompressed_size, + stream); + + // Make use of the extended API if it provides a more accurate estimate + if (total_temp_size_ex < total_temp_size) { + // The new extended API provides a more accurate (smaller) estimate than the legacy API. + // We cannot efficiently use the extended API to get per-page scratch sizes, so we adjust + // the per-page scratch sizes to on-average reflect the better estimate. This means that + // the scratch size might not be accurate for each page, but it will in aggregate. 
+        auto const adjustment_ratio = static_cast<double>(total_temp_size_ex) / total_temp_size;
+
+        // Apply the adjustment ratio to each page's temporary cost
+        thrust::for_each(rmm::exec_policy_nosync(stream),
+                         thrust::make_counting_iterator(size_t{0}),
+                         thrust::make_counting_iterator(pages.size()),
+                         [pages           = pages.begin(),
+                          chunks          = chunks.begin(),
+                          d_temp_cost_ptr = d_temp_cost.begin(),
+                          adjustment_ratio,
+                          codec] __device__(size_t i) {
+                           auto const page_codec =
+                             parquet_compression_support(chunks[pages[i].chunk_idx].codec).first;
+                           // Only adjust pages that use the current compression codec
+                           if (page_codec == codec) {
+                             auto const cost = d_temp_cost_ptr[i];
+                             // Scale down the cost and round up to ensure we don't underestimate
+                             auto const adjusted =
+                               static_cast<size_t>(cuda::std::ceil(cost * adjustment_ratio));
+                             d_temp_cost_ptr[i] = adjusted;
+                           }
+                         });
+      }
+    }
+  }
+  return d_temp_cost;
+}
+
+void include_decompression_scratch_size(device_span<size_t const> temp_cost,
+                                        device_span<cumulative_page_info> c_info,
+                                        rmm::cuda_stream_view stream)
+{
   auto iter = thrust::make_counting_iterator(size_t{0});
   thrust::for_each(rmm::exec_policy_nosync(stream),
                    iter,
-                   iter + pages.size(),
+                   iter + c_info.size(),
-                   [temp_cost = d_temp_cost.begin(), c_info = c_info.begin()] __device__(size_t i) {
+                   [temp_cost = temp_cost.begin(), c_info = c_info.begin()] __device__(size_t i) {
                      c_info[i].size_bytes += temp_cost[i];
                    });
-  stream.synchronize();
 }
 
 }  // namespace cudf::io::parquet::detail
diff --git a/cpp/src/io/parquet/reader_impl_chunking_utils.cuh b/cpp/src/io/parquet/reader_impl_chunking_utils.cuh
index 6a8cd599953..1101c98c86d 100644
--- a/cpp/src/io/parquet/reader_impl_chunking_utils.cuh
+++ b/cpp/src/io/parquet/reader_impl_chunking_utils.cuh
@@ -227,12 +227,19 @@ void detect_malformed_pages(device_span<PageInfo const> pages,
                             std::optional<size_t> expected_row_count,
                             rmm::cuda_stream_view stream);
 
+/**
+ * @brief Computes the per-page scratch space required for decompression.
+ */
+rmm::device_uvector<size_t> compute_decompression_scratch_sizes(
+  device_span<ColumnChunkDesc const> chunks,
+  device_span<PageInfo const> pages,
+  rmm::cuda_stream_view stream);
+
 /**
  * @brief Add the cost of decompression codec scratch space to the per-page cumulative
  * size information
  */
-void include_decompression_scratch_size(device_span<ColumnChunkDesc const> chunks,
-                                        device_span<PageInfo const> pages,
+void include_decompression_scratch_size(device_span<size_t const> temp_cost,
                                         device_span<cumulative_page_info> c_info,
                                         rmm::cuda_stream_view stream);
 
From e431c40686cb32d698736f8beb38c52972b4741d Mon Sep 17 00:00:00 2001
From: Basit Ayantunde
Date: Mon, 18 Aug 2025 20:11:10 +0100
Subject: [PATCH 151/366] [FEA] Add Filter Benchmark (#19678)

This merge request implements a simple benchmark that compares the
performance of the AST path (`compute_column` + `apply_boolean_mask`)
against a UDF-based `cudf::filter`.
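
For reference (illustrative only; simplified from the benchmark source
below, which sets up `input_table`, `tree`, `filter_inputs`, and `udf`),
the two code paths being compared are:

    // AST path: materialize a boolean mask, then gather the passing rows
    auto mask   = cudf::compute_column(input_table, tree.back(), stream, mr);
    auto result = cudf::apply_boolean_mask(input_table, mask->view(), stream, mr);

    // JIT path: a runtime-compiled CUDA UDF evaluated by cudf::filter
    auto filtered = cudf::filter(filter_inputs, udf, false, std::nullopt,
                                 std::vector<bool>{true, false, false}, stream, mr);
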
Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/19678 --- cpp/benchmarks/CMakeLists.txt | 5 + cpp/benchmarks/filter/minmax_filter.cpp | 169 ++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 cpp/benchmarks/filter/minmax_filter.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index ec1fb42cdab..9aad6a21012 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -173,6 +173,11 @@ ConfigureNVBench(NDSH_Q06_NVBENCH ndsh/q06.cpp ndsh/utilities.cpp) ConfigureNVBench(NDSH_Q09_NVBENCH ndsh/q09.cpp ndsh/utilities.cpp) ConfigureNVBench(NDSH_Q10_NVBENCH ndsh/q10.cpp ndsh/utilities.cpp) +# ################################################################################################## +# * filter benchmark ------------------------------------------------------------------- + +ConfigureNVBench(FILTER_NVBENCH filter/minmax_filter.cpp) + # ################################################################################################## # * stream_compaction benchmark ------------------------------------------------------------------- ConfigureNVBench( diff --git a/cpp/benchmarks/filter/minmax_filter.cpp b/cpp/benchmarks/filter/minmax_filter.cpp new file mode 100644 index 00000000000..069bfe67107 --- /dev/null +++ b/cpp/benchmarks/filter/minmax_filter.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include + +template +struct benchmark_data; + +template +struct benchmark_data { + static T dist_min() { return 0; } + + static T dist_max() { return 1; } + + static T filter_min() { return 0.05; } + + static T filter_max() { return 0.07; } +}; + +template +struct benchmark_data { + static T dist_min() { return -128; } + + static T dist_max() { return 127; } + + static T filter_min() { return -64; } + + static T filter_max() { return 64; } +}; + +enum class engine_type : uint8_t { AST = 0, JIT = 1 }; + +engine_type engine_from_string(std::string_view str) +{ + if (str == "ast") { + return engine_type::AST; + } else if (str == "jit") { + return engine_type::JIT; + } else { + CUDF_FAIL("unrecognized engine enum: " + std::string(str)); + } +} + +bool boolean_from_string(std::string_view str) +{ + if (str == "true") { + return true; + } else if (str == "false") { + return false; + } else { + CUDF_FAIL("unrecognized boolean value: " + std::string(str)); + } +} + +template +static void BM_filter_min_max(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const engine_name = state.get_string("engine"); + auto const nullable = boolean_from_string(state.get_string("nullable")); + auto const engine = engine_from_string(engine_name); + + auto profile = data_profile{}; + profile.set_distribution_params(cudf::type_to_id(), + distribution_id::NORMAL, + benchmark_data::dist_min(), + benchmark_data::dist_max()); + profile.set_null_probability(nullable ? std::optional{0.3} : std::nullopt); + + auto const column = + create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + std::string type_name = cudf::type_to_name(cudf::data_type{cudf::type_to_id()}); + + auto udf = std::format( + R"***( + __device__ void transform(bool * out, {0} c0, {0} min, {0} max) {{ + *out = (c0 >= min && c0 <= max); + }} + )***", + type_name); + + auto tree = cudf::ast::tree{}; + auto min_scalar = cudf::numeric_scalar{benchmark_data::filter_min()}; + auto max_scalar = cudf::numeric_scalar{benchmark_data::filter_max()}; + auto min_scalar_column = cudf::make_column_from_scalar(min_scalar, 1); + auto max_scalar_column = cudf::make_column_from_scalar(max_scalar, 1); + + { + auto& column_ref = tree.push(cudf::ast::column_reference{0}); + auto& min_literal = tree.push(cudf::ast::literal{min_scalar}); + auto& max_literal = tree.push(cudf::ast::literal{max_scalar}); + auto& filter_min = tree.push( + cudf::ast::operation{cudf::ast::ast_operator::GREATER_EQUAL, column_ref, min_literal}); + auto& filter_max = + tree.push(cudf::ast::operation{cudf::ast::ast_operator::LESS_EQUAL, column_ref, max_literal}); + tree.push(cudf::ast::operation{cudf::ast::ast_operator::LOGICAL_AND, filter_min, filter_max}); + } + + std::vector filter_inputs; + filter_inputs.push_back(column->view()); + filter_inputs.push_back(min_scalar_column->view()); + filter_inputs.push_back(max_scalar_column->view()); + + // Use the number of bytes read from global memory + state.add_global_memory_reads(static_cast(num_rows)); + state.add_global_memory_writes(num_rows); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto stream = launch.get_stream().get_stream(); + auto mr = cudf::get_current_device_resource_ref(); + + switch (engine) { + case engine_type::AST: { + auto input_table = cudf::table_view{{column->view()}}; + auto const filter_boolean = 
cudf::compute_column(input_table, tree.back(), stream, mr); + auto const result = + cudf::apply_boolean_mask(input_table, filter_boolean->view(), stream, mr); + } break; + case engine_type::JIT: { + auto result = cudf::filter( + filter_inputs, udf, false, std::nullopt, std::vector{true, false, false}, stream, mr); + } break; + default: CUDF_UNREACHABLE("Unrecognised engine type requested"); + } + }); +} + +#define FILTER_BENCHMARK_DEFINE(name, key_type) \ + static void name(::nvbench::state& st) { ::BM_filter_min_max(st); } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_string_axis("engine", {"ast", "jit"}) \ + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000}) \ + .add_string_axis("nullable", {"true", "false"}) + +FILTER_BENCHMARK_DEFINE(filter_min_max_int32, int32_t); +FILTER_BENCHMARK_DEFINE(filter_min_max_int64, int64_t); +FILTER_BENCHMARK_DEFINE(filter_min_max_float32, float); +FILTER_BENCHMARK_DEFINE(filter_min_max_float64, double); From 36a7519b00ae47eb0222f5f18dca173152082a2f Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Mon, 18 Aug 2025 13:34:59 -0700 Subject: [PATCH 152/366] Correctly decode boolean lists in chunked parquet reader (#19707) Closes #19702 This PR adds crucial bug fixes to correctly decode boolean lists in the chunked parquet reader. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Bradley Dice (https://github.com/bdice) - Paul Mattione (https://github.com/pmattione-nvidia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/19707 --- cpp/src/io/parquet/decode_fixed.cu | 41 ++++----- cpp/src/io/parquet/reader_impl_preprocess.cu | 6 +- cpp/tests/io/parquet_chunked_reader_test.cu | 95 +++++++++++++++++++- cpp/tests/io/parquet_common.cpp | 6 +- 4 files changed, 119 insertions(+), 29 deletions(-) diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index 077f9f80715..051dd79b5db 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -894,32 +894,27 @@ __device__ inline bool maybe_has_nulls(page_state_s* s) return run_val != s->col.max_level[lvl]; } -template -inline __device__ void bool_plain_decode(page_state_s* s, state_buf* sb, int t, int to_decode) +template +inline __device__ void bool_plain_decode(page_state_s* s, + state_buf* sb, + int target_pos, + thread_group const& group) { - int pos = s->dict_pos; - int const target_pos = pos + to_decode; - __syncthreads(); // Make sure all threads have read dict_pos before it changes at the end. 
+ int const pos = s->dict_pos; + int const t = group.thread_rank(); + // Ensure all threads have the dict_pos + group.sync(); - while (pos < target_pos) { - int const batch_len = min(target_pos - pos, decode_block_size_t); + for (auto bit_pos = pos + t; bit_pos < target_pos; bit_pos += group.size()) { + int const byte_offset = bit_pos >> 3; + int const bit_in_byte_index = bit_pos & 7; - if (t < batch_len) { - int const bit_pos = pos + t; - int const byte_offset = bit_pos >> 3; - int const bit_in_byte_index = bit_pos & 7; + uint8_t const* const read_from = s->data_start + byte_offset; + bool const read_bit = (*read_from) & (1 << bit_in_byte_index); - uint8_t const* const read_from = s->data_start + byte_offset; - bool const read_bit = (*read_from) & (1 << bit_in_byte_index); - - int const write_to_index = rolling_index(bit_pos); - sb->dict_idx[write_to_index] = read_bit; - } - - pos += batch_len; + int const write_to_index = rolling_index(bit_pos); + sb->dict_idx[write_to_index] = read_bit; } - - if (t == 0) { s->dict_pos = pos; } } template @@ -1235,7 +1230,9 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) if (bools_are_rle_stream) { bool_stream.decode_next(t, next_valid_count - valid_count); } else { - bool_plain_decode(s, sb, t, next_valid_count - valid_count); + auto const target_pos = next_valid_count + skipped_leaf_values; + bool_plain_decode(s, sb, target_pos, block); + if (t == 0) { s->dict_pos = target_pos; } } block.sync(); } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 87fb1950017..1c5c1f2641c 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -606,9 +606,9 @@ void reader_impl::preprocess_subpass_pages(read_mode mode, size_t chunk_read_lim // subpass since we know that will safely completed. bool const is_list = last_chunk.max_level[level_type::REPETITION] > 0; // corner case: only decode up to the second-to-last row, except if this is the last page in the - // entire pass. this handles the case where we only have 1 chunk, 1 page, and potentially even - // just 1 row. - if (is_list && max_col_row < last_pass_row) { + // entire pass or if we have the page index. this handles the case where we only have 1 chunk, 1 + // page, and potentially even just 1 row. 
+ if (is_list and std::cmp_less(max_col_row, last_pass_row) and not _has_page_index) { // compute min row for this column in the subpass auto const& first_page = subpass.pages[first_page_index]; auto const& first_chunk = pass.chunks[first_page.chunk_idx]; diff --git a/cpp/tests/io/parquet_chunked_reader_test.cu b/cpp/tests/io/parquet_chunked_reader_test.cu index 52d277de869..0dbccc18230 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cu +++ b/cpp/tests/io/parquet_chunked_reader_test.cu @@ -1907,6 +1907,97 @@ TEST_F(ParquetChunkedReaderTest, TestNumRowsPerSourceEmptyTable) std::equal(expected_counts.cbegin(), expected_counts.cend(), num_rows_per_source.cbegin())); } +TEST_F(ParquetReaderTest, BooleanList) +{ + std::mt19937 gen(0xcaffe); + + // Parquet file + auto const parquet_filepath = temp_env->get_temp_filepath("BooleanList.parquet"); + { + auto constexpr num_rows = num_ordered_rows; + + // Validity helpers + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto list_valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 100; }); + + // str(non-nullable) + auto col0 = testdata::ascending(); + + // list(nullable) + auto bools_iter = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto bools_col = + cudf::test::fixed_width_column_wrapper(bools_iter, bools_iter + num_rows, valids); + auto offsets_iter = thrust::counting_iterator(0); + auto offsets_col = cudf::test::fixed_width_column_wrapper( + offsets_iter, offsets_iter + num_rows + 1); + auto [null_mask, null_count] = + cudf::test::detail::make_null_mask(list_valids, list_valids + num_rows); + auto _col1 = cudf::make_lists_column( + num_rows, offsets_col.release(), bools_col.release(), null_count, std::move(null_mask)); + auto col1 = cudf::purge_nonempty_nulls(*_col1); + + // list(nullable)>(nullable) + auto col2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + // Input table + auto constexpr num_concat = 3; + auto table = cudf::table_view{{col0, *col1, *col2}}; + auto expected = cudf::concatenate(std::vector(num_concat, table)); + table = expected->view(); + // Write to parquet buffer + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{parquet_filepath}, table) + // Note: The following writer options have been chosen to work with the chunked reader's + // empirical memory limits. We may need to adjust them in the future. + .row_group_size_rows(num_rows) + .max_page_size_rows(page_size_for_ordered_tests) + .compression(cudf::io::compression_type::SNAPPY) + .dictionary_policy(cudf::io::dictionary_policy::ALWAYS); + + cudf::io::write_parquet(out_opts); + } + + // Parquet reader options + cudf::io::parquet_reader_options const options = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(parquet_filepath)); + + // Read parquet using the chunked parquet reader + auto const read = [&]() { + // Note that this test relies on these empirically chosen memory limits to have the chunked + // reader create subpasses such that we end a subpass at `page.num_rows - 1` and let next + // subpass read that last row from that page. We may need to adjust these limits in the future + // to hit this edge case in chunking. 
+ auto constexpr input_mem_limit = 102400; + auto constexpr output_mem_limit = 0; + auto reader = cudf::io::chunked_parquet_reader(output_mem_limit, input_mem_limit, options); + + auto table_chunks = std::vector>(); + while (reader.has_next()) { + auto chunk = reader.read_chunk(); + table_chunks.push_back(std::move(chunk.tbl)); + } + + // Commented as future changes to chunking logic may change the number of table_chunks + // EXPECT_GT(table_chunks.size(), 1); + + auto out_tviews = std::vector{}; + for (auto const& tbl : table_chunks) { + out_tviews.emplace_back(tbl->view()); + } + + return cudf::concatenate(out_tviews); + }(); + + // Read using the main parquet reader + auto const expected = cudf::io::read_parquet(options).tbl; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), read->view()); +} + TEST_F(ParquetReaderTest, ManyLargeLists) { auto const stream = cudf::get_default_stream(); @@ -1968,8 +2059,8 @@ TEST_F(ParquetReaderTest, ManyLargeLists) EXPECT_NO_THROW(std::ignore = reader.read_chunk()); num_chunks++; } - // We will end up with exactly two chunks as the total number of leaf rows is just above 2B rows - // per table chunk limit and we haven't set any chunk or pass read limits + // We will end up with exactly two chunks as the total number of leaf rows is just above 2B + // rows per table chunk limit and we haven't set any chunk or pass read limits EXPECT_EQ(num_chunks, 2); }; diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index f0be469e5ed..7fd9f474723 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -139,8 +139,8 @@ std::unique_ptr make_parquet_list_list_col( // child values std::vector child_values(num_rows * lists_per_row * list_size); - T first_child_value_index = skip_rows * lists_per_row * list_size; - int child_value_count = 0; + auto first_child_value_index = skip_rows * lists_per_row * list_size; + int child_value_count = 0; { for (int idx = 0; idx < (num_rows * lists_per_row * list_size); idx++) { int row_index = idx / (lists_per_row * list_size); @@ -178,6 +178,8 @@ std::unique_ptr make_parquet_list_list_col( template std::unique_ptr make_parquet_list_list_col( int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity); +template std::unique_ptr make_parquet_list_list_col( + int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity); template std::vector random_values(size_t size) From 15031459759977f6a94feecfb37ad22706b0ba0a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 18 Aug 2025 16:09:59 -0700 Subject: [PATCH 153/366] Add streams to reshape (#19728) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19728 --- python/pylibcudf/pylibcudf/libcudf/reshape.pxd | 7 +++++-- python/pylibcudf/pylibcudf/reshape.pxd | 4 ++-- python/pylibcudf/pylibcudf/reshape.pyi | 10 +++++++--- python/pylibcudf/pylibcudf/reshape.pyx | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd index 84fcd7cdaa8..f5024648137 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd @@ -16,10 +16,13 @@ cdef extern from "cuda/functional" namespace "cuda::std": cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil: cdef unique_ptr[column] 
interleave_columns( - table_view source_table + table_view source_table, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[table] tile( - table_view source_table, size_type count + table_view source_table, + size_type count, + cuda_stream_view stream ) except +libcudf_exception_handler cdef void table_to_array( table_view input_table, diff --git a/python/pylibcudf/pylibcudf/reshape.pxd b/python/pylibcudf/pylibcudf/reshape.pxd index efb7217f0e8..ba61a06a7b8 100644 --- a/python/pylibcudf/pylibcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/reshape.pxd @@ -14,8 +14,8 @@ from .table cimport Table from .types cimport DataType -cpdef Column interleave_columns(Table source_table) -cpdef Table tile(Table source_table, size_type count) +cpdef Column interleave_columns(Table source_table, Stream stream=*) +cpdef Table tile(Table source_table, size_type count, Stream stream=*) cpdef void table_to_array( Table input_table, uintptr_t ptr, diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi index 8f2e33903af..4d8d0c16ded 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyi +++ b/python/pylibcudf/pylibcudf/reshape.pyi @@ -1,12 +1,16 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table -def interleave_columns(source_table: Table) -> Column: ... -def tile(source_table: Table, count: int) -> Table: ... +def interleave_columns( + source_table: Table, stream: Stream | None = None +) -> Column: ... +def tile( + source_table: Table, count: int, stream: Stream | None = None +) -> Table: ... def table_to_array( input_table: Table, ptr: int, diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx index 0ebe61af713..a8ffa348eef 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -25,7 +25,7 @@ from .utils cimport _get_stream __all__ = ["interleave_columns", "tile", "table_to_array"] -cpdef Column interleave_columns(Table source_table): +cpdef Column interleave_columns(Table source_table, Stream stream=None): """Interleave columns of a table into a single column. Converts the column major table `input` into a row major column. @@ -40,6 +40,8 @@ cpdef Column interleave_columns(Table source_table): ---------- source_table: Table The input table to interleave + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -47,14 +49,15 @@ cpdef Column interleave_columns(Table source_table): A new column which is the result of interleaving the input columns """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_interleave_columns(source_table.view()) + c_result = cpp_interleave_columns(source_table.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Table tile(Table source_table, size_type count): +cpdef Table tile(Table source_table, size_type count, Stream stream=None): """Repeats the rows from input table count times to form a new table. For details, see :cpp:func:`tile`. @@ -65,6 +68,8 @@ cpdef Table tile(Table source_table, size_type count): The input table containing rows to be repeated count: size_type The number of times to tile "rows". Must be non-negative + stream : Stream | None + CUDA stream on which to perform the operation. 
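+        If `None`, the default CUDA stream is used.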
Returns ------- @@ -72,11 +77,12 @@ cpdef Table tile(Table source_table, size_type count): The table containing the tiled "rows" """ cdef unique_ptr[table] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_tile(source_table.view(), count) + c_result = cpp_tile(source_table.view(), count, stream.view()) - return Table.from_libcudf(move(c_result)) + return Table.from_libcudf(move(c_result), stream) cpdef void table_to_array( From 7fbce137570ff1a66b112f4c96e29e4c286e32c5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 18 Aug 2025 16:10:06 -0700 Subject: [PATCH 154/366] Add streams to null mask APIs (#19727) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19727 --- .../pylibcudf/pylibcudf/libcudf/null_mask.pxd | 16 +++-- python/pylibcudf/pylibcudf/null_mask.pxd | 16 +++-- python/pylibcudf/pylibcudf/null_mask.pyi | 23 +++++-- python/pylibcudf/pylibcudf/null_mask.pyx | 60 ++++++++++++++----- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd index 5b49ddc3bbe..bb53d5716d8 100644 --- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libc.stdint cimport int32_t from libcpp.pair cimport pair from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -7,11 +7,13 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type from rmm.librmm.device_buffer cimport device_buffer +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer copy_bitmask "cudf::copy_bitmask" ( - column_view view + column_view view, + cuda_stream_view stream ) except +libcudf_exception_handler cdef size_t bitmask_allocation_size_bytes ( @@ -25,19 +27,23 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer create_null_mask ( size_type size, - mask_state state + mask_state state, + cuda_stream_view stream ) except +libcudf_exception_handler cdef pair[device_buffer, size_type] bitmask_and( - table_view view + table_view view, + cuda_stream_view stream ) cdef pair[device_buffer, size_type] bitmask_or( - table_view view + table_view view, + cuda_stream_view stream ) cdef size_type null_count( const bitmask_type * bitmask, size_type start, size_type stop, + cuda_stream_view stream ) diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd index bd6969cc415..96aa80b4b4f 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -5,18 +5,24 @@ from pylibcudf.libcudf.types cimport mask_state, size_type from pylibcudf.gpumemoryview cimport gpumemoryview from rmm.pylibrmm.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.stream cimport Stream from .column cimport Column -cpdef DeviceBuffer copy_bitmask(Column col) +cpdef DeviceBuffer copy_bitmask(Column col, Stream stream=*) cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits) -cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *) +cpdef DeviceBuffer create_null_mask(size_type size, mask_state state=*, Stream stream=*) -cpdef tuple 
bitmask_and(list columns) +cpdef tuple bitmask_and(list columns, Stream stream=*) -cpdef tuple bitmask_or(list columns) +cpdef tuple bitmask_or(list columns, Stream stream=*) -cpdef size_type null_count(gpumemoryview bitmask, size_type start, size_type stop) +cpdef size_type null_count( + gpumemoryview bitmask, + size_type start, + size_type stop, + Stream stream=* +) diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi index 524b3d432b2..d3e6d9e8642 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyi +++ b/python/pylibcudf/pylibcudf/null_mask.pyi @@ -1,16 +1,27 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from rmm.pylibrmm.device_buffer import DeviceBuffer +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.types import MaskState -def copy_bitmask(col: Column) -> DeviceBuffer: ... +def copy_bitmask( + col: Column, stream: Stream | None = None +) -> DeviceBuffer: ... def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ... def create_null_mask( - size: int, state: MaskState = MaskState.UNINITIALIZED + size: int, + state: MaskState = MaskState.UNINITIALIZED, + stream: Stream | None = None, ) -> DeviceBuffer: ... -def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... -def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... -def null_count(bitmask: gpumemoryview, start: int, stop: int) -> int: ... +def bitmask_and( + columns: list[Column], stream: Stream | None = None +) -> tuple[DeviceBuffer, int]: ... +def bitmask_or( + columns: list[Column], stream: Stream | None = None +) -> tuple[DeviceBuffer, int]: ... +def null_count( + bitmask: gpumemoryview, start: int, stop: int, stream: Stream | None = None +) -> int: ... diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index 8a38563481a..c06ce424f26 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -8,11 +8,13 @@ from pylibcudf.gpumemoryview cimport gpumemoryview from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.stream cimport Stream from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint from .column cimport Column from .table cimport Table +from .utils cimport _get_stream __all__ = [ "bitmask_allocation_size_bytes", @@ -23,11 +25,11 @@ __all__ = [ "null_count", ] -cdef DeviceBuffer buffer_to_python(device_buffer buf): - return DeviceBuffer.c_from_unique_ptr(make_unique[device_buffer](move(buf))) +cdef DeviceBuffer buffer_to_python(device_buffer buf, Stream stream): + return DeviceBuffer.c_from_unique_ptr(make_unique[device_buffer](move(buf)), stream) -cpdef DeviceBuffer copy_bitmask(Column col): +cpdef DeviceBuffer copy_bitmask(Column col, Stream stream=None): """Copies ``col``'s bitmask into a ``DeviceBuffer``. For details, see :cpp:func:`copy_bitmask`. @@ -36,6 +38,8 @@ cpdef DeviceBuffer copy_bitmask(Column col): ---------- col : Column Column whose bitmask needs to be copied + stream : Stream | None + CUDA stream on which to perform the operation. 
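+        If `None`, the default CUDA stream is used.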
Returns ------- @@ -44,11 +48,12 @@ cpdef DeviceBuffer copy_bitmask(Column col): ``DeviceBuffer`` if ``col`` is not nullable """ cdef device_buffer db + stream = _get_stream(stream) with nogil: - db = cpp_null_mask.copy_bitmask(col.view()) + db = cpp_null_mask.copy_bitmask(col.view(), stream.view()) - return buffer_to_python(move(db)) + return buffer_to_python(move(db), stream) cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): """ @@ -73,7 +78,8 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): cpdef DeviceBuffer create_null_mask( size_type size, - mask_state state = mask_state.UNINITIALIZED + mask_state state = mask_state.UNINITIALIZED, + Stream stream=None ): """Creates a ``DeviceBuffer`` for use as a null value indicator bitmask of a ``Column``. @@ -88,6 +94,8 @@ cpdef DeviceBuffer create_null_mask( The desired state of the mask. Can be one of { MaskState.UNALLOCATED, MaskState.UNINITIALIZED, MaskState.ALL_VALID, MaskState.ALL_NULL } (default MaskState.UNINITIALIZED) + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -96,14 +104,15 @@ cpdef DeviceBuffer create_null_mask( state """ cdef device_buffer db + stream = _get_stream(stream) with nogil: - db = cpp_null_mask.create_null_mask(size, state) + db = cpp_null_mask.create_null_mask(size, state, stream.view()) - return buffer_to_python(move(db)) + return buffer_to_python(move(db), stream) -cpdef tuple bitmask_and(list columns): +cpdef tuple bitmask_and(list columns, Stream stream=None): """Performs bitwise AND of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_and`. @@ -112,6 +121,8 @@ cpdef tuple bitmask_and(list columns): ---------- columns : list The list of columns + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -120,14 +131,15 @@ cpdef tuple bitmask_and(list columns): """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_null_mask.bitmask_and(c_table.view()) + c_result = cpp_null_mask.bitmask_and(c_table.view(), stream.view()) - return buffer_to_python(move(c_result.first)), c_result.second + return buffer_to_python(move(c_result.first), stream), c_result.second -cpdef tuple bitmask_or(list columns): +cpdef tuple bitmask_or(list columns, Stream stream=None): """Performs bitwise OR of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_or`. @@ -136,6 +148,8 @@ cpdef tuple bitmask_or(list columns): ---------- columns : list The list of columns + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -144,14 +158,20 @@ cpdef tuple bitmask_or(list columns): """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_null_mask.bitmask_or(c_table.view()) + c_result = cpp_null_mask.bitmask_or(c_table.view(), stream.view()) - return buffer_to_python(move(c_result.first)), c_result.second + return buffer_to_python(move(c_result.first), stream), c_result.second -cpdef size_type null_count(gpumemoryview bitmask, size_type start, size_type stop): +cpdef size_type null_count( + gpumemoryview bitmask, + size_type start, + size_type stop, + Stream stream=None +): """Given a validity bitmask, counts the number of null elements. For details, see :cpp:func:`null_count`. 
@@ -164,11 +184,19 @@ cpdef size_type null_count(gpumemoryview bitmask, size_type start, size_type sto Index of the first bit to count (inclusive). stop : int Index of the last bit to count (exclusive). + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- int The number of null elements in the specified range. """ + stream = _get_stream(stream) with nogil: - return cpp_null_mask.null_count((bitmask.ptr), start, stop) + return cpp_null_mask.null_count( + (bitmask.ptr), + start, + stop, + stream.view() + ) From 79ec4f28e676ce4717f3116dc9997c6278a15c1c Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 19 Aug 2025 00:15:40 +0100 Subject: [PATCH 155/366] [FEA] Implement null-aware transforms and filters (#19502) This merge request implements null-aware transforms where the user can decide the nullness of the output and predicate on the nullness of the inputs. It also implements null-aware filters to allow UDFs to predicate on the null-ness of the inputs. This enables operators like `IS_NULL` and `NULL_EQUAL`. Follows-up: https://github.com/rapidsai/cudf/issues/18023 Closes: https://github.com/rapidsai/cudf/issues/18820 Authors: - Basit Ayantunde (https://github.com/lamarrr) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19502 --- cpp/benchmarks/ndsh/q09.cpp | 1 + cpp/benchmarks/transform/polynomials.cpp | 1 + cpp/benchmarks/transform/transform.cpp | 1 + .../compute_checksum_jit.cpp | 10 +- .../string_transforms/extract_email_jit.cpp | 10 +- .../string_transforms/format_phone_jit.cpp | 1 + .../string_transforms/localize_phone_jit.cpp | 1 + cpp/include/cudf/stream_compaction.hpp | 2 + cpp/include/cudf/transform.hpp | 2 + cpp/include/cudf/types.hpp | 8 +- cpp/src/jit/accessors.cuh | 50 +++---- cpp/src/jit/helpers.hpp | 2 + cpp/src/jit/span.cuh | 14 ++ cpp/src/stream_compaction/filter/filter.cu | 14 +- .../stream_compaction/filter/jit/kernel.cu | 20 ++- cpp/src/transform/jit/kernel.cu | 61 ++++++--- cpp/src/transform/transform.cpp | 38 +++++- cpp/tests/filter/filter_test.cpp | 60 ++++++--- cpp/tests/streams/transform_test.cpp | 1 + .../integration/unary_transform_test.cpp | 124 +++++++++++++++--- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 3 +- python/pylibcudf/pylibcudf/libcudf/types.pxd | 4 + python/pylibcudf/pylibcudf/transform.pxd | 3 +- python/pylibcudf/pylibcudf/transform.pyi | 3 +- python/pylibcudf/pylibcudf/transform.pyx | 8 +- python/pylibcudf/pylibcudf/types.pxd | 1 + python/pylibcudf/pylibcudf/types.pyi | 4 + python/pylibcudf/pylibcudf/types.pyx | 3 + python/pylibcudf/tests/test_transform.py | 1 + 29 files changed, 349 insertions(+), 102 deletions(-) diff --git a/cpp/benchmarks/ndsh/q09.cpp b/cpp/benchmarks/ndsh/q09.cpp index 15dbcbd485e..28063533f16 100644 --- a/cpp/benchmarks/ndsh/q09.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -161,6 +161,7 @@ struct q9_data { cudf::data_type{cudf::type_id::FLOAT64}, false, std::nullopt, + cudf::null_aware::NO, stream, mr); } diff --git a/cpp/benchmarks/transform/polynomials.cpp b/cpp/benchmarks/transform/polynomials.cpp index a3e09535fa3..5b03ae1dfbe 100644 --- a/cpp/benchmarks/transform/polynomials.cpp +++ b/cpp/benchmarks/transform/polynomials.cpp @@ -95,6 +95,7 @@ static void BM_transform_polynomials(nvbench::state& state) cudf::data_type{cudf::type_to_id()}, false, std::nullopt, + cudf::null_aware::NO, launch.get_stream().get_stream()); }); } diff --git 
a/cpp/benchmarks/transform/transform.cpp b/cpp/benchmarks/transform/transform.cpp index f487f191d94..a404a9e9582 100644 --- a/cpp/benchmarks/transform/transform.cpp +++ b/cpp/benchmarks/transform/transform.cpp @@ -101,6 +101,7 @@ static void BM_transform(nvbench::state& state) cudf::data_type{cudf::type_to_id()}, false, std::nullopt, + cudf::null_aware::NO, launch.get_stream().get_stream()); }); } diff --git a/cpp/examples/string_transforms/compute_checksum_jit.cpp b/cpp/examples/string_transforms/compute_checksum_jit.cpp index f1fbc289406..e23729a930d 100644 --- a/cpp/examples/string_transforms/compute_checksum_jit.cpp +++ b/cpp/examples/string_transforms/compute_checksum_jit.cpp @@ -49,8 +49,14 @@ std::tuple, std::vector> transform( auto name = table.column(0); auto email = table.column(1); - auto result = cudf::transform( - {name, email}, udf, cudf::data_type{cudf::type_id::UINT16}, false, std::nullopt, stream, mr); + auto result = cudf::transform({name, email}, + udf, + cudf::data_type{cudf::type_id::UINT16}, + false, + std::nullopt, + cudf::null_aware::NO, + stream, + mr); return std::make_tuple(std::move(result), transformed); } diff --git a/cpp/examples/string_transforms/extract_email_jit.cpp b/cpp/examples/string_transforms/extract_email_jit.cpp index 7a20946dcd5..686dc5814b3 100644 --- a/cpp/examples/string_transforms/extract_email_jit.cpp +++ b/cpp/examples/string_transforms/extract_email_jit.cpp @@ -64,8 +64,14 @@ __device__ void email_provider(cudf::string_view* out, auto transformed = std::vector{1}; auto emails = table.column(1); - auto providers = cudf::transform( - {emails, *alt}, udf, cudf::data_type{cudf::type_id::STRING}, false, std::nullopt, stream, mr); + auto providers = cudf::transform({emails, *alt}, + udf, + cudf::data_type{cudf::type_id::STRING}, + false, + std::nullopt, + cudf::null_aware::NO, + stream, + mr); return {std::move(providers), std::move(transformed)}; } diff --git a/cpp/examples/string_transforms/format_phone_jit.cpp b/cpp/examples/string_transforms/format_phone_jit.cpp index 680d82a489b..dfbf106fec4 100644 --- a/cpp/examples/string_transforms/format_phone_jit.cpp +++ b/cpp/examples/string_transforms/format_phone_jit.cpp @@ -132,6 +132,7 @@ __device__ void e164_format(void* scratch, cudf::data_type{cudf::type_id::STRING}, false, scratch.data(), + cudf::null_aware::NO, stream, mr); diff --git a/cpp/examples/string_transforms/localize_phone_jit.cpp b/cpp/examples/string_transforms/localize_phone_jit.cpp index 1c065ca11f0..cd720ec47d0 100644 --- a/cpp/examples/string_transforms/localize_phone_jit.cpp +++ b/cpp/examples/string_transforms/localize_phone_jit.cpp @@ -155,6 +155,7 @@ __device__ void format_phone(void* scratch, cudf::data_type{cudf::type_id::STRING}, false, scratch.data(), + cudf::null_aware::NO, stream, mr); diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index a956905b6d6..8ac3d7bb2e2 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -451,6 +451,7 @@ cudf::size_type distinct_count(table_view const& input, * @param user_data User-defined device data to pass to the UDF. * @param copy_mask Optional vector of booleans indicating which columns to copy from the input * columns to the output. If not provided, all columns are copied. 
+ * @param is_null_aware Signifies the UDF will receive row inputs as optional values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return The filtered target columns @@ -461,6 +462,7 @@ std::vector> filter( bool is_ptx, std::optional user_data = std::nullopt, std::optional> copy_mask = std::nullopt, + null_aware is_null_aware = null_aware::NO, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 49bf3114778..c49392c92aa 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -56,6 +56,7 @@ namespace CUDF_EXPORT cudf { * @param output_type The output type that is compatible with the output type in the UDF * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code * @param user_data User-defined device data to pass to the UDF. + * @param is_null_aware Signifies the UDF will receive row inputs as optional values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the transform function to @@ -67,6 +68,7 @@ std::unique_ptr transform( data_type output_type, bool is_ptx, std::optional user_data = std::nullopt, + null_aware is_null_aware = null_aware::NO, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 9443bd5cb52..408594bf080 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -234,6 +234,12 @@ enum class type_id : int32_t { NUM_TYPE_IDS ///< Total number of type ids }; +/// @brief Indicates whether a function is null-aware or not. +enum class null_aware : bool { + NO = 0, ///< The function is not null-aware + YES = 1 ///< The function is null-aware +}; + /** * @brief Indicator for the logical data type of an element in a column. 
* diff --git a/cpp/src/jit/accessors.cuh b/cpp/src/jit/accessors.cuh index adfb9ea7328..a8e68d49e44 100644 --- a/cpp/src/jit/accessors.cuh +++ b/cpp/src/jit/accessors.cuh @@ -19,6 +19,7 @@ #include #include +#include #include @@ -32,16 +33,10 @@ struct column_accessor { using type = T; static constexpr int32_t index = Index; - static __device__ decltype(auto) element(cudf::mutable_column_device_view_core const* outputs, - cudf::size_type row) + template + static __device__ decltype(auto) element(ColumnView const* columns, cudf::size_type row) { - return outputs[index].element(row); - } - - static __device__ decltype(auto) element(cudf::column_device_view_core const* inputs, - cudf::size_type row) - { - return inputs[index].element(row); + return columns[index].template element(row); } static __device__ void assign(cudf::mutable_column_device_view_core const* outputs, @@ -51,15 +46,18 @@ struct column_accessor { outputs[index].assign(row, value); } - static __device__ bool is_null(cudf::column_device_view_core const* inputs, cudf::size_type row) + template + static __device__ bool is_null(ColumnView const* inputs, cudf::size_type row) { return inputs[index].is_null(row); } - static __device__ bool is_null(cudf::mutable_column_device_view_core const* inputs, - cudf::size_type row) + template + static __device__ cuda::std::optional nullable_element(ColumnView const* outputs, + cudf::size_type row) { - return inputs[index].is_null(row); + if (is_null(outputs, row)) { return cuda::std::nullopt; } + return outputs[index].template element(row); } }; @@ -86,6 +84,13 @@ struct span_accessor { { return inputs[index].is_null(row); } + + static __device__ cuda::std::optional nullable_element( + cudf::jit::device_optional_span const* outputs, cudf::size_type row) + { + if (is_null(outputs, row)) { return cuda::std::nullopt; } + return outputs[index].element(row); + } }; template @@ -93,18 +98,12 @@ struct scalar_accessor { using type = typename Accessor::type; static constexpr int32_t index = Accessor::index; - static __device__ decltype(auto) element(cudf::mutable_column_device_view_core const* outputs, - cudf::size_type) + template + static __device__ decltype(auto) element(ColumnView const* outputs, cudf::size_type) { return Accessor::element(outputs, 0); } - static __device__ decltype(auto) element(cudf::column_device_view_core const* inputs, - cudf::size_type) - { - return Accessor::element(inputs, 0); - } - static __device__ void assign(cudf::mutable_column_device_view_core const* outputs, cudf::size_type, type value) @@ -112,15 +111,16 @@ struct scalar_accessor { return Accessor::assign(outputs, 0, value); } - static __device__ bool is_null(cudf::column_device_view_core const* inputs, cudf::size_type) + template + static __device__ bool is_null(ColumnView const* inputs, cudf::size_type) { return Accessor::is_null(inputs, 0); } - static __device__ bool is_null(cudf::mutable_column_device_view_core const* inputs, - cudf::size_type) + template + static __device__ decltype(auto) nullable_element(ColumnView const* outputs, cudf::size_type) { - return Accessor::is_null(inputs, 0); + return Accessor::nullable_element(outputs, 0); } }; diff --git a/cpp/src/jit/helpers.hpp b/cpp/src/jit/helpers.hpp index 65e28e9c39a..e8aa089fa3c 100644 --- a/cpp/src/jit/helpers.hpp +++ b/cpp/src/jit/helpers.hpp @@ -48,6 +48,7 @@ struct input_column_reflection { jitify2::StringVec build_jit_template_params( bool has_user_data, + null_aware is_null_aware, std::vector const& span_outputs, std::vector const& column_outputs, 
  std::vector const& column_inputs)
@@ -55,6 +56,7 @@ jitify2::StringVec build_jit_template_params(
   jitify2::StringVec tparams;
   tparams.emplace_back(jitify2::reflection::reflect(has_user_data));
+  tparams.emplace_back(jitify2::reflection::reflect(is_null_aware == null_aware::YES));
   std::transform(thrust::counting_iterator(0),
                  thrust::counting_iterator(span_outputs.size()),
diff --git a/cpp/src/jit/span.cuh b/cpp/src/jit/span.cuh
index 07f91d8a06a..f26bbf3c4f6 100644
--- a/cpp/src/jit/span.cuh
+++ b/cpp/src/jit/span.cuh
@@ -154,6 +154,20 @@ struct device_optional_span : device_span {
     return !is_valid(element_index);
   }
 
+  CUDF_HOST_DEVICE constexpr T& element(size_t idx) const { return base::operator[](idx); }
+
+  /// @copydoc mutable_column_device_view::set_valid
+  __device__ void set_valid(size_type element_index) const noexcept
+  {
+    return set_bit(_null_mask, element_index);
+  }
+
+  /// @copydoc mutable_column_device_view::set_null
+  __device__ void set_null(size_type element_index) const noexcept
+  {
+    return clear_bit(_null_mask, element_index);
+  }
+
   /// @brief converts the optional span to a regular non-nullable span.
   [[nodiscard]] __device__ base to_span() const noexcept { return static_cast(*this); }
 
diff --git a/cpp/src/stream_compaction/filter/filter.cu b/cpp/src/stream_compaction/filter/filter.cu
index a518b566c97..79f17d7d7c9 100644
--- a/cpp/src/stream_compaction/filter/filter.cu
+++ b/cpp/src/stream_compaction/filter/filter.cu
@@ -218,11 +218,15 @@ jitify2::ConfiguredKernel build_kernel(std::string const& kernel_name,
                                        std::vector const& span_outputs,
                                        std::vector const& input_columns,
                                        bool has_user_data,
+                                       null_aware is_null_aware,
                                        std::string const& udf,
                                        bool is_ptx,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
+  CUDF_EXPECTS(!(is_null_aware == null_aware::YES && is_ptx),
+               "Optional types are not supported in PTX UDFs",
+               std::invalid_argument);
   auto const cuda_source = is_ptx ?
cudf::jit::parse_single_function_ptx( udf, @@ -234,6 +238,7 @@ jitify2::ConfiguredKernel build_kernel(std::string const& kernel_name, return get_kernel(jitify2::reflection::Template(kernel_name) .instantiate(cudf::jit::build_jit_template_params( has_user_data, + is_null_aware, span_outputs, {}, cudf::jit::reflect_input_columns(base_column_size, input_columns))), @@ -247,6 +252,7 @@ std::vector> filter_operation(column_view base_column, bool is_ptx, std::optional user_data, std::optional> copy_mask, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -258,6 +264,7 @@ std::vector> filter_operation(column_view base_column, {"cudf::size_type"}, columns, user_data.has_value(), + is_null_aware, predicate_udf, is_ptx, stream, @@ -309,6 +316,7 @@ std::vector> filter(std::vector const& colu bool is_ptx, std::optional user_data, std::optional> copy_mask, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -320,7 +328,7 @@ std::vector> filter(std::vector const& colu perform_checks(*base_column, columns, copy_mask); auto filtered = filter_operation( - *base_column, columns, predicate_udf, is_ptx, user_data, copy_mask, stream, mr); + *base_column, columns, predicate_udf, is_ptx, user_data, copy_mask, is_null_aware, stream, mr); return filtered; } @@ -332,11 +340,13 @@ std::vector> filter(std::vector const& colu bool is_ptx, std::optional user_data, std::optional> copy_mask, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::filter(columns, predicate_udf, is_ptx, user_data, copy_mask, stream, mr); + return detail::filter( + columns, predicate_udf, is_ptx, user_data, copy_mask, is_null_aware, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/filter/jit/kernel.cu b/cpp/src/stream_compaction/filter/jit/kernel.cu index 07aeebee688..c43c896ef86 100644 --- a/cpp/src/stream_compaction/filter/jit/kernel.cu +++ b/cpp/src/stream_compaction/filter/jit/kernel.cu @@ -36,7 +36,7 @@ namespace cudf { namespace filtering { namespace jit { -template +template CUDF_KERNEL void kernel(cudf::jit::device_optional_span const* outputs, cudf::column_device_view_core const* inputs, void* user_data) @@ -51,15 +51,23 @@ CUDF_KERNEL void kernel(cudf::jit::device_optional_span cons auto const size = output.size(); for (auto i = start; i < size; i += stride) { - auto const any_null = (false || ... || In::is_null(inputs, i)); - bool applies = false; - if (!any_null) { + if constexpr (!is_null_aware) { + auto const any_null = (false || ... 
|| In::is_null(inputs, i)); + + if (!any_null) { + if constexpr (has_user_data) { + GENERIC_FILTER_OP(user_data, i, &applies, In::element(inputs, i)...); + } else { + GENERIC_FILTER_OP(&applies, In::element(inputs, i)...); + } + } + } else { if constexpr (has_user_data) { - GENERIC_FILTER_OP(user_data, i, &applies, In::element(inputs, i)...); + GENERIC_FILTER_OP(user_data, i, &applies, In::nullable_element(inputs, i)...); } else { - GENERIC_FILTER_OP(&applies, In::element(inputs, i)...); + GENERIC_FILTER_OP(&applies, In::nullable_element(inputs, i)...); } } diff --git a/cpp/src/transform/jit/kernel.cu b/cpp/src/transform/jit/kernel.cu index b3647e374c5..53409e9defd 100644 --- a/cpp/src/transform/jit/kernel.cu +++ b/cpp/src/transform/jit/kernel.cu @@ -36,7 +36,7 @@ namespace cudf { namespace transformation { namespace jit { -template +template CUDF_KERNEL void kernel(cudf::mutable_column_device_view_core const* outputs, cudf::column_device_view_core const* inputs, void* user_data) @@ -49,17 +49,26 @@ CUDF_KERNEL void kernel(cudf::mutable_column_device_view_core const* outputs, auto const size = outputs[0].size(); for (auto i = start; i < size; i += stride) { - if (Out::is_null(outputs, i)) { continue; } - - if constexpr (has_user_data) { - GENERIC_TRANSFORM_OP(user_data, i, &Out::element(outputs, i), In::element(inputs, i)...); + if constexpr (!is_null_aware) { + if (Out::is_null(outputs, i)) { continue; } + + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP(user_data, i, &Out::element(outputs, i), In::element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); + } } else { - GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP( + user_data, i, &Out::element(outputs, i), In::nullable_element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::nullable_element(inputs, i)...); + } } } } -template +template CUDF_KERNEL void fixed_point_kernel(cudf::mutable_column_device_view_core const* outputs, cudf::column_device_view_core const* inputs, void* user_data) @@ -72,19 +81,28 @@ CUDF_KERNEL void fixed_point_kernel(cudf::mutable_column_device_view_core const* for (auto i = start; i < size; i += stride) { typename Out::type result{numeric::scaled_integer{0, output_scale}}; - if (Out::is_null(outputs, i)) { continue; } + if constexpr (!is_null_aware) { + if (Out::is_null(outputs, i)) { continue; } + + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP(user_data, i, &result, In::element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&result, In::element(inputs, i)...); + } - if constexpr (has_user_data) { - GENERIC_TRANSFORM_OP(user_data, i, &result, In::element(inputs, i)...); } else { - GENERIC_TRANSFORM_OP(&result, In::element(inputs, i)...); + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP(user_data, i, &result, In::nullable_element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&result, In::nullable_element(inputs, i)...); + } } Out::assign(outputs, i, result); } } -template +template CUDF_KERNEL void span_kernel(cudf::jit::device_optional_span const* outputs, cudf::column_device_view_core const* inputs, void* user_data) @@ -94,12 +112,21 @@ CUDF_KERNEL void span_kernel(cudf::jit::device_optional_span auto const size = outputs[0].size(); for (auto i = start; i < size; i += stride) { - if (Out::is_null(outputs, i)) { continue; } - - if constexpr (has_user_data) { - GENERIC_TRANSFORM_OP(user_data, i, 
&Out::element(outputs, i), In::element(inputs, i)...); + if constexpr (!is_null_aware) { + if (Out::is_null(outputs, i)) { continue; } + + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP(user_data, i, &Out::element(outputs, i), In::element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); + } } else { - GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); + if constexpr (has_user_data) { + GENERIC_TRANSFORM_OP( + user_data, i, &Out::element(outputs, i), In::nullable_element(inputs, i)...); + } else { + GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::nullable_element(inputs, i)...); + } } } } diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index ad3c6bf15c6..4d3aa4cb443 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ jitify2::ConfiguredKernel build_transform_kernel( std::vector const& output_columns, std::vector const& input_columns, bool has_user_data, + null_aware is_null_aware, std::string const& udf, bool is_ptx, rmm::cuda_stream_view stream, @@ -70,6 +72,7 @@ jitify2::ConfiguredKernel build_transform_kernel( return get_kernel(jitify2::reflection::Template(kernel_name) .instantiate(cudf::jit::build_jit_template_params( has_user_data, + is_null_aware, {}, cudf::jit::column_type_names(output_columns), cudf::jit::reflect_input_columns(base_column_size, input_columns))), @@ -82,6 +85,7 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, std::vector const& span_outputs, std::vector const& input_columns, bool has_user_data, + null_aware is_null_aware, std::string const& udf, bool is_ptx, rmm::cuda_stream_view stream, @@ -98,6 +102,7 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, return get_kernel(jitify2::reflection::Template(kernel_name) .instantiate(cudf::jit::build_jit_template_params( has_user_data, + is_null_aware, span_outputs, {}, cudf::jit::reflect_input_columns(base_column_size, input_columns))), @@ -188,10 +193,17 @@ std::unique_ptr transform_operation(column_view base_column, std::string const& udf, bool is_ptx, std::optional user_data, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto [null_mask, null_count] = make_transform_null_mask(base_column, inputs, stream, mr); + rmm::device_buffer null_mask{}; + cudf::size_type null_count{0}; + if (is_null_aware == null_aware::NO) { + std::tie(null_mask, null_count) = make_transform_null_mask(base_column, inputs, stream, mr); + } else { + null_mask = create_null_mask(base_column.size(), mask_state::UNALLOCATED, stream, mr); + } auto output = make_fixed_width_column( output_type, base_column.size(), std::move(null_mask), null_count, stream, mr); @@ -203,13 +215,13 @@ std::unique_ptr transform_operation(column_view base_column, {*output}, inputs, user_data.has_value(), + is_null_aware, udf, is_ptx, stream, mr); launch_column_output_kernel(kernel, {*output}, inputs, user_data, stream, mr); - return output; } @@ -218,16 +230,25 @@ std::unique_ptr string_view_operation(column_view base_column, std::string const& udf, bool is_ptx, std::optional user_data, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto [null_mask, null_count] = make_transform_null_mask(base_column, inputs, stream, mr); + rmm::device_buffer null_mask{}; + 
cudf::size_type null_count{0}; + + if (is_null_aware == null_aware::NO) { + std::tie(null_mask, null_count) = make_transform_null_mask(base_column, inputs, stream, mr); + } else { + null_mask = create_null_mask(base_column.size(), mask_state::UNALLOCATED, stream, mr); + } auto kernel = build_span_kernel("cudf::transformation::jit::span_kernel", base_column.size(), {"cudf::string_view"}, inputs, user_data.has_value(), + is_null_aware, udf, is_ptx, stream, @@ -287,11 +308,15 @@ std::unique_ptr transform(std::vector const& inputs, data_type output_type, bool is_ptx, std::optional user_data, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_EXPECTS( !inputs.empty(), "Transform must have at least 1 input column", std::invalid_argument); + CUDF_EXPECTS(!(is_null_aware == null_aware::YES && is_ptx), + "Optional types are not supported in PTX UDFs", + std::invalid_argument); auto const base_column = std::max_element( inputs.begin(), inputs.end(), [](auto& a, auto& b) { return a.size() < b.size(); }); @@ -300,10 +325,10 @@ std::unique_ptr transform(std::vector const& inputs, if (is_fixed_width(output_type)) { return transformation::jit::transform_operation( - *base_column, output_type, inputs, udf, is_ptx, user_data, stream, mr); + *base_column, output_type, inputs, udf, is_ptx, user_data, is_null_aware, stream, mr); } else if (output_type.id() == type_id::STRING) { return transformation::jit::string_view_operation( - *base_column, inputs, udf, is_ptx, user_data, stream, mr); + *base_column, inputs, udf, is_ptx, user_data, is_null_aware, stream, mr); } else { CUDF_FAIL("Unsupported output type for transform operation"); } @@ -316,11 +341,12 @@ std::unique_ptr transform(std::vector const& inputs, data_type output_type, bool is_ptx, std::optional user_data, + null_aware is_null_aware, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::transform(inputs, udf, output_type, is_ptx, user_data, stream, mr); + return detail::transform(inputs, udf, output_type, is_ptx, user_data, is_null_aware, stream, mr); } } // namespace cudf diff --git a/cpp/tests/filter/filter_test.cpp b/cpp/tests/filter/filter_test.cpp index 1c080b374c7..9c0a577b137 100644 --- a/cpp/tests/filter/filter_test.cpp +++ b/cpp/tests/filter/filter_test.cpp @@ -55,8 +55,9 @@ TYPED_TEST(FilterNumericTest, NoAssertions) std::vector> results; - EXPECT_NO_THROW(results = - cudf::filter({a, b}, this->udf, false, std::nullopt, std::vector{true, false})); + EXPECT_NO_THROW( + results = cudf::filter( + {a, b}, this->udf, false, std::nullopt, std::vector{true, false}, cudf::null_aware::NO)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results[0]->view()); } @@ -77,8 +78,9 @@ TYPED_TEST(FilterChronoTest, NoAssertions) auto expected = cudf::test::fixed_width_column_wrapper{T{}, T{}, T{}, T{}, T{}, T{}}; std::vector> results; - EXPECT_NO_THROW(results = - cudf::filter({a, b}, this->udf, false, std::nullopt, std::vector{true, false})); + EXPECT_NO_THROW( + results = cudf::filter( + {a, b}, this->udf, false, std::nullopt, std::vector{true, false}, cudf::null_aware::NO)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results[0]->view()); } @@ -101,8 +103,9 @@ TYPED_TEST(FilterFixedPointTest, NoAssertions) std::vector> results; - EXPECT_NO_THROW(results = - cudf::filter({a, b}, this->udf, false, std::nullopt, std::vector{true, false})); + EXPECT_NO_THROW( + results = cudf::filter( + {a, b}, this->udf, false, std::nullopt, std::vector{true, false}, 
cudf::null_aware::NO)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results[0]->view()); } @@ -118,8 +121,9 @@ TEST_F(FilterTestFixture, StringNoAssertions) std::vector> results; - EXPECT_NO_THROW(results = - cudf::filter({a, b}, this->udf, false, std::nullopt, std::vector{true, false})); + EXPECT_NO_THROW( + results = cudf::filter( + {a, b}, this->udf, false, std::nullopt, std::vector{true, false}, cudf::null_aware::NO)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results[0]->view()); } @@ -133,26 +137,42 @@ TEST_F(FilterAssertsTest, CopyMask) __device__ void is_divisible(bool* out, int32_t a, int32_t b) { *out = ((a % b) == 0); } )***"; - EXPECT_NO_THROW(cudf::filter({a, b}, cuda, false, std::nullopt, std::vector{true, true})); - EXPECT_THROW(cudf::filter({a, b}, cuda, false, std::nullopt, std::vector{true}), - std::invalid_argument); - EXPECT_THROW(cudf::filter({a, b}, cuda, false, std::nullopt, std::vector{true, true, true}), - std::invalid_argument); + EXPECT_NO_THROW( + cudf::filter({a, b}, cuda, false, std::nullopt, std::vector{true, true}, cudf::null_aware::NO)); + EXPECT_THROW( + cudf::filter({a, b}, cuda, false, std::nullopt, std::vector{true}, cudf::null_aware::NO), + std::invalid_argument); + EXPECT_THROW( + cudf::filter( + {a, b}, cuda, false, std::nullopt, std::vector{true, true, true}, cudf::null_aware::NO), + std::invalid_argument); } struct FilterTest : public FilterTestFixture {}; TEST_F(FilterTest, Basic) { - auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto a = cudf::test::fixed_width_column_wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 1, 1, 1, 1, 0, 0, 0}); std::string cuda = R"***( __device__ void is_even(bool* out, int32_t a) { *out = (a % 2 == 0); } )***"; - auto result = cudf::filter({a}, cuda, false, std::nullopt, std::vector{true}); - auto expected = cudf::test::fixed_width_column_wrapper{2, 4, 6, 8, 10}; + auto result = + cudf::filter({a}, cuda, false, std::nullopt, std::vector{true}, cudf::null_aware::NO); + auto expected = cudf::test::fixed_width_column_wrapper{2, 4, 6}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result[0]->view()); + + std::string null_cuda = R"***( +__device__ void is_even(bool* out, cuda::std::optional a) { *out = a.has_value() && (*a % 2 == 0); } + )***"; + + auto null_result = + cudf::filter({a}, null_cuda, false, std::nullopt, std::vector{true}, cudf::null_aware::YES); + auto null_expected = cudf::test::fixed_width_column_wrapper{2, 4, 6}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(null_expected, null_result[0]->view()); } TEST_F(FilterTest, ScalarBroadcast) @@ -163,7 +183,7 @@ TEST_F(FilterTest, ScalarBroadcast) __device__ void is_divisible(bool* out, int32_t a, int32_t b) { *out = ((a % b) == 0); } )***"; - auto result = cudf::filter({a, b}, cuda, false, std::nullopt); + auto result = cudf::filter({a, b}, cuda, false, std::nullopt, std::nullopt, cudf::null_aware::NO); auto expected_a = cudf::test::fixed_width_column_wrapper{2, 4, 6, 8, 10}; auto expected_b = cudf::test::fixed_width_column_wrapper{2, 2, 2, 2, 2}; @@ -220,7 +240,8 @@ __device__ void filter(bool* out, cuda, false, std::nullopt, - std::vector{true, true, false, false, false, false, false, false, false, false}); + std::vector{true, true, false, false, false, false, false, false, false, false}, + cudf::null_aware::NO); EXPECT_EQ(result.size(), 2); @@ -282,7 +303,8 @@ __device__ void filter(bool* out, cuda, false, std::nullopt, - std::vector{true, true, false, false, false, false, false, false, false, false}); + std::vector{true, true, false, 
false, false, false, false, false, false, false}, + cudf::null_aware::NO); auto expected_countries = cudf::test::strings_column_wrapper{"Germany", "Spain"}; diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp index 993ddc90819..91f6c197857 100644 --- a/cpp/tests/streams/transform_test.cpp +++ b/cpp/tests/streams/transform_test.cpp @@ -38,6 +38,7 @@ void test_udf(char const* udf, Data data_init, cudf::size_type size, bool is_ptx cudf::data_type(cudf::type_to_id()), is_ptx, std::nullopt, + cudf::null_aware::NO, cudf::test::get_default_stream()); } diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index fdf16c34aab..ab82c94c348 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -56,31 +56,49 @@ struct AssertsTest : public RuntimeSupportTest {}; TEST_F(AssertsTest, TypeSupport) { - EXPECT_NO_THROW( - cudf::transform({a, b, t}, udf, cudf::data_type{cudf::type_id::FLOAT32}, false, std::nullopt)); - - EXPECT_THROW( - cudf::transform({a, b, t}, udf, cudf::data_type{cudf::type_id::STRUCT}, false, std::nullopt), - std::invalid_argument); - - EXPECT_THROW( - cudf::transform( - {struct_col, t}, udf, cudf::data_type{cudf::type_id::FLOAT32}, false, std::nullopt), - std::invalid_argument); + EXPECT_NO_THROW(cudf::transform({a, b, t}, + udf, + cudf::data_type{cudf::type_id::FLOAT32}, + false, + std::nullopt, + cudf::null_aware::NO)); + + EXPECT_THROW(cudf::transform({a, b, t}, + udf, + cudf::data_type{cudf::type_id::STRUCT}, + false, + std::nullopt, + cudf::null_aware::NO), + std::invalid_argument); + + EXPECT_THROW(cudf::transform({struct_col, t}, + udf, + cudf::data_type{cudf::type_id::FLOAT32}, + false, + std::nullopt, + cudf::null_aware::NO), + std::invalid_argument); } TEST_F(AssertsTest, UnequalRowCount) { - EXPECT_THROW( - cudf::transform( - {a, b, bad_col}, udf, cudf::data_type{cudf::type_id::FLOAT32}, false, std::nullopt), - std::invalid_argument); + EXPECT_THROW(cudf::transform({a, b, bad_col}, + udf, + cudf::data_type{cudf::type_id::FLOAT32}, + false, + std::nullopt, + cudf::null_aware::NO), + std::invalid_argument); } TEST_F(AssertsTest, NullSupport) { - EXPECT_NO_THROW(cudf::transform( - {a, b_nulls, t}, udf, cudf::data_type{cudf::type_id::FLOAT32}, false, std::nullopt)); + EXPECT_NO_THROW(cudf::transform({a, b_nulls, t}, + udf, + cudf::data_type{cudf::type_id::FLOAT32}, + false, + std::nullopt, + cudf::null_aware::NO)); } struct UnaryOperationIntegrationTest : public cudf::test::BaseFixture {}; @@ -590,7 +608,8 @@ __device__ void transform(void* user_data, cudf::size_type row, cuda, cudf::data_type(cudf::type_id::STRING), false, - scratch.data()); + scratch.data(), + cudf::null_aware::NO); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } @@ -781,4 +800,73 @@ TEST_F(NullTest, ColumnNulls_And_ScalarNull) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ptx_result, *expected); } +TEST_F(NullTest, IsNull) +{ + auto udf = R"***( + __device__ inline void is_null(bool * output, cuda::std::optional input) + { + *output = !input.has_value(); + } + )***"; + + auto value = cudf::test::fixed_width_column_wrapper({1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, + {false, false, true, false, true}) + .release(); + + auto expected = cudf::test::fixed_width_column_wrapper({true, true, false, true, false}); + + auto result = cudf::transform({*value}, + udf, + cudf::data_type(cudf::type_id::BOOL8), + false, + std::nullopt, + 
cudf::null_aware::YES); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); +} + +TEST_F(NullTest, NullProject) +{ + auto udf = R"***( +__device__ inline void null_lerp( + float* output, + cuda::std::optional low, + cuda::std::optional high, + cuda::std::optional t +) +{ +auto lerp = [] (auto l, auto h, auto t) { +return l - t * l + t * h; +}; + *output = low.has_value() && high.has_value() && t.has_value() + ? lerp(*low, *high, *t) + : 0.0F; +} +)***"; + + auto low = + cudf::test::fixed_width_column_wrapper(low_host.begin(), low_host.end(), fourth()) + .release(); + auto high = + cudf::test::fixed_width_column_wrapper(high_host.begin(), high_host.end(), fifth()) + .release(); + auto t = cudf::test::fixed_width_column_wrapper(t_host.begin(), t_host.end()).release(); + + auto expected_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return ((i % 5) == 0) && ((i % 4) == 0) ? expected_host[i] : 0.0F; }); + + auto expected = + cudf::test::fixed_width_column_wrapper(expected_iter, expected_iter + low_host.size()) + .release(); + + auto cuda_result = cudf::transform({*low, *high, *t}, + udf, + cudf::data_type(cudf::type_id::FLOAT32), + false, + std::nullopt, + cudf::null_aware::YES); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*cuda_result, *expected); +} + } // namespace transformation diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index ce55a4a841a..19d49e143a6 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type +from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type, null_aware from rmm.librmm.device_buffer cimport device_buffer from rmm.librmm.cuda_stream_view cimport cuda_stream_view @@ -45,6 +45,7 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: data_type output_type, bool is_ptx, optional[void *] user_data, + null_aware is_null_aware, cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index 627ebe96ff8..727bc4e926c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -49,6 +49,10 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: EQUAL UNEQUAL + cpdef enum class null_aware(bool): + NO + YES + cpdef enum class nan_equality(bool): ALL_EQUAL UNEQUAL diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index 09fc4cf72bd..8785dd64f40 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. 
 from libcpp cimport bool
-from pylibcudf.libcudf.types cimport bitmask_type, data_type
+from pylibcudf.libcudf.types cimport bitmask_type, data_type, null_aware
 
 from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
@@ -27,6 +27,7 @@ cpdef Column transform(list[Column] inputs,
                        str transform_udf,
                        DataType output_type,
                        bool is_ptx,
+                       null_aware is_null_aware,
                        Stream stream = *)
 
 cpdef tuple[Table, Column] encode(Table input, Stream stream = *)
diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi
index b38d19d732a..3d277b4f016 100644
--- a/python/pylibcudf/pylibcudf/transform.pyi
+++ b/python/pylibcudf/pylibcudf/transform.pyi
@@ -5,7 +5,7 @@ from pylibcudf.column import Column
 from pylibcudf.expressions import Expression
 from pylibcudf.gpumemoryview import gpumemoryview
 from pylibcudf.table import Table
-from pylibcudf.types import DataType
+from pylibcudf.types import DataType, NullAware
 
 def nans_to_nulls(
     input: Column, stream: Stream | None = None
@@ -24,6 +24,7 @@ def transform(
     transform_udf: str,
     output_type: DataType,
     is_ptx: bool,
+    is_null_aware: NullAware = NullAware.NO,
     stream: Stream | None = None,
 ) -> Column: ...
 def encode(
diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx
index 0581bef2b19..13049782219 100644
--- a/python/pylibcudf/pylibcudf/transform.pyx
+++ b/python/pylibcudf/pylibcudf/transform.pyx
@@ -21,7 +21,7 @@ from rmm.pylibrmm.stream cimport Stream
 
 from .column cimport Column
 from .gpumemoryview cimport gpumemoryview
-from .types cimport DataType
+from .types cimport DataType, null_aware
 from .utils cimport _get_stream
 
 __all__ = [
@@ -163,6 +163,7 @@ cpdef Column transform(list[Column] inputs,
                        str transform_udf,
                        DataType output_type,
                        bool is_ptx,
+                       null_aware is_null_aware,
                        Stream stream=None):
     """Create a new column by applying a transform function against multiple
     input columns.
@@ -178,6 +179,9 @@ cpdef Column transform(list[Column] inputs,
     is_ptx : bool
         If `True`, the UDF is treated as PTX code.
         If `False`, the UDF is treated as CUDA code.
+    is_null_aware : NullAware
+        If `NO`, the UDF receives non-nullable parameters.
+        If `YES`, the UDF receives its inputs as nullable (optional) values.
 
     Returns
     -------
@@ -188,6 +192,7 @@ cpdef Column transform(list[Column] inputs,
     cdef unique_ptr[column] c_result
     cdef string c_transform_udf = transform_udf.encode()
     cdef bool c_is_ptx = is_ptx
+    cdef null_aware c_is_null_aware = is_null_aware
     cdef optional[void *] user_data
     stream = _get_stream(stream)
@@ -202,6 +207,7 @@ cpdef Column transform(list[Column] inputs,
             output_type.c_obj,
             c_is_ptx,
             user_data,
+            c_is_null_aware,
             stream.view(),
         )
diff --git a/python/pylibcudf/pylibcudf/types.pxd b/python/pylibcudf/pylibcudf/types.pxd
index e9de74b878c..2a263862b59 100644
--- a/python/pylibcudf/pylibcudf/types.pxd
+++ b/python/pylibcudf/pylibcudf/types.pxd
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.types cimport (
     null_equality,
     null_order,
     null_policy,
+    null_aware,
     order,
     size_type,
     sorted,
diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi
index 377a389dafa..e3c0951da52 100644
--- a/python/pylibcudf/pylibcudf/types.pyi
+++ b/python/pylibcudf/pylibcudf/types.pyi
@@ -29,6 +29,10 @@ class NullEquality(IntEnum):
     EQUAL = ...
     UNEQUAL = ...
 
+class NullAware(IntEnum):
+    NO = ...
+    YES = ...
+
 class NullOrder(IntEnum):
     AFTER = ...
     BEFORE = ...
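A minimal end-to-end sketch of the new flag from Python, mirroring the C++ `IsNull` test above. The column construction and UDF body here are illustrative only, not part of this diff:

```python
import pyarrow as pa
import pylibcudf as plc

# A float32 column with a null; with NullAware.YES the UDF sees each input
# as a cuda::std::optional rather than a raw value.
col = plc.Column.from_arrow(pa.array([1.0, None, 3.0], type=pa.float32()))

# Null-aware UDFs must be CUDA source: is_ptx=True with NullAware.YES is
# rejected by the CUDF_EXPECTS added in transform.cpp.
udf = """
__device__ void is_null(bool* out, cuda::std::optional<float> x) {
    *out = !x.has_value();
}
"""

result = plc.transform.transform(
    [col],
    transform_udf=udf,
    output_type=plc.DataType(plc.TypeId.BOOL8),
    is_ptx=False,
    is_null_aware=plc.types.NullAware.YES,
)
# result is a BOOL8 column: [False, True, False]
```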
diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 6e3eb19be1a..a84171f991c 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -17,6 +17,7 @@ from pylibcudf.libcudf.types import interpolation as Interpolation # no-cython- from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint, isort:skip from pylibcudf.libcudf.types import nan_equality as NanEquality # no-cython-lint, isort:skip from pylibcudf.libcudf.types import null_equality as NullEquality # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import null_aware as NullAware # no-cython-lint, isort:skip from pylibcudf.libcudf.types import null_order as NullOrder # no-cython-lint, isort:skip from pylibcudf.libcudf.types import order as Order # no-cython-lint, isort:skip from pylibcudf.libcudf.types import sorted as Sorted # no-cython-lint, isort:skip @@ -79,6 +80,7 @@ __all__ = [ "NanPolicy", "NullEquality", "NullOrder", + "NullAware", "NullPolicy", "Order", "SIZE_TYPE", @@ -313,6 +315,7 @@ Interpolation.__str__ = Interpolation.__repr__ MaskState.__str__ = MaskState.__repr__ NanEquality.__str__ = NanEquality.__repr__ NullEquality.__str__ = NullEquality.__repr__ +NullAware.__str__ = NullAware.__repr__ NullOrder.__str__ = NullOrder.__repr__ Order.__str__ = Order.__repr__ Sorted.__str__ = Sorted.__repr__ diff --git a/python/pylibcudf/tests/test_transform.py b/python/pylibcudf/tests/test_transform.py index 62b9b6f636f..2d0a9e188fd 100644 --- a/python/pylibcudf/tests/test_transform.py +++ b/python/pylibcudf/tests/test_transform.py @@ -102,5 +102,6 @@ def op(a, b, c): transform_udf=ptx, output_type=plc.DataType(plc.TypeId.FLOAT64), is_ptx=True, + is_null_aware=plc.types.NullAware.NO, ) assert_column_eq(expect, got) From e16e76291b5cbc4f8f265367451c49d9dfd133a8 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 19 Aug 2025 07:59:05 -0400 Subject: [PATCH 156/366] Pin polars version to <1.33 (#19582) Updates the upper pinning for polars to `1.32.1`. Also bumped the Narwhals version to `2.0.1` to avoid having to xfail ~10 more tests (so this PR contributes to https://github.com/rapidsai/cudf/issues/19594 too). 
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) - Matthew Roeschke (https://github.com/mroeschke) - Tom Augspurger (https://github.com/TomAugspurger) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19582 --- ci/test_narwhals.sh | 37 +---- .../all_cuda-129_arch-aarch64.yaml | 2 +- .../all_cuda-129_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/recipe.yaml | 2 +- dependencies.yaml | 4 +- .../cudf_polars/dsl/expressions/boolean.py | 7 + .../cudf_polars/dsl/expressions/datetime.py | 1 + .../cudf_polars/dsl/expressions/string.py | 44 ++++-- .../cudf_polars/dsl/expressions/struct.py | 7 +- python/cudf_polars/cudf_polars/dsl/ir.py | 128 +++++++++++++----- .../cudf_polars/cudf_polars/dsl/translate.py | 28 +++- .../cudf_polars/dsl/utils/aggregations.py | 5 +- .../cudf_polars/experimental/expressions.py | 2 +- .../cudf_polars/experimental/io.py | 9 +- .../cudf_polars/experimental/select.py | 2 +- .../cudf_polars/testing/asserts.py | 29 ++-- .../cudf_polars/cudf_polars/testing/plugin.py | 18 +-- .../cudf_polars/cudf_polars/utils/versions.py | 4 +- python/cudf_polars/pyproject.toml | 2 +- .../tests/dsl/test_serialization.py | 33 ++++- .../tests/expressions/test_booleanfunction.py | 14 +- .../tests/expressions/test_stringfunction.py | 23 +++- python/cudf_polars/tests/test_cache.py | 11 +- python/cudf_polars/tests/test_groupby.py | 9 +- python/cudf_polars/tests/test_join.py | 15 ++ python/cudf_polars/tests/test_scan.py | 15 +- 26 files changed, 317 insertions(+), 136 deletions(-) diff --git a/ci/test_narwhals.sh b/ci/test_narwhals.sh index a6ade73e5f3..d30d0d92c05 100755 --- a/ci/test_narwhals.sh +++ b/ci/test_narwhals.sh @@ -27,53 +27,20 @@ rapids-pip-retry install -U -e . 
rapids-logger "Check narwhals versions" python -c "import narwhals; print(narwhals.show_versions())" -# test_horizontal_slice_with_series: xpassing in Narwhals, fixed in cuDF https://github.com/rapidsai/cudf/pull/18558 -# test_rolling_mean_expr_lazy_grouped: xpassing in Narwhals -# test_rolling_std_expr_lazy_grouped: xpassing in Narwhals -# test_rolling_sum_expr_lazy_grouped: xpassing in Narwhals -# test_rolling_var_expr_lazy_grouped: xpassing in Narwhals -# test_offset_by_tz: xpassing in Narwhals -# test_double_same_aggregation: xpassing in Narwhals -# test_all_kind_of_aggs: xpassing in Narwhals -TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF="not test_rolling_mean_expr_lazy_grouped[cudf-expected_a4-3-1-True] \ -and not test_rolling_mean_expr_lazy_grouped[cudf-expected_a5-4-1-True] \ -and not test_rolling_mean_expr_lazy_grouped[cudf-expected_a6-5-1-True] \ -and not test_rolling_std_expr_lazy_grouped[cudf-expected_a4-3-1-True-1] \ -and not test_rolling_std_expr_lazy_grouped[cudf-expected_a5-4-1-True-1] \ -and not test_rolling_std_expr_lazy_grouped[cudf-expected_a6-5-1-True-0] \ -and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a4-3-1-True] \ -and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a5-4-1-True] \ -and not test_rolling_sum_expr_lazy_grouped[cudf-expected_a6-5-1-True] \ -and not test_rolling_var_expr_lazy_grouped[cudf-expected_a4-3-1-True-1] \ -and not test_rolling_var_expr_lazy_grouped[cudf-expected_a5-4-1-True-1] \ -and not test_rolling_var_expr_lazy_grouped[cudf-expected_a6-5-1-True-0] \ -and not test_horizontal_slice_with_series \ -and not test_offset_by_tz \ -and not test_double_same_aggregation \ -and not test_all_kind_of_aggs" - rapids-logger "Run narwhals tests for cuDF" PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 python -m pytest \ --cache-clear \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-narwhals.xml" \ -p xdist \ -p env \ -p no:pytest_benchmark \ -p cudf.testing.narwhals_test_plugin \ - -k "$TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF" \ --numprocesses=8 \ --dist=worksteal \ --constructors=cudf -# test_dtypes: With cudf.pandas loaded, to_pandas() preserves Arrow dtypes like list and struct, so pandas -# columns aren't object anymore. The test expects object, causing a mismatch. -# test_nan: Narwhals expect this test to fail, but as of polars 1.30 we raise a RuntimeError, -# not polars ComputeError. So the test is looking for the wrong error and fails. 
-# test_floordiv_int_by_zero: This bug is fixed as of 25.08, narwhals should remove the xfail +# test_datetime[polars[lazy]]: Fixed in the next narwhals release >2.0.1 TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF_POLARS=" \ -test_dtypes or \ -test_nan or \ -test_floordiv_int_by_zero \ +test_datetime[polars[lazy]] \ " rapids-logger "Run narwhals tests for cuDF Polars" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index ce67a5abbca..bd9e288cb43 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.4.0dev0 - pandoc -- polars>=1.28,<1.32 +- polars>=1.28,<1.33 - pre-commit - pyarrow>=14.0.0,<20.0.0a0 - pydata-sphinx-theme>=0.15.4 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 04178f88b83..7d3b41c97e5 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -67,7 +67,7 @@ dependencies: - pandas - pandas>=2.0,<2.4.0dev0 - pandoc -- polars>=1.28,<1.32 +- polars>=1.28,<1.33 - pre-commit - pyarrow>=14.0.0,<20.0.0a0 - pydata-sphinx-theme>=0.15.4 diff --git a/conda/recipes/cudf-polars/recipe.yaml b/conda/recipes/cudf-polars/recipe.yaml index d3c080249f1..4c398235a1a 100644 --- a/conda/recipes/cudf-polars/recipe.yaml +++ b/conda/recipes/cudf-polars/recipe.yaml @@ -50,7 +50,7 @@ requirements: - nvidia-ml-py - python - pylibcudf =${{ version }} - - polars >=1.28,<1.32 + - polars >=1.28,<1.33 - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - if: python == "3.10" then: typing_extensions diff --git a/dependencies.yaml b/dependencies.yaml index de83eae1276..d28f2b5e5fb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -727,7 +727,7 @@ dependencies: packages: - nvidia-ml-py - packaging - - polars>=1.28,<1.32 + - polars>=1.28,<1.33 specific: - output_types: [requirements, pyproject] matrices: @@ -1142,4 +1142,4 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - narwhals==1.47 + - narwhals==2.0.1 diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py index 81b28be8086..ce8c4fc3276 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py @@ -38,6 +38,7 @@ class Name(IntEnum): Any = auto() AnyHorizontal = auto() IsBetween = auto() + IsClose = auto() IsDuplicated = auto() IsFinite = auto() IsFirstDistinct = auto() @@ -85,6 +86,12 @@ def __init__( BooleanFunction.Name.IsLastDistinct, BooleanFunction.Name.IsUnique, ) + if self.name in { + BooleanFunction.Name.IsClose, + }: + raise NotImplementedError( + f"Boolean function {self.name}" + ) # pragma: no cover @staticmethod def _distinct( diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py index 46eade5c507..5c663f15697 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py @@ -38,6 +38,7 @@ class Name(IntEnum): Datetime = auto() DatetimeFunction = auto() Day = auto() + DaysInMonth = auto() Duration = auto() Hour = auto() IsLeapYear = auto() diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/string.py b/python/cudf_polars/cudf_polars/dsl/expressions/string.py index 
d47828b4175..b7cd24972ad 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/string.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/string.py @@ -19,6 +19,7 @@ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr from cudf_polars.dsl.expressions.literal import Literal, LiteralColumn from cudf_polars.dsl.utils.reshape import broadcast +from cudf_polars.utils.versions import POLARS_VERSION_LT_132 if TYPE_CHECKING: from typing_extensions import Self @@ -270,7 +271,11 @@ def _validate_input(self) -> None: if isinstance(self.children[1], Literal): _, width = self.children assert isinstance(width, Literal) - if width.value is not None and width.value < 0: + if ( + POLARS_VERSION_LT_132 + and width.value is not None + and width.value < 0 + ): # pragma: no cover dtypestr = dtype_str_repr(width.dtype.polars) raise InvalidOperationError( f"conversion from `{dtypestr}` to `u64` " @@ -283,6 +288,8 @@ def _create_regex_program( pattern: str, flags: plc.strings.regex_flags.RegexFlags = plc.strings.regex_flags.RegexFlags.DEFAULT, ) -> plc.strings.regex_program.RegexProgram: + if pattern == "": + raise NotImplementedError("Empty regex pattern is not yet supported") try: return plc.strings.regex_program.RegexProgram.create( pattern, @@ -380,11 +387,14 @@ def do_evaluate( plc.DataType(plc.TypeId.BOOL8), ) - if not plc.reduce.reduce( - all_gt_0, - plc.aggregation.all(), - plc.DataType(plc.TypeId.BOOL8), - ).to_py(): + if ( + POLARS_VERSION_LT_132 + and not plc.reduce.reduce( + all_gt_0, + plc.aggregation.all(), + plc.DataType(plc.TypeId.BOOL8), + ).to_py() + ): # pragma: no cover raise InvalidOperationError("fill conversion failed.") return Column( @@ -792,8 +802,15 @@ def do_evaluate( dtype=self.dtype, ) elif self.name is StringFunction.Name.PadStart: - (column,) = columns - width, char = self.options + if POLARS_VERSION_LT_132: # pragma: no cover + (column,) = columns + width, char = self.options + else: + (column, width_col) = columns + (char,) = self.options + # TODO: Maybe accept a string scalar in + # cudf::strings::pad to avoid DtoH transfer + width = width_col.obj.to_scalar().to_py() return Column( plc.strings.padding.pad( column.obj, width, plc.strings.SideType.LEFT, char @@ -801,8 +818,15 @@ def do_evaluate( dtype=self.dtype, ) elif self.name is StringFunction.Name.PadEnd: - (column,) = columns - width, char = self.options + if POLARS_VERSION_LT_132: # pragma: no cover + (column,) = columns + width, char = self.options + else: + (column, width_col) = columns + (char,) = self.options + # TODO: Maybe accept a string scalar in + # cudf::strings::pad to avoid DtoH transfer + width = width_col.obj.to_scalar().to_py() return Column( plc.strings.padding.pad( column.obj, width, plc.strings.SideType.RIGHT, char diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/struct.py b/python/cudf_polars/cudf_polars/dsl/expressions/struct.py index 034e1253981..f19f074254b 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/struct.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/struct.py @@ -29,7 +29,6 @@ class StructFunction(Expr): class Name(IntEnum): """Internal and picklable representation of polars' `StructFunction`.""" - FieldByIndex = auto() FieldByName = auto() RenameFields = auto() PrefixFields = auto() @@ -37,6 +36,7 @@ class Name(IntEnum): JsonEncode = auto() WithFields = auto() # TODO: https://github.com/rapidsai/cudf/issues/19284 MapFieldNames = auto() # TODO: https://github.com/rapidsai/cudf/issues/19285 + FieldByIndex = auto() MultipleFields = ( 
auto() ) # https://github.com/pola-rs/polars/pull/23022#issuecomment-2933910958 @@ -56,8 +56,7 @@ def from_polars(cls, obj: pl_expr.StructFunction) -> Self: __slots__ = ("name", "options") _non_child = ("dtype", "name", "options") - _valid_ops: ClassVar[set[Name]] = { - Name.FieldByIndex, + _supported_ops: ClassVar[set[Name]] = { Name.FieldByName, Name.RenameFields, Name.PrefixFields, @@ -77,7 +76,7 @@ def __init__( self.name = name self.children = children self.is_pointwise = True - if self.name not in self._valid_ops: + if self.name not in self._supported_ops: raise NotImplementedError( f"Struct function {self.name}" ) # pragma: no cover diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 28b20c835a0..161c8f4a576 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -47,6 +47,7 @@ from polars.polars import _expr_nodes as pl_expr + from cudf_polars.containers.dataframe import NamedColumn from cudf_polars.typing import CSECache, ClosedInterval, Schema, Slice as Zlice from cudf_polars.utils.config import ParquetOptions from cudf_polars.utils.timer import Timer @@ -324,8 +325,17 @@ def __init__( raise NotImplementedError( "Read from cloud storage" ) # pragma: no cover; no test yet - if any(str(p).startswith("https:/") for p in self.paths): + if any( + str(p).startswith("https:/" if POLARS_VERSION_LT_131 else "https://") + for p in self.paths + ): raise NotImplementedError("Read from https") + if any( + str(p).startswith("file:/" if POLARS_VERSION_LT_131 else "file://") + for p in self.paths + ): + # TODO: removing the file:// may work + raise NotImplementedError("Read from file URI") if self.typ == "csv": if self.reader_options["skip_rows_after_header"] != 0: raise NotImplementedError("Skipping rows after header in CSV reader") @@ -954,10 +964,10 @@ class Cache(IR): _non_child = ("schema", "key", "refcount") key: int """The cache key.""" - refcount: int + refcount: int | None """The number of cache hits.""" - def __init__(self, schema: Schema, key: int, refcount: int, value: IR): + def __init__(self, schema: Schema, key: int, refcount: int | None, value: IR): self.schema = schema self.key = key self.refcount = refcount @@ -979,7 +989,7 @@ def is_equal(self, other: Self) -> bool: # noqa: D102 @classmethod @nvtx_annotate_cudf_polars(message="Cache") def do_evaluate( - cls, key: int, refcount: int, df: DataFrame + cls, key: int, refcount: int | None, df: DataFrame ) -> DataFrame: # pragma: no cover; basic evaluation never calls this """Evaluate and return a dataframe.""" # Our value has already been computed for us, so let's just @@ -998,12 +1008,15 @@ def evaluate(self, *, cache: CSECache, timer: Timer | None) -> DataFrame: cache[self.key] = (result, 0) return result else: - hits += 1 - if hits == self.refcount: + if self.refcount is None: + return result + + hits += 1 # pragma: no cover + if hits == self.refcount: # pragma: no cover del cache[self.key] - else: + else: # pragma: no cover cache[self.key] = (result, hits) - return result + return result # pragma: no cover class DataFrameScan(IR): @@ -1758,6 +1771,38 @@ def _reorder_maps( [plc.types.NullOrder.AFTER, plc.types.NullOrder.AFTER], ).columns() + @staticmethod + def _build_columns( + columns: Iterable[plc.Column], + template: Iterable[NamedColumn], + *, + left: bool = True, + empty: bool = False, + rename: Callable[[str], str] = lambda name: name, + ) -> list[Column]: + if empty: + return [ + Column( + 
plc.column_factories.make_empty_column(col.dtype.plc), + col.dtype, + name=rename(col.name), + ) + for col in template + ] + + columns = [ + Column(new, col.dtype, name=rename(col.name)) + for new, col in zip(columns, template, strict=True) + ] + + if left: + columns = [ + col.sorted_like(orig) + for col, orig in zip(columns, template, strict=True) + ] + + return columns + @classmethod @nvtx_annotate_cudf_polars(message="Join") def do_evaluate( @@ -1780,28 +1825,32 @@ def do_evaluate( if how == "Cross": # Separate implementation, since cross_join returns the # result, not the gather maps - columns = plc.join.cross_join(left.table, right.table).columns() - left_cols = [ - Column(new, name=old.name, dtype=old.dtype).sorted_like(old) - for new, old in zip( - columns[: left.num_columns], left.columns, strict=True - ) - ] - right_cols = [ - Column( - new, - name=name + if right.num_rows == 0: + left_cols = Join._build_columns([], left.columns, empty=True) + right_cols = Join._build_columns( + [], + right.columns, + left=False, + empty=True, + rename=lambda name: name if name not in left.column_names_set else f"{name}{suffix}", - dtype=old.dtype, - ) - for new, name, old in zip( - columns[left.num_columns :], - right.column_names, - right.columns, - strict=True, ) - ] + return DataFrame([*left_cols, *right_cols]) + + columns = plc.join.cross_join(left.table, right.table).columns() + left_cols = Join._build_columns( + columns[: left.num_columns], + left.columns, + ) + right_cols = Join._build_columns( + columns[left.num_columns :], + right.columns, + rename=lambda name: name + if name not in left.column_names_set + else f"{name}{suffix}", + left=False, + ) return DataFrame([*left_cols, *right_cols]).slice(zlice) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 left_on = DataFrame(broadcast(*(e.evaluate(left) for e in left_on_exprs))) @@ -2485,18 +2534,27 @@ def do_evaluate( class Empty(IR): - """Represents an empty DataFrame.""" + """Represents an empty DataFrame with a known schema.""" - __slots__ = () - _non_child = () + __slots__ = ("schema",) + _non_child = ("schema",) - def __init__(self) -> None: - self.schema = {} - self._non_child_args = () + def __init__(self, schema: Schema): + self.schema = schema + self._non_child_args = (schema,) self.children = () @classmethod @nvtx_annotate_cudf_polars(message="Empty") - def do_evaluate(cls) -> DataFrame: # pragma: no cover + def do_evaluate(cls, schema: Schema) -> DataFrame: # pragma: no cover """Evaluate and return a dataframe.""" - return DataFrame([]) + return DataFrame( + [ + Column( + plc.column_factories.make_empty_column(dtype.plc), + dtype=dtype, + name=name, + ) + for name, dtype in schema.items() + ] + ) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index aa82883a51d..27aa6bfbf4a 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -30,7 +30,11 @@ from cudf_polars.dsl.utils.rolling import rewrite_rolling from cudf_polars.typing import Schema from cudf_polars.utils import config, sorting -from cudf_polars.utils.versions import POLARS_VERSION_LT_131 +from cudf_polars.utils.versions import ( + POLARS_VERSION_LT_131, + POLARS_VERSION_LT_132, + POLARS_VERSION_LT_1323, +) if TYPE_CHECKING: from polars import GPUEngine @@ -91,7 +95,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes 
(e.g. adding new nodes), major is bumped for
        # incompatible changes (e.g. renaming nodes).
-        if (version := self.visitor.version()) >= (8, 1):
+        if (version := self.visitor.version()) >= (10, 1):
             e = NotImplementedError(
                 f"No support for polars IR {version=}"
             )  # pragma: no cover; no such version for now.
@@ -218,6 +222,13 @@ def _(node: pl_ir.PythonScan, translator: Translator, schema: Schema) -> ir.IR:
 @_translate_ir.register
 def _(node: pl_ir.Scan, translator: Translator, schema: Schema) -> ir.IR:
     typ, *options = node.scan_type
+    paths = node.paths
+    # Polars can produce a Scan with an empty ``node.paths`` (e.g. the native
+    # Iceberg reader on a table with no data files yet). In this case, polars returns an
+    # empty DataFrame with the declared schema. Mirror that here by
+    # replacing the Scan with an Empty IR node.
+    if not paths:  # pragma: no cover
+        return ir.Empty(schema)
     if typ == "ndjson":
         (reader_options,) = map(json.loads, options)
         cloud_options = None
@@ -248,7 +259,7 @@ def _(node: pl_ir.Scan, translator: Translator, schema: Schema) -> ir.IR:
             typ,
             reader_options,
             cloud_options,
-            node.paths,
+            paths,
             with_columns,
             skip_rows,
             n_rows,
@@ -263,8 +274,15 @@ def _(node: pl_ir.Scan, translator: Translator, schema: Schema) -> ir.IR:

 @_translate_ir.register
 def _(node: pl_ir.Cache, translator: Translator, schema: Schema) -> ir.IR:
+    if POLARS_VERSION_LT_1323:  # pragma: no cover
+        refcount = node.cache_hits
+    else:
+        refcount = None
     return ir.Cache(
-        schema, node.id_, node.cache_hits, translator.translate_ir(n=node.input)
+        schema,
+        node.id_,
+        refcount,
+        translator.translate_ir(n=node.input),
     )

@@ -523,7 +541,7 @@ def _(node: pl_ir.Sink, translator: Translator, schema: Schema) -> ir.IR:
     return ir.Sink(
         schema=schema,
         kind=sink_kind,
-        path=file["target"],
+        path=file["target"] if POLARS_VERSION_LT_132 else file["target"]["Local"],
         parquet_options=translator.config_options.parquet_options,
         options=options,
         cloud_options=cloud_options,
diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
index e1ed7a0d3a2..29fd7155799 100644
--- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
+++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
@@ -16,6 +16,7 @@
 from cudf_polars.containers import DataType
 from cudf_polars.dsl import expr, ir
 from cudf_polars.dsl.expressions.base import ExecutionContext
+from cudf_polars.utils.versions import POLARS_VERSION_LT_1323

 if TYPE_CHECKING:
     from collections.abc import Callable, Generator, Iterable, Sequence
@@ -158,12 +159,12 @@ def decompose_single_agg(
     # - ROLLING: sum(all-null window) => null; sum(empty window) => 0 (fill only if empty)
     #
     # Must post-process because libcudf returns null for both empty and all-null windows/groups
-    if context == ExecutionContext.GROUPBY:
+    if not POLARS_VERSION_LT_1323 or context == ExecutionContext.GROUPBY:
         # GROUPBY: always fill top-level nulls with 0
         return [(named_expr, True)], expr.NamedExpr(
             name, replace_nulls(col, 0, is_top=is_top)
         )
-    else:
+    else:  # pragma: no cover
         # ROLLING:
         # Add a second rolling agg to compute the window size, then only
         # replace nulls with 0 when the window size is 0 (ie. empty window).
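For context on the GROUPBY branch above, the polars semantics being reproduced can be
checked directly on the CPU engine. A small, self-contained illustration:

```python
import polars as pl

df = pl.DataFrame({"k": [1, 1, 2], "v": [None, None, 3]})
# Polars semantics: the sum of an all-null group is 0, not null, while
# libcudf returns null for both empty and all-null groups. Hence the
# replace_nulls(..., 0) post-processing in decompose_single_agg.
print(df.group_by("k").agg(pl.col("v").sum()).sort("k"))
# k=1 -> 0, k=2 -> 3
```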
diff --git a/python/cudf_polars/cudf_polars/experimental/expressions.py b/python/cudf_polars/cudf_polars/experimental/expressions.py index 68dee9b9974..3c4ff10fc57 100644 --- a/python/cudf_polars/cudf_polars/experimental/expressions.py +++ b/python/cudf_polars/cudf_polars/experimental/expressions.py @@ -419,7 +419,7 @@ def _decompose_expr_node( # For Literal nodes, we don't actually want an # input IR with real columns, because it will # mess up the result of ``HConcat``. - input_ir = Empty() + input_ir = Empty({}) partition_info[input_ir] = PartitionInfo(count=1) partition_count = partition_info[input_ir].count diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index e45b6aae470..57f314ed571 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -17,7 +17,7 @@ import pylibcudf as plc -from cudf_polars.dsl.ir import IR, DataFrameScan, Scan, Sink, Union +from cudf_polars.dsl.ir import IR, DataFrameScan, Empty, Scan, Sink, Union from cudf_polars.experimental.base import ( ColumnStat, ColumnStats, @@ -277,6 +277,13 @@ def do_evaluate( ) +@lower_ir_node.register(Empty) +def _( + ir: Empty, rec: LowerIRTransformer +) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: + return ir, {ir: PartitionInfo(count=1)} # pragma: no cover + + @lower_ir_node.register(Scan) def _( ir: Scan, rec: LowerIRTransformer diff --git a/python/cudf_polars/cudf_polars/experimental/select.py b/python/cudf_polars/cudf_polars/experimental/select.py index 0a3b5820b3d..1ca199a8af6 100644 --- a/python/cudf_polars/cudf_polars/experimental/select.py +++ b/python/cudf_polars/cudf_polars/experimental/select.py @@ -138,7 +138,7 @@ def _( isinstance(expr, (Col, Len)) for expr in traversal([e.value for e in ir.exprs]) ): # Special Case: Selection does not depend on any columns. 
- new_node = ir.reconstruct([input_ir := Empty()]) + new_node = ir.reconstruct([input_ir := Empty({})]) partition_info[input_ir] = partition_info[new_node] = PartitionInfo(count=1) return new_node, partition_info diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 256e0a6267f..80e5eed574b 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -14,6 +14,7 @@ from cudf_polars.dsl.translate import Translator from cudf_polars.utils.config import ConfigOptions, StreamingFallbackMode +from cudf_polars.utils.versions import POLARS_VERSION_LT_1323 if TYPE_CHECKING: from cudf_polars.typing import OptimizationArgs @@ -112,16 +113,26 @@ def assert_gpu_result_equal( # the 'misc' is for 'error: Keywords must be strings' expect = lazydf.collect(**final_polars_collect_kwargs) # type: ignore[call-overload,misc] got = lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[call-overload,misc] + + assert_kwargs_bool: dict[str, bool] = { + "check_row_order": check_row_order, + "check_column_order": check_column_order, + "check_dtypes": check_dtypes, + "check_exact": check_exact, + "categorical_as_str": categorical_as_str, + } + + tol_kwargs: dict[str, float] + if POLARS_VERSION_LT_1323: # pragma: no cover + tol_kwargs = {"rtol": rtol, "atol": atol} + else: + tol_kwargs = {"rel_tol": rtol, "abs_tol": atol} + assert_frame_equal( expect, got, - check_row_order=check_row_order, - check_column_order=check_column_order, - check_dtypes=check_dtypes, - check_exact=check_exact, - rtol=rtol, - atol=atol, - categorical_as_str=categorical_as_str, + **assert_kwargs_bool, + **tol_kwargs, ) @@ -246,10 +257,10 @@ def assert_collect_raises( Useful for controlling optimization settings. polars_except Exception or exceptions polars CPU is expected to raise. If - None, CPU is not expected to raise an exception. + an empty tuple ``()``, CPU is expected to succeed without raising. cudf_except Exception or exceptions polars GPU is expected to raise. If - None, GPU is not expected to raise an exception. + an empty tuple ``()``, GPU is expected to succeed without raising. collect_kwargs Common keyword arguments to pass to collect for both polars CPU and cudf-polars. 
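With the docstring change above, an empty tuple now means "this engine must succeed".
A minimal sketch mirroring `test_string_zfill_column` below (valid for polars >= 1.32;
the frame contents here are illustrative):

```python
import polars as pl

from cudf_polars.testing.asserts import assert_collect_raises

# A negative fill width raises on the CPU engine, while the GPU engine is
# expected to succeed for the column-valued fill case.
ldf = pl.DataFrame({"input_strings": ["1"], "fill": [-1]}).lazy()
q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill")))

assert_collect_raises(
    q,
    polars_except=pl.exceptions.InvalidOperationError,  # CPU must raise
    cudf_except=(),  # empty tuple: GPU must succeed without raising
)
```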
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 8491b2fa400..14956d2cfbc 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -54,6 +54,9 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning", "tests/unit/io/test_delta.py::test_scan_delta_relative": "Need to expose hive partitioning", "tests/unit/io/test_delta.py::test_read_delta_version": "Need to expose hive partitioning", + "tests/unit/io/test_delta.py::test_scan_delta_schema_evolution_nested_struct_field_19915": "Need to expose hive partitioning", + "tests/unit/io/test_delta.py::test_scan_delta_nanosecond_timestamp": "polars generates the wrong schema: https://github.com/pola-rs/polars/issues/23949", + "tests/unit/io/test_delta.py::test_scan_delta_nanosecond_timestamp_nested": "polars generates the wrong schema: https://github.com/pola-rs/polars/issues/23949", "tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed", "tests/unit/io/test_lazy_count_star.py::test_count_parquet[small.parquet-4]": "Debug output on stderr doesn't match", "tests/unit/io/test_lazy_count_star.py::test_count_parquet[foods*.parquet-54]": "Debug output on stderr doesn't match", @@ -64,7 +67,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_partition.py::test_partition_to_memory[io_type1]": "partition sinks not yet supported in standard engine.", "tests/unit/io/test_partition.py::test_partition_to_memory[io_type2]": "partition sinks not yet supported in standard engine.", "tests/unit/io/test_partition.py::test_partition_to_memory[io_type3]": "partition sinks not yet supported in standard engine.", - "tests/unit/io/test_partition.py::test_partition_key_order_22645": "partition sinks not yet supported in standard engine.", "tests/unit/io/test_partition.py::test_partition_to_memory_finish_callback[io_type1]": "partition sinks not yet supported in standard engine.", "tests/unit/io/test_partition.py::test_partition_to_memory_finish_callback[io_type2]": "partition sinks not yet supported in standard engine.", "tests/unit/io/test_partition.py::test_partition_to_memory_finish_callback[io_type3]": "partition sinks not yet supported in standard engine.", @@ -115,11 +117,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype0]": "cannot serialize in-memory sink target.", "tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype1]": "cannot serialize in-memory sink target.", "tests/unit/io/test_parquet_field_overwrites.py::test_required_struct": "cannot serialize in-memory sink target.", - "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR", - "tests/unit/io/test_write.py::test_write_async[-write_csv]": "Need to add include_file_path to IR", - "tests/unit/io/test_write.py::test_write_async[read_parquet-]": "Need to add include_file_path to IR", - "tests/unit/io/test_write.py::test_write_async[-0]": "Need to add include_file_path to IR", - "tests/unit/io/test_write.py::test_write_async[-2]": "Need to add include_file_path to IR", "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed", 
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed", "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly", @@ -143,9 +140,11 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input16-expected16-input_dtype16-output_dtype16]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_group_by.py::test_group_by_lit_series": "Incorrect broadcasting of literals in groupby-agg", + "tests/unit/operations/test_group_by.py::test_group_by_series_lit_22103[False]": "Incorrect broadcasting of literals in groupby-agg", + "tests/unit/operations/test_group_by.py::test_group_by_series_lit_22103[True]": "Incorrect broadcasting of literals in groupby-agg", "tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins", + "tests/unit/operations/test_join.py::test_join_filter_pushdown_iejoin": "Row order differs due to multiple matches per left row index; join results are correct but unsorted", "tests/unit/operations/namespaces/string/test_pad.py::test_str_zfill_unicode_not_respected": "polars doesn't add zeros for unicode characters.", - "tests/unit/operations/test_rolling.py::test_rolling_group_by_empty_groups_by_take_6330": "Ordering difference, might be polars bug", "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU", @@ -156,10 +155,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/test_cse.py::test_nested_cache_no_panic_16553": "Needs https://github.com/rapidsai/cudf/issues/18630", "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised", "tests/unit/test_predicates.py::test_predicate_pushdown_split_pushable": "Casting that raises not supported on GPU", - "tests/unit/io/test_scan.py::test_async_read_21945[scan_type0]": "Debug output on stderr doesn't match", - "tests/unit/io/test_scan.py::test_async_read_21945[scan_type1]": "Debug output on stderr doesn't match", - "tests/unit/io/test_scan.py::test_async_read_21945[scan_type2]": "Debug output on stderr doesn't match", - "tests/unit/io/test_scan.py::test_async_read_21945[scan_type3]": "Debug output on stderr doesn't match", "tests/unit/io/test_scan_row_deletion.py::test_scan_row_deletion_skips_file_with_all_rows_deleted": "The test intentionally corrupts the parquet file, so we cannot read the row count from the header.", "tests/unit/io/test_multiscan.py::test_multiscan_row_index[scan_csv-write_csv-csv]": "Debug output on stderr doesn't match", "tests/unit/functions/range/test_linear_space.py::test_linear_space_date": "Needs https://github.com/pola-rs/polars/issues/23020", @@ -196,6 +191,7 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/lazyframe/test_serde.py::test_lf_serde_roundtrip_binary": "chrono_tz doesn't have all tzdata symlink 
names", # Tests performance difference of CPU engine "tests/unit/operations/test_join.py::test_join_where_eager_perf_21145": "Tests performance bug in CPU engine", + "tests/unit/operations/namespaces/list/test_list.py::test_list_struct_field_perf": "Tests CPU Engine perf", # The test may segfault with the legacy streaming engine. We should # remove this skip when all polars tests use the new streaming engine. "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine", diff --git a/python/cudf_polars/cudf_polars/utils/versions.py b/python/cudf_polars/cudf_polars/utils/versions.py index 82ba779b234..d8695cdb851 100644 --- a/python/cudf_polars/cudf_polars/utils/versions.py +++ b/python/cudf_polars/cudf_polars/utils/versions.py @@ -11,11 +11,13 @@ from polars import __version__ POLARS_VERSION = parse(__version__) - POLARS_LOWER_BOUND = parse("1.28") POLARS_VERSION_LT_129 = POLARS_VERSION < parse("1.29") POLARS_VERSION_LT_130 = POLARS_VERSION < parse("1.30") POLARS_VERSION_LT_131 = POLARS_VERSION < parse("1.31") +POLARS_VERSION_LT_132 = POLARS_VERSION < parse("1.32") +POLARS_VERSION_LT_1321 = POLARS_VERSION < parse("1.32.1") +POLARS_VERSION_LT_1323 = POLARS_VERSION < parse("1.32.3") def _ensure_polars_version() -> None: diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5fadd6b8656..d50c2d08b24 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -21,7 +21,7 @@ requires-python = ">=3.10" dependencies = [ "nvidia-ml-py", "packaging", - "polars>=1.28,<1.32", + "polars>=1.28,<1.33", "pylibcudf==25.10.*,>=0.0.0a0", "typing-extensions; python_version < '3.11'", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/python/cudf_polars/tests/dsl/test_serialization.py b/python/cudf_polars/tests/dsl/test_serialization.py index 3e6a75e45ba..b4b89992e67 100644 --- a/python/cudf_polars/tests/dsl/test_serialization.py +++ b/python/cudf_polars/tests/dsl/test_serialization.py @@ -12,7 +12,11 @@ from cudf_polars.dsl.expressions.boolean import BooleanFunction from cudf_polars.dsl.expressions.datetime import TemporalFunction from cudf_polars.dsl.expressions.string import StringFunction -from cudf_polars.utils.versions import POLARS_VERSION_LT_131 +from cudf_polars.utils.versions import ( + POLARS_VERSION_LT_131, + POLARS_VERSION_LT_132, + POLARS_VERSION_LT_1321, +) if not POLARS_VERSION_LT_131: from cudf_polars.dsl.expressions.struct import StructFunction @@ -46,8 +50,31 @@ def test_from_polars_all_names(function): polars_function = getattr(pl_expr, function.__name__) polars_names = [name for name in dir(polars_function) if not name.startswith("_")] # Check names advertised by polars are the same as we advertise - assert set(polars_names) == set(function.Name.__members__) - for name in function.Name: + polars_names_set = set(polars_names) + cudf_polars_names_set = set(function.Name.__members__) + if not POLARS_VERSION_LT_132 and function == StructFunction: + cudf_polars_names_set = cudf_polars_names_set - { + "FieldByIndex", + "MultipleFields", + } + if POLARS_VERSION_LT_1321 and function == TemporalFunction: + cudf_polars_names_set = cudf_polars_names_set - { + "DaysInMonth", + } + if POLARS_VERSION_LT_132 and function == BooleanFunction: + cudf_polars_names_set = cudf_polars_names_set - {"IsClose"} + assert polars_names_set == cudf_polars_names_set + names = function.Name + if not POLARS_VERSION_LT_132 and function == StructFunction: + names = set(names) - { + StructFunction.Name.FieldByIndex, + StructFunction.Name.MultipleFields, + } + if POLARS_VERSION_LT_1321 and function == TemporalFunction: + names = set(names) - {TemporalFunction.Name.DaysInMonth} + if POLARS_VERSION_LT_132 and function == BooleanFunction: + names = set(names) - {BooleanFunction.Name.IsClose} + for name in names: attr = getattr(polars_function, name.name) assert function.Name.from_polars(attr) == name diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index 200b9571c7f..236a35935b8 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -12,7 +12,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) -from cudf_polars.utils.versions import POLARS_VERSION_LT_130 +from cudf_polars.utils.versions import POLARS_VERSION_LT_130, POLARS_VERSION_LT_132 if TYPE_CHECKING: from collections.abc import Callable @@ -242,3 +242,15 @@ def test_expr_is_in_empty_list(): ldf = pl.LazyFrame({"a": [1, 2, 3, 4]}) q = ldf.select(pl.col("a").is_in([])) assert_gpu_result_equal(q) + + +def test_boolean_is_close(request): + request.applymarker( + pytest.mark.xfail( + condition=POLARS_VERSION_LT_132, reason="Not supported until polars 1.32" + ) + ) + ldf = pl.LazyFrame({"a": [1.0, 1.2, 1.4, 1.45, 1.6]}) + q = ldf.select(pl.col("a").is_close(1.4, abs_tol=0.1)) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 3f1874df702..21fa5779757 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ 
b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -18,6 +18,7 @@ POLARS_VERSION_LT_129, POLARS_VERSION_LT_130, POLARS_VERSION_LT_131, + POLARS_VERSION_LT_132, ) @@ -440,7 +441,7 @@ def test_invalid_regex_raises(): ) -@pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)"]) +@pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""]) def test_unsupported_regex_raises(pattern): df = pl.LazyFrame({"a": ["abc"]}) @@ -530,10 +531,15 @@ def test_string_zfill(fill, input_strings): q = ldf.select(pl.col("a").str.zfill(fill)) if fill is not None and fill < 0: + cudf_except = ( + pl.exceptions.InvalidOperationError + if not POLARS_VERSION_LT_132 + else pl.exceptions.ComputeError + ) assert_collect_raises( q, polars_except=pl.exceptions.InvalidOperationError, - cudf_except=pl.exceptions.ComputeError, + cudf_except=cudf_except, ) else: assert_gpu_result_equal(q) @@ -581,12 +587,19 @@ def test_string_zfill_column(fill): ).lazy() q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill"))) if fill is not None and fill < 0: + cudf_except = ( + ( + pl.exceptions.InvalidOperationError + if not POLARS_VERSION_LT_130 + else pl.exceptions.ComputeError + ) + if POLARS_VERSION_LT_132 + else () + ) assert_collect_raises( q, polars_except=pl.exceptions.InvalidOperationError, - cudf_except=pl.exceptions.InvalidOperationError - if not POLARS_VERSION_LT_130 - else pl.exceptions.ComputeError, + cudf_except=cudf_except, ) else: assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_cache.py b/python/cudf_polars/tests/test_cache.py index 90c14911287..e242cd52cb2 100644 --- a/python/cudf_polars/tests/test_cache.py +++ b/python/cudf_polars/tests/test_cache.py @@ -2,15 +2,24 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import pytest + import polars as pl from cudf_polars import Translator from cudf_polars.dsl import ir from cudf_polars.dsl.traversal import traversal from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.utils.versions import POLARS_VERSION_LT_1323 -def test_cache(): +def test_cache(request): + request.applymarker( + pytest.mark.xfail( + condition=not POLARS_VERSION_LT_1323, + reason="python no longer manages cache hits", + ) + ) df1 = pl.LazyFrame( { "a": [1, 2, 3, 4, 5, 6, 7], diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index b47cc2962db..38a1cee6ab7 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -14,6 +14,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.utils.versions import POLARS_VERSION_LT_1321 @pytest.fixture @@ -213,7 +214,13 @@ def test_groupby_nan_minmax_raises(op): pl.lit([[4, 5, 6]]).alias("value"), marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"), ), - pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)), + pytest.param( + pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)), + marks=pytest.mark.xfail( + condition=not POLARS_VERSION_LT_1321, + reason="https://github.com/rapidsai/cudf/issues/19610", + ), + ), pl.col("float") * (1 - pl.col("int")), [pl.lit(2).alias("value"), pl.col("float") * 2], ], diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index f7d67341bcb..0670e6c5643 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -11,6 +11,7 @@ assert_ir_translation_raises, get_default_engine, ) +from cudf_polars.utils.versions import 
POLARS_VERSION_LT_132


@@ -157,3 +158,17 @@ def test_join_where(left, right, conditions, zlice):
     # therefore we only check the length
     assert_gpu_result_equal(q_len)
+
+
+def test_cross_join_empty_right_table(request):
+    request.applymarker(
+        pytest.mark.xfail(condition=POLARS_VERSION_LT_132, reason="nested loop join")
+    )
+    a = pl.LazyFrame({"a": [1, 2, 3], "x": [7, 2, 1]})
+    b = pl.LazyFrame({"b": [2, 2, 2], "x": [7, 1, 3]})
+
+    q = a.join(b, how="cross").filter(
+        (pl.col("a") == pl.col("a")) & (pl.col("b") < pl.col("b"))
+    )
+
+    assert_gpu_result_equal(q)
diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py
index 84641be497f..8481105baad 100644
--- a/python/cudf_polars/tests/test_scan.py
+++ b/python/cudf_polars/tests/test_scan.py
@@ -467,7 +467,14 @@ def test_scan_with_row_index(tmp_path: Path) -> None:
     df.write_csv(tmp_path / "test-0.csv")
     df.write_csv(tmp_path / "test-1.csv")

-    result = pl.scan_csv(
-        tmp_path / "test-*.csv", row_index_name="index", row_index_offset=0
-    )
-    assert_gpu_result_equal(result)
+    q = pl.scan_csv(tmp_path / "test-*.csv", row_index_name="index", row_index_offset=0)
+    assert_gpu_result_equal(q)
+
+
+def test_scan_from_file_uri(tmp_path: Path) -> None:
+    tmp_path.mkdir(exist_ok=True)
+    path = tmp_path / "out.parquet"
+    df = pl.DataFrame({"a": 1})
+    df.write_parquet(path)
+    q = pl.scan_parquet(f"file://{path}")
+    assert_ir_translation_raises(q, NotImplementedError)

From 3c36493bbdb39a62c3075fd343450cac526fc55e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Tue, 19 Aug 2025 07:11:28 -0700
Subject: [PATCH 157/366] Fix filter call in benchmark (#19732)

https://github.com/rapidsai/cudf/pull/19502/ modified the API for
`cudf::filter` but did not update the `minmax_filter` benchmark accordingly.
I'm not sure how CI passed on that PR, since I thought we were building
benchmarks in CI; probably some race condition I'm not thinking of at the
moment.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Basit Ayantunde (https://github.com/lamarrr)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Basit Ayantunde (https://github.com/lamarrr)

URL: https://github.com/rapidsai/cudf/pull/19732
---
 cpp/benchmarks/filter/minmax_filter.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cpp/benchmarks/filter/minmax_filter.cpp b/cpp/benchmarks/filter/minmax_filter.cpp
index 069bfe67107..af3e02b7eb0 100644
--- a/cpp/benchmarks/filter/minmax_filter.cpp
+++ b/cpp/benchmarks/filter/minmax_filter.cpp
@@ -147,8 +147,14 @@ static void BM_filter_min_max(nvbench::state& state)
       cudf::apply_boolean_mask(input_table, filter_boolean->view(), stream, mr);
     } break;
     case engine_type::JIT: {
-      auto result = cudf::filter(
-        filter_inputs, udf, false, std::nullopt, std::vector<bool>{true, false, false}, stream, mr);
+      auto result = cudf::filter(filter_inputs,
+                                 udf,
+                                 false,
+                                 std::nullopt,
+                                 std::vector<bool>{true, false, false},
+                                 cudf::null_aware::NO,
+                                 stream,
+                                 mr);
     } break;
     default: CUDF_UNREACHABLE("Unrecognised engine type requested");
   }

From d3f2d4021e948723216df0266ac8e1a456cc3842 Mon Sep 17 00:00:00 2001
From: Yunsong Wang
Date: Tue, 19 Aug 2025 09:22:57 -0700
Subject: [PATCH 158/366] Add reduction with overflow detection (#19641)

Contributes to #19243

This PR adds overflow detection to `reduce`, enabling the `SUM_WITH_OVERFLOW`
aggregation kind.
It returns a struct scalar containing the sum result and a boolean flag indicating whether an overflow occurred. Currently, only `INT64` is supported. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/19641 --- cpp/CMakeLists.txt | 1 + .../cudf/detail/aggregation/aggregation.hpp | 4 +- cpp/include/cudf/reduction.hpp | 22 +- .../reduction/detail/reduction_functions.hpp | 21 ++ cpp/src/aggregation/aggregation.cpp | 4 + cpp/src/reductions/reductions.cpp | 27 +- cpp/src/reductions/sum_with_overflow.cu | 188 +++++++++++++ cpp/tests/reductions/reduction_tests.cpp | 249 ++++++++++++++++++ 8 files changed, 502 insertions(+), 14 deletions(-) create mode 100644 cpp/src/reductions/sum_with_overflow.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6b9601856c0..1598d7fb51b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -662,6 +662,7 @@ add_library( src/reductions/std.cu src/reductions/sum.cu src/reductions/sum_of_squares.cu + src/reductions/sum_with_overflow.cu src/reductions/var.cu src/replace/clamp.cu src/replace/nans.cu diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 81084f8bdfb..32f4d8c572e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -185,7 +185,9 @@ class sum_aggregation final : public rolling_aggregation, * @brief Derived class for specifying a sum_with_overflow aggregation */ class sum_with_overflow_aggregation final : public groupby_aggregation, - public groupby_scan_aggregation { + public groupby_scan_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: sum_with_overflow_aggregation() : aggregation(SUM_WITH_OVERFLOW) {} diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index e4150f4153a..216bf5b9734 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -39,10 +39,13 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; /** * @brief Computes the reduction of the values in all rows of a column. * - * This function does not detect overflows in reductions. When `output_type` - * does not match the `col.type()`, their values may be promoted to - * `int64_t` or `double` for computing aggregations and then cast to - * `output_type` before returning. + * This function does not detect overflows in reductions except for the `SUM_WITH_OVERFLOW` + * aggregation. When `output_type` does not match the `col.type()`, their values may be promoted to + * `int64_t` or `double` for computing aggregations and then cast to `output_type` before returning. + * + * The `SUM_WITH_OVERFLOW` aggregation is a special case that detects integer + * overflow during summation of `int64_t` values and returns a struct containing + * both the sum result and an overflow flag. * * Only `min` and `max` ops are supported for reduction of non-arithmetic * types (e.g. timestamp or string). 
@@ -61,6 +64,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE };
  * | Aggregation | Output Type | Init Value | Empty Input | Comments |
  * | :---------: | ----------- | :--------: | ----------- | -------- |
  * | SUM/PRODUCT | output_type | yes | NA | Input accumulated into output_type variable |
+ * | SUM_WITH_OVERFLOW | STRUCT{INT64,BOOL8} | yes | {null,false} | {sum, overflow_flag}, input must be INT64 |
  * | SUM_OF_SQUARES | output_type | no | NA | Input accumulated into output_type variable |
  * | MIN/MAX | col.type | yes | NA | Supports arithmetic, timestamp, duration, string types only |
  * | ANY/ALL | BOOL8 | yes | True for ALL only | Checks for non-zero elements |
@@ -84,6 +88,8 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE };
  * @throw std::invalid_argument if `any` or `all` reduction is called and the output type is not BOOL8.
  * @throw std::invalid_argument if `mean`, `var`, or `std` reduction is called and
  * the `output_type` is not floating point.
+ * @throw std::invalid_argument if `sum_with_overflow` reduction is called and the
+ * input column type is not `INT64` or the `output_dtype` is not `STRUCT`.
  *
  * @param col Input column view
  * @param agg Aggregation operator applied by the reduction
@@ -103,13 +109,15 @@ std::unique_ptr<scalar> reduce(
 /**
  * @brief Computes the reduction of the values in all rows of a column with an initial value
  *
- * Only `sum`, `product`, `min`, `max`, `any`, and `all` reductions are supported.
+ * Only `sum`, `product`, `min`, `max`, `any`, `all`, and `sum_with_overflow` reductions are
+ * supported. For `sum_with_overflow`, the initial value is added to the sum and overflow
+ * detection is performed throughout the entire computation.
  *
  * @see cudf::reduce(column_view const&,reduce_aggregation
 * const&,data_type,rmm::cuda_stream_view,rmm::device_async_resource_ref) for more details
  *
- * @throw std::invalid_argument if reduction is not `sum`, `product`, `min`, `max`, `any`, or `all`
- * and `init` is specified.
+ * @throw std::invalid_argument if reduction is not `sum`, `product`, `min`, `max`, `any`, `all`,
+ * or `sum_with_overflow` and `init` is specified.
  *
  * @param col Input column view
  * @param agg Aggregation operator applied by the reduction
diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
index 910e3b9c2e3..d7c49acca32 100644
--- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
@@ -51,6 +51,27 @@ std::unique_ptr<scalar> sum(column_view const& col,
                             rmm::cuda_stream_view stream,
                             rmm::device_async_resource_ref mr);

+/**
+ * @brief Computes the sum of int64_t elements in the input column, with overflow detection
+ *
+ * Returns a struct scalar with {sum: int64_t, overflow: bool} fields.
+ * Only supports int64_t input columns.
+ *
+ * @throw std::invalid_argument if input column type is not int64_t
+ *
+ * @param col input column to compute sum with overflow detection (must be int64_t)
+ * @param output_type data type of return type (must be struct)
+ * @param init initial value of the sum
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return Struct scalar with sum and overflow flag
+ */
+std::unique_ptr<scalar> sum_with_overflow(column_view const& col,
+                                          data_type const output_type,
+                                          std::optional<std::reference_wrapper<scalar const>> init,
+                                          rmm::cuda_stream_view stream,
+                                          rmm::device_async_resource_ref mr);
+
 /**
  * @brief Computes minimum of elements in input column
  *
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index cb3a2b80ae4..39355983ed4 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -480,6 +480,10 @@
 template CUDF_EXPORT std::unique_ptr<groupby_aggregation>
 make_sum_with_overflow_aggregation<groupby_aggregation>();
 template CUDF_EXPORT std::unique_ptr<groupby_scan_aggregation>
 make_sum_with_overflow_aggregation<groupby_scan_aggregation>();
+template CUDF_EXPORT std::unique_ptr<reduce_aggregation>
+make_sum_with_overflow_aggregation<reduce_aggregation>();
+template CUDF_EXPORT std::unique_ptr<segmented_reduce_aggregation>
+make_sum_with_overflow_aggregation<segmented_reduce_aggregation>();

 /// Factory to create a PRODUCT aggregation
 template
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 4c75cc312ba..2c1c582091b 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -16,6 +16,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
@@ -49,6 +51,13 @@ std::unique_ptr<scalar> reduce_aggregate_impl(
 {
   switch (agg.kind) {
     case aggregation::SUM: return sum(col, output_dtype, init, stream, mr);
+    case aggregation::SUM_WITH_OVERFLOW: {
+      // Validate that input column is int64_t (unified validation for SUM_WITH_OVERFLOW)
+      CUDF_EXPECTS(col.type().id() == cudf::type_id::INT64,
+                   "SUM_WITH_OVERFLOW aggregation only supports int64_t input types",
+                   std::invalid_argument);
+      return sum_with_overflow(col, output_dtype, init, stream, mr);
+    }
     case aggregation::PRODUCT: return product(col, output_dtype, init, stream, mr);
     case aggregation::MIN: return min(col, output_dtype, init, stream, mr);
     case aggregation::MAX: return max(col, output_dtype, init, stream, mr);
@@ -173,6 +182,10 @@ std::unique_ptr<scalar> reduce_no_data_impl(reduce_aggregation const& agg,
       auto valid = !col.is_empty() && (nunique_agg._null_handling == cudf::null_policy::INCLUDE);
       return std::make_unique<numeric_scalar<size_type>>(!col.is_empty(), valid, stream, mr);
     }
+    case aggregation::SUM_WITH_OVERFLOW: {
+      // For empty input, return {null, false} struct
+      return sum_with_overflow(col, output_dtype, std::nullopt, stream, mr);
+    }
     default: {
       return cudf::is_nested(output_dtype) ?
make_empty_scalar_like(col, stream, mr)
                                           : make_default_constructed_scalar(output_dtype, stream, mr);
    }
  }
}
@@ -192,13 +205,14 @@ std::unique_ptr<scalar> reduce(column_view const& col,
   CUDF_EXPECTS(!init.has_value() || cudf::have_same_types(col, init.value().get()),
                "column and initial value must be the same type",
                cudf::data_type_error);
-  if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT ||
-                            agg.kind == aggregation::MIN || agg.kind == aggregation::MAX ||
-                            agg.kind == aggregation::ANY || agg.kind == aggregation::ALL ||
-                            agg.kind == aggregation::HOST_UDF)) {
+  if (init.has_value() &&
+      !(agg.kind == aggregation::SUM || agg.kind == aggregation::SUM_WITH_OVERFLOW ||
+        agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN ||
+        agg.kind == aggregation::MAX || agg.kind == aggregation::ANY ||
+        agg.kind == aggregation::ALL || agg.kind == aggregation::HOST_UDF)) {
     CUDF_FAIL(
-      "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF "
-      "aggregation types",
+      "Initial value is only supported for SUM, SUM_WITH_OVERFLOW, PRODUCT, MIN, MAX, ANY, ALL, "
+      "and HOST_UDF aggregation types",
       std::invalid_argument);
   }
@@ -230,4 +244,5 @@
   CUDF_FUNC_RANGE();
   return reduction::detail::reduce(col, agg, output_dtype, init, stream, mr);
 }
+
 } // namespace cudf
diff --git a/cpp/src/reductions/sum_with_overflow.cu b/cpp/src/reductions/sum_with_overflow.cu
new file mode 100644
index 00000000000..5f98fc28443
--- /dev/null
+++ b/cpp/src/reductions/sum_with_overflow.cu
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace cudf::reduction::detail {
+
+// Simple pair to hold sum and overflow flag
+struct sum_overflow_result {
+  int64_t sum;
+  bool overflow;
+
+  CUDF_HOST_DEVICE sum_overflow_result() : sum(0), overflow(false) {}
+  CUDF_HOST_DEVICE sum_overflow_result(int64_t s, bool o) : sum(s), overflow(o) {}
+};
+
+// Binary operator for combining sum_overflow_result values
+struct overflow_sum_op {
+  __device__ sum_overflow_result operator()(sum_overflow_result const& lhs,
+                                            sum_overflow_result const& rhs) const
+  {
+    // If either operand already has overflow, result has overflow
+    if (lhs.overflow || rhs.overflow) {
+      // Still compute the sum for consistency, but mark as overflow.
+      // Unsigned arithmetic wraps without invoking undefined behavior.
+      return sum_overflow_result{
+        static_cast<int64_t>(static_cast<uint64_t>(lhs.sum) + static_cast<uint64_t>(rhs.sum)),
+        true};
+    }
+
+    // Check for overflow BEFORE performing the addition to avoid UB
+    bool overflow_detected = false;
+
+    // Check for positive overflow: would the addition exceed INT64_MAX?
+    if (rhs.sum > 0 && lhs.sum > cuda::std::numeric_limits<int64_t>::max() - rhs.sum) {
+      overflow_detected = true;
+    }
+    // Check for negative overflow: would the addition go below INT64_MIN?
+    else if (rhs.sum < 0 && lhs.sum < cuda::std::numeric_limits<int64_t>::min() - rhs.sum) {
+      overflow_detected = true;
+    }
+
+    // Perform the addition (safe if no overflow detected)
+    int64_t const result_sum = lhs.sum + rhs.sum;
+
+    return sum_overflow_result{result_sum, overflow_detected};
+  }
+};
+
+// Transform function to convert int64_t values to sum_overflow_result
+struct to_sum_overflow {
+  __device__ sum_overflow_result operator()(int64_t value) const
+  {
+    return sum_overflow_result{value, false};
+  }
+};
+
+// Transform functor for null-aware conversion using index
+struct null_aware_to_sum_overflow {
+  cudf::column_device_view const* dcol_ptr;
+
+  CUDF_HOST_DEVICE null_aware_to_sum_overflow(cudf::column_device_view const* dcol) : dcol_ptr(dcol)
+  {
+  }
+
+  __device__ sum_overflow_result operator()(cudf::size_type idx) const
+  {
+    return dcol_ptr->is_valid(idx) ? sum_overflow_result{dcol_ptr->element<int64_t>(idx), false}
+                                   : sum_overflow_result{0, false};
+  }
+};
+
+std::unique_ptr<scalar> sum_with_overflow(
+  column_view const& col,
+  cudf::data_type const output_dtype,
+  std::optional<std::reference_wrapper<scalar const>> init,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+
+  // SUM_WITH_OVERFLOW only supports int64_t input
+  CUDF_EXPECTS(col.type().id() == cudf::type_id::INT64,
+               "SUM_WITH_OVERFLOW only supports int64_t input types",
+               std::invalid_argument);
+
+  // Handle empty column
+  if (col.size() == 0 || col.size() == col.null_count()) {
+    // Create struct with {null sum, false overflow}
+    auto sum_scalar =
+      cudf::make_default_constructed_scalar(cudf::data_type{cudf::type_id::INT64}, stream, mr);
+    sum_scalar->set_valid_async(false, stream);
+    auto overflow_scalar = cudf::make_fixed_width_scalar(false, stream, mr);
+
+    std::vector<std::unique_ptr<cudf::column>> children;
+    children.push_back(cudf::make_column_from_scalar(*sum_scalar, 1, stream, mr));
+    children.push_back(cudf::make_column_from_scalar(*overflow_scalar, 1, stream, mr));
+
+    // Use host_span of column_views instead of table_view to avoid double wrapping
+    std::vector<cudf::column_view> child_views;
+    child_views.push_back(children[0]->view());
+    child_views.push_back(children[1]->view());
+
+    return cudf::make_struct_scalar(
+      cudf::host_span<cudf::column_view const>{child_views}, stream, mr);
+  }
+
+  // Create device view
+  auto dcol = cudf::column_device_view::create(col, stream);
+
+  // Set up initial value
+  sum_overflow_result initial_value{0, false};
+  if (init.has_value() && init.value().get().is_valid(stream)) {
+    auto const& init_scalar = static_cast<cudf::numeric_scalar<int64_t> const&>(init.value().get());
+    initial_value.sum       = init_scalar.value(stream);
+  }
+
+  // Perform the reduction using thrust::transform_reduce
+  auto counting_iter = thrust::make_counting_iterator(0);
+  auto dcol_ptr      = dcol.get();
+  sum_overflow_result result;
+
+  if (col.has_nulls()) {
+    // Use null-aware transform functor
+    result = thrust::transform_reduce(rmm::exec_policy_nosync(stream),
+                                      counting_iter,
+                                      counting_iter + col.size(),
+                                      null_aware_to_sum_overflow{dcol_ptr},
+                                      initial_value,
+                                      overflow_sum_op{});
+  } else {
+    // Use direct iterator for non-null case
+    auto input_iter = dcol->begin<int64_t>();
+    result          = thrust::transform_reduce(rmm::exec_policy_nosync(stream),
+                                      input_iter,
+                                      input_iter + col.size(),
+                                      to_sum_overflow{},
+                                      initial_value,
+                                      overflow_sum_op{});
+  }
+
+  // Create result struct scalar with {sum: int64_t, overflow: bool}
+  auto sum_scalar      = cudf::make_fixed_width_scalar(result.sum, stream, mr);
+  auto overflow_scalar = cudf::make_fixed_width_scalar(result.overflow, stream, mr);
+
+  // Create struct scalar using
cudf::make_struct_scalar with host_span of column_views
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(cudf::make_column_from_scalar(*sum_scalar, 1, stream, mr));
+  children.push_back(cudf::make_column_from_scalar(*overflow_scalar, 1, stream, mr));
+
+  // Use host_span of column_views instead of table_view to avoid double wrapping
+  std::vector<cudf::column_view> child_views;
+  child_views.push_back(children[0]->view());
+  child_views.push_back(children[1]->view());
+
+  return cudf::make_struct_scalar(
+    cudf::host_span<cudf::column_view const>{child_views}, stream, mr);
+}
+
+}  // namespace cudf::reduction::detail
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index ff0806d6dfa..ec22328e84a 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -3173,4 +3173,253 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
   }
 }
 
+// Test for SUM_WITH_OVERFLOW aggregation using regular reduce() function
+struct ReduceWithOverflowTest : public cudf::test::BaseFixture {
+  // Helper function to extract sum and overflow from struct scalar returned by reduce()
+  std::pair<std::unique_ptr<cudf::scalar>, std::unique_ptr<cudf::scalar>> extract_sum_overflow(
+    std::unique_ptr<cudf::scalar> const& result)
+  {
+    EXPECT_TRUE(result->is_valid());
+    EXPECT_EQ(result->type().id(), cudf::type_id::STRUCT);
+
+    auto struct_scalar_ptr = static_cast<cudf::struct_scalar const*>(result.get());
+    auto table_view        = struct_scalar_ptr->view();
+
+    EXPECT_EQ(table_view.num_columns(), 2);
+    EXPECT_EQ(table_view.column(0).size(), 1);
+    EXPECT_EQ(table_view.column(1).size(), 1);
+
+    auto sum_result    = cudf::get_element(table_view.column(0), 0);
+    auto overflow_flag = cudf::get_element(table_view.column(1), 0);
+    return std::make_pair(std::move(sum_result), std::move(overflow_flag));
+  }
+};
+
+TEST_F(ReduceWithOverflowTest, SumWithoutOverflow)
+{
+  std::vector<int64_t> values{1, 2, 3, 4, 5};
+  cudf::test::fixed_width_column_wrapper<int64_t> col(values.begin(), values.end());
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto sum_value = static_cast<cudf::numeric_scalar<int64_t> const*>(sum_result.get())->value();
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  EXPECT_EQ(sum_value, 15);      // 1+2+3+4+5 = 15
+  EXPECT_FALSE(overflow_value);  // No overflow expected
+}
+
+TEST_F(ReduceWithOverflowTest, PositiveOverflow)
+{
+  std::vector<int64_t> positive_overflow_values{std::numeric_limits<int64_t>::max(),
+                                                1};  // max + 1 should overflow
+  cudf::test::fixed_width_column_wrapper<int64_t> col(positive_overflow_values.begin(),
+                                                      positive_overflow_values.end());
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  EXPECT_TRUE(overflow_value);  // Should detect positive overflow
+}
+
+TEST_F(ReduceWithOverflowTest, NegativeOverflow)
+{
+  std::vector<int64_t> negative_overflow_values{std::numeric_limits<int64_t>::min(),
+                                                -1};  // min - 1 should overflow
+  cudf::test::fixed_width_column_wrapper<int64_t> col(negative_overflow_values.begin(),
+                                                      negative_overflow_values.end());
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  EXPECT_TRUE(overflow_value);  // Should detect negative overflow
+}
+
+TEST_F(ReduceWithOverflowTest, AccumulatingOverflow)
+{
+  // Use large values that when accumulated could cause overflow
+  std::vector<int64_t> accumulating_overflow{
+    std::numeric_limits<int64_t>::max() / 3,
+    std::numeric_limits<int64_t>::max() / 3,
+    std::numeric_limits<int64_t>::max() / 3,
+    std::numeric_limits<int64_t>::max() / 3};  // This should overflow
+  cudf::test::fixed_width_column_wrapper<int64_t> col(accumulating_overflow.begin(),
+                                                      accumulating_overflow.end());
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  // Should detect overflow since we're adding 4 * (max/3) which > max
+  EXPECT_TRUE(overflow_value);  // Should detect accumulating overflow
+}
+
+TEST_F(ReduceWithOverflowTest, EmptyColumn)
+{
+  cudf::test::fixed_width_column_wrapper<int64_t> empty_col{};
+
+  auto result = cudf::reduce(empty_col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_FALSE(sum_result->is_valid());  // Should be null for empty input
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+  EXPECT_FALSE(overflow_value);  // No overflow for empty input
+}
+
+TEST_F(ReduceWithOverflowTest, AllNullColumn)
+{
+  std::vector<int64_t> values{1, 2, 3};
+  std::vector<bool> validity{false, false, false};
+  cudf::test::fixed_width_column_wrapper<int64_t> null_col(
+    values.begin(), values.end(), validity.begin());
+
+  auto result = cudf::reduce(null_col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT});
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_FALSE(sum_result->is_valid());  // Should be null for all-null input
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+  EXPECT_FALSE(overflow_value);  // No overflow for all-null input
+}
+
+TEST_F(ReduceWithOverflowTest, WithInitialValue)
+{
+  std::vector<int64_t> values{1, 2, 3};
+  cudf::test::fixed_width_column_wrapper<int64_t> col(values.begin(), values.end());
+  auto init_scalar = cudf::make_fixed_width_scalar<int64_t>(10);
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT},
+                             *init_scalar);
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto sum_value = static_cast<cudf::numeric_scalar<int64_t> const*>(sum_result.get())->value();
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  EXPECT_EQ(sum_value, 16);      // 10 + 1 + 2 + 3 = 16
+  EXPECT_FALSE(overflow_value);  // No overflow expected
+}
+
+TEST_F(ReduceWithOverflowTest, InitialValuePositiveOverflow)
+{
+  std::vector<int64_t> values{1, 2, 3};
+  cudf::test::fixed_width_column_wrapper<int64_t> col(values.begin(), values.end());
+  auto init_scalar = cudf::make_fixed_width_scalar(std::numeric_limits<int64_t>::max() -
+                                                   3);  // max - 3 + 6 = max + 3 (overflow)
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT},
+                             *init_scalar);
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  // (max - 3) + 1 + 2 + 3 = max + 3, which should overflow
+  EXPECT_TRUE(overflow_value);  // Should detect overflow with initial value
+}
+
+TEST_F(ReduceWithOverflowTest, InitialValueNegativeOverflow)
+{
+  std::vector<int64_t> values{-1, -2, -3};
+  cudf::test::fixed_width_column_wrapper<int64_t> col(values.begin(), values.end());
+  auto init_scalar = cudf::make_fixed_width_scalar(std::numeric_limits<int64_t>::min() +
+                                                   3);  // min + 3 - 6 = min - 3 (overflow)
+
+  auto result = cudf::reduce(col,
+                             *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                             cudf::data_type{cudf::type_id::STRUCT},
+                             *init_scalar);
+
+  auto [sum_result, overflow_flag] = extract_sum_overflow(result);
+
+  EXPECT_TRUE(sum_result->is_valid());
+  EXPECT_TRUE(overflow_flag->is_valid());
+
+  auto overflow_value =
+    static_cast<cudf::numeric_scalar<bool> const*>(overflow_flag.get())->value();
+
+  // (min + 3) + (-1) + (-2) + (-3) = min - 3, which should overflow
+  EXPECT_TRUE(overflow_value);  // Should detect negative overflow with initial value
+}
+
+TEST_F(ReduceWithOverflowTest, ErrorHandlingNonInt64)
+{
+  std::vector<int32_t> int32_values{1, 2, 3};
+  cudf::test::fixed_width_column_wrapper<int32_t> int32_col(int32_values.begin(),
+                                                            int32_values.end());
+
+  EXPECT_THROW(cudf::reduce(int32_col,
+                            *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                            cudf::data_type{cudf::type_id::STRUCT}),
+               std::invalid_argument);
+}
+
+TEST_F(ReduceWithOverflowTest, ErrorHandlingNonArithmetic)
+{
+  std::vector<std::string> string_values{"a", "b", "c"};
+  cudf::test::strings_column_wrapper string_col(string_values.begin(), string_values.end());
+
+  EXPECT_THROW(cudf::reduce(string_col,
+                            *cudf::make_sum_with_overflow_aggregation<cudf::reduce_aggregation>(),
+                            cudf::data_type{cudf::type_id::STRUCT}),
+               std::invalid_argument);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From c5b93f18fe07851a023d4573cf5b9b2bb26b73fb Mon Sep 17 00:00:00 2001
From: Avinash Raj
Date: Tue, 19 Aug 2025 22:23:33 +0530
Subject: [PATCH 159/366] Updated libcudf-example conda package to preserve
 directories structure (#19440)

Currently, the `libcudf-example` conda package installs all example files
into a single directory, making it hard to identify which files belong to
each example. This PR resolves the issue by organizing the files into
respective subfolders.
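For illustration, the resulting install layout should look roughly like the
sketch below (directory names are taken from the install rules in the diffs
that follow; the per-example file lists are abridged):

    bin/examples/libcudf/
        basic/                basic_example, 4stock_5day.csv
        billion_rows/         brc, brc_chunks, brc_pipeline
        nested_types/         deduplication, example.json
        parquet_io/           parquet_io, parquet_io_multithreaded, example.parquet
        string_transformers/  *_jit and *_precompiled transforms, info.csv
        strings/              libcudf_apis, custom_* samples, names.csv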
closes #19360 Authors: - Avinash Raj (https://github.com/Avinash-Raj) - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) - Shruti Shivakumar (https://github.com/shrshi) Approvers: - Nghia Truong (https://github.com/ttnghia) - Muhammad Haseeb (https://github.com/mhaseeb123) - Shruti Shivakumar (https://github.com/shrshi) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19440 --- ci/run_cudf_examples.sh | 10 ++++++++++ cpp/examples/basic/CMakeLists.txt | 4 ++-- cpp/examples/billion_rows/CMakeLists.txt | 6 +++--- cpp/examples/nested_types/CMakeLists.txt | 4 ++-- cpp/examples/parquet_io/CMakeLists.txt | 6 +++--- cpp/examples/string_transforms/CMakeLists.txt | 18 ++++++++++-------- cpp/examples/strings/CMakeLists.txt | 10 +++++----- 7 files changed, 35 insertions(+), 23 deletions(-) diff --git a/ci/run_cudf_examples.sh b/ci/run_cudf_examples.sh index 24de63ef45f..cc202fa9d56 100755 --- a/ci/run_cudf_examples.sh +++ b/ci/run_cudf_examples.sh @@ -9,14 +9,21 @@ trap "EXITCODE=1" ERR # Support customizing the examples' install location cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/examples/libcudf/" || exit +cd basic || exit compute-sanitizer --tool memcheck basic_example +cd .. +cd nested_types || exit compute-sanitizer --tool memcheck deduplication +cd .. +cd strings || exit compute-sanitizer --tool memcheck custom_optimized names.csv compute-sanitizer --tool memcheck custom_prealloc names.csv compute-sanitizer --tool memcheck custom_with_malloc names.csv +cd .. +cd string_transformers || exit compute-sanitizer --tool memcheck compute_checksum_jit info.csv output.csv compute-sanitizer --tool memcheck extract_email_jit info.csv output.csv compute-sanitizer --tool memcheck extract_email_precompiled info.csv output.csv @@ -24,11 +31,14 @@ compute-sanitizer --tool memcheck format_phone_jit info.csv output.csv compute-sanitizer --tool memcheck format_phone_precompiled info.csv output.csv compute-sanitizer --tool memcheck localize_phone_jit info.csv output.csv compute-sanitizer --tool memcheck localize_phone_precompiled info.csv output.csv +cd .. +cd parquet_io || exit compute-sanitizer --tool memcheck parquet_io example.parquet compute-sanitizer --tool memcheck parquet_io example.parquet output.parquet DELTA_BINARY_PACKED ZSTD TRUE compute-sanitizer --tool memcheck parquet_io_multithreaded example.parquet compute-sanitizer --tool memcheck parquet_io_multithreaded example.parquet 4 DEVICE_BUFFER 2 2 +cd .. 
exit ${EXITCODE}
diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
index 447c8709297..6e9604ff15e 100644
--- a/cpp/examples/basic/CMakeLists.txt
+++ b/cpp/examples/basic/CMakeLists.txt
@@ -28,5 +28,5 @@ add_executable(basic_example src/process_csv.cpp)
 target_link_libraries(basic_example PRIVATE cudf::cudf)
 target_compile_features(basic_example PRIVATE cxx_std_20)
 
-install(TARGETS basic_example DESTINATION bin/examples/libcudf)
-install(FILES ${CMAKE_CURRENT_LIST_DIR}/4stock_5day.csv DESTINATION bin/examples/libcudf)
+install(TARGETS basic_example DESTINATION bin/examples/libcudf/basic)
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/4stock_5day.csv DESTINATION bin/examples/libcudf/basic)
diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt
index 962ff79c537..ed83be6216a 100644
--- a/cpp/examples/billion_rows/CMakeLists.txt
+++ b/cpp/examples/billion_rows/CMakeLists.txt
@@ -35,7 +35,7 @@ target_link_libraries(
   $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
 target_compile_features(brc PRIVATE cxx_std_20)
-install(TARGETS brc DESTINATION bin/examples/libcudf)
+install(TARGETS brc DESTINATION bin/examples/libcudf/billion_rows)
 
 add_executable(brc_chunks brc_chunks.cpp)
 target_link_libraries(
@@ -43,7 +43,7 @@ target_link_libraries(
   $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
 target_compile_features(brc_chunks PRIVATE cxx_std_20)
-install(TARGETS brc_chunks DESTINATION bin/examples/libcudf)
+install(TARGETS brc_chunks DESTINATION bin/examples/libcudf/billion_rows)
 
 add_executable(brc_pipeline brc_pipeline.cpp)
 target_link_libraries(
@@ -51,4 +51,4 @@ target_link_libraries(
   $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
 target_compile_features(brc_pipeline PRIVATE cxx_std_20)
-install(TARGETS brc_pipeline DESTINATION bin/examples/libcudf)
+install(TARGETS brc_pipeline DESTINATION bin/examples/libcudf/billion_rows)
diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt
index e91a85d4ee0..6532585f496 100644
--- a/cpp/examples/nested_types/CMakeLists.txt
+++ b/cpp/examples/nested_types/CMakeLists.txt
@@ -28,5 +28,5 @@ add_executable(deduplication deduplication.cpp)
 target_link_libraries(deduplication PRIVATE cudf::cudf)
 target_compile_features(deduplication PRIVATE cxx_std_20)
 
-install(TARGETS deduplication DESTINATION bin/examples/libcudf)
-install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.json DESTINATION bin/examples/libcudf)
+install(TARGETS deduplication DESTINATION bin/examples/libcudf/nested_types)
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.json DESTINATION bin/examples/libcudf/nested_types)
diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt
index 4f520b2bdad..3c381d1b4c0 100644
--- a/cpp/examples/parquet_io/CMakeLists.txt
+++ b/cpp/examples/parquet_io/CMakeLists.txt
@@ -34,7 +34,7 @@ target_link_libraries(
   $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
 target_compile_features(parquet_io PRIVATE cxx_std_20)
-install(TARGETS parquet_io DESTINATION bin/examples/libcudf)
+install(TARGETS parquet_io DESTINATION bin/examples/libcudf/parquet_io)
 
 # Build and install parquet_io_multithreaded
 add_executable(parquet_io_multithreaded parquet_io_multithreaded.cpp)
 target_link_libraries(
@@ -43,7 +43,7 @@ target_link_libraries(
   $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
 target_compile_features(parquet_io_multithreaded PRIVATE cxx_std_20)
-install(TARGETS parquet_io_multithreaded DESTINATION bin/examples/libcudf)
+install(TARGETS parquet_io_multithreaded DESTINATION bin/examples/libcudf/parquet_io)
 
 # Install the example.parquet file
-install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.parquet DESTINATION bin/examples/libcudf)
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/example.parquet
+        DESTINATION bin/examples/libcudf/parquet_io)
diff --git a/cpp/examples/string_transforms/CMakeLists.txt b/cpp/examples/string_transforms/CMakeLists.txt
index 90830eb2820..fb31c93ba21 100644
--- a/cpp/examples/string_transforms/CMakeLists.txt
+++ b/cpp/examples/string_transforms/CMakeLists.txt
@@ -33,7 +33,7 @@ target_compile_options(
 target_link_libraries(
   compute_checksum_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS compute_checksum_jit DESTINATION bin/examples/libcudf)
+install(TARGETS compute_checksum_jit DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(extract_email_jit extract_email_jit.cpp)
 target_compile_features(extract_email_jit PRIVATE cxx_std_20)
@@ -41,7 +41,7 @@ target_compile_options(extract_email_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(
   extract_email_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS extract_email_jit DESTINATION bin/examples/libcudf)
+install(TARGETS extract_email_jit DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(extract_email_precompiled extract_email_precompiled.cpp)
 target_compile_features(extract_email_precompiled PRIVATE cxx_std_20)
@@ -51,13 +51,13 @@ target_compile_options(
 target_link_libraries(
   extract_email_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS extract_email_precompiled DESTINATION bin/examples/libcudf)
+install(TARGETS extract_email_precompiled DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(format_phone_jit format_phone_jit.cpp)
 target_compile_features(format_phone_jit PRIVATE cxx_std_20)
 target_compile_options(format_phone_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(format_phone_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>)
-install(TARGETS format_phone_jit DESTINATION bin/examples/libcudf)
+install(TARGETS format_phone_jit DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(format_phone_precompiled format_phone_precompiled.cpp)
 target_compile_features(format_phone_precompiled PRIVATE cxx_std_20)
@@ -67,7 +67,7 @@ target_compile_options(
 target_link_libraries(
   format_phone_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS format_phone_precompiled DESTINATION bin/examples/libcudf)
+install(TARGETS format_phone_precompiled DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(localize_phone_jit localize_phone_jit.cpp)
 target_compile_features(localize_phone_jit PRIVATE cxx_std_20)
@@ -75,7 +75,7 @@ target_compile_options(localize_phone_jit PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(
   localize_phone_jit PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS localize_phone_jit DESTINATION bin/examples/libcudf)
+install(TARGETS localize_phone_jit DESTINATION bin/examples/libcudf/string_transformers)
 
 add_executable(localize_phone_precompiled localize_phone_precompiled.cpp)
 target_compile_features(localize_phone_precompiled PRIVATE cxx_std_20)
@@ -85,6 +85,8 @@ target_compile_options(
 target_link_libraries(
   localize_phone_precompiled PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS localize_phone_precompiled DESTINATION bin/examples/libcudf)
+install(TARGETS localize_phone_precompiled DESTINATION bin/examples/libcudf/string_transformers)
 
-install(FILES ${CMAKE_CURRENT_LIST_DIR}/info.csv DESTINATION bin/examples/libcudf)
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/info.csv
+        DESTINATION bin/examples/libcudf/string_transformers
+)
diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt
index 4e890118dcb..1a7eb60d571 100644
--- a/cpp/examples/strings/CMakeLists.txt
+++ b/cpp/examples/strings/CMakeLists.txt
@@ -28,7 +28,7 @@ list(APPEND CUDF_CUDA_FLAGS
 --expt-extended-lambda --expt-relaxed-constexpr)
 
 add_executable(libcudf_apis libcudf_apis.cpp)
 target_compile_features(libcudf_apis PRIVATE cxx_std_20)
 target_link_libraries(libcudf_apis PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>)
-install(TARGETS libcudf_apis DESTINATION bin/examples/libcudf)
+install(TARGETS libcudf_apis DESTINATION bin/examples/libcudf/strings)
 
 add_executable(custom_with_malloc custom_with_malloc.cu)
 target_compile_features(custom_with_malloc PRIVATE cxx_std_20)
@@ -36,18 +36,18 @@ target_compile_options(custom_with_malloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(
   custom_with_malloc PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>
 )
-install(TARGETS custom_with_malloc DESTINATION bin/examples/libcudf)
+install(TARGETS custom_with_malloc DESTINATION bin/examples/libcudf/strings)
 
 add_executable(custom_prealloc custom_prealloc.cu)
 target_compile_features(custom_prealloc PRIVATE cxx_std_20)
 target_compile_options(custom_prealloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(custom_prealloc PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>)
-install(TARGETS custom_prealloc DESTINATION bin/examples/libcudf)
+install(TARGETS custom_prealloc DESTINATION bin/examples/libcudf/strings)
 
 add_executable(custom_optimized custom_optimized.cu)
 target_compile_features(custom_optimized PRIVATE cxx_std_20)
 target_compile_options(custom_optimized PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
 target_link_libraries(custom_optimized PRIVATE cudf::cudf $<BUILD_LOCAL_INTERFACE:nvtx3-cpp>)
-install(TARGETS custom_optimized DESTINATION bin/examples/libcudf)
+install(TARGETS custom_optimized DESTINATION bin/examples/libcudf/strings)
 
-install(FILES ${CMAKE_CURRENT_LIST_DIR}/names.csv DESTINATION bin/examples/libcudf)
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/names.csv DESTINATION bin/examples/libcudf/strings)

From 0deee7b8a2d40a3b3f40dad218de27fb1726006f Mon Sep 17 00:00:00 2001
From: Paul Taylor <178183+trxcllnt@users.noreply.github.com>
Date: Tue, 19 Aug 2025 11:16:06 -0700
Subject: [PATCH 160/366] Use build cluster in devcontainers (#19652)

RAPIDS has deployed an autoscaling cloud build cluster that can be used to
accelerate building large RAPIDS projects. This contributes to
https://github.com/rapidsai/build-planning/issues/209.
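As a minimal sketch of the resulting developer workflow (the commands are the
same ones the updated CI job below runs; `build-all` is assumed to come from
the rapids-build-utils devcontainer feature):

    # reset counters so the report reflects only this build
    sccache --zero-stats
    # compile; eligible compilations are shipped to the distributed build cluster
    build-all -DBUILD_BENCHMARKS=ON --verbose
    # inspect cache hits and distributed-compilation counts
    sccache --show-adv-stats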
Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19652 --- .devcontainer/Dockerfile | 36 +++++++++++++++++-- .../cuda12.9-conda/devcontainer.json | 4 ++- .devcontainer/cuda12.9-pip/devcontainer.json | 4 ++- .github/workflows/pr.yaml | 16 ++++++--- 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 315a389339a..b0f367e1f87 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -18,6 +18,8 @@ ENV DEFAULT_CONDA_ENV=rapids FROM ${PYTHON_PACKAGE_MANAGER}-base +ARG TARGETARCH + ARG CUDA ENV CUDAARCHS="RAPIDS" ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}" @@ -29,8 +31,36 @@ ENV PYTHONSAFEPATH="1" ENV PYTHONUNBUFFERED="1" ENV PYTHONDONTWRITEBYTECODE="1" -ENV SCCACHE_REGION="us-east-2" -ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache" + +### +# sccache configuration +### +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" +ENV SCCACHE_REGION="us-east-2" +ENV SCCACHE_BUCKET="rapids-sccache-devs" +# 2hr (1 minute longer than sccache-dist request timeout) +ENV SCCACHE_IDLE_TIMEOUT=7200 + +### +# sccache-dist configuration +### +# Enable sccache-dist by default +ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1 +# Compile locally if max retries exceeded +ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true +# Retry transient errors 4 times (for a total of 5 attempts) +ENV SCCACHE_DIST_MAX_RETRIES=4 +ENV SCCACHE_DIST_CONNECT_TIMEOUT=30 +ENV SCCACHE_DIST_CONNECTION_POOL=false +# 1hr 59min (to accommodate debug builds) +ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140 +ENV SCCACHE_DIST_KEEPALIVE_ENABLED=true +ENV SCCACHE_DIST_KEEPALIVE_INTERVAL=20 +ENV SCCACHE_DIST_KEEPALIVE_TIMEOUT=600 +ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com" + +# Build as much in parallel as possible +ENV INFER_NUM_DEVICE_ARCHITECTURES=1 +ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20 diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 1ed542f11f3..9e5bc0306a3 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -11,7 +11,9 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda", + "--ulimit", + "nofile=500000" ], "hostRequirements": { "gpu": "optional" diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index 3b35d4398c5..ea7d5a19515 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -11,7 +11,9 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip", + "--ulimit", + "nofile=500000" ], "hostRequirements": { "gpu": "optional" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2592a50a05e..061a24e226b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -316,13 +316,19 @@ jobs: needs: telemetry-setup uses: 
rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10 with: - node_type: "cpu32" - arch: '["amd64"]' + arch: '["amd64", "arm64"]' cuda: '["12.9"]' + node_type: "cpu8" + rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN + env: | + SCCACHE_DIST_MAX_RETRIES=inf + SCCACHE_SERVER_LOG=sccache=debug + SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false + SCCACHE_DIST_AUTH_TOKEN_VAR=RAPIDS_AUX_SECRET_1 build_command: | - sccache -z; - build-all -DBUILD_BENCHMARKS=ON --verbose; - sccache -s; + sccache --zero-stats; + build-all -j0 -DBUILD_BENCHMARKS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log; + sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt; unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit From 0223c7a39b3decf5e21a7fd81f50ef7e31bd7842 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 19 Aug 2025 15:09:21 -0400 Subject: [PATCH 161/366] Fix `group_by().agg()` on non-aggregatable dtypes (#19669) Closes #19664. The bug was caused by not checking if the dtype was actually summable. Now we do. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19669 --- .../cudf_polars/dsl/utils/aggregations.py | 15 +++++++++++++++ python/cudf_polars/tests/test_groupby.py | 16 ++++++++++++++++ python/pylibcudf/pylibcudf/aggregation.pyx | 1 + 3 files changed, 32 insertions(+) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index 29fd7155799..ebebc9bc361 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -136,6 +136,21 @@ def decompose_single_agg( ) if any(has_agg for _, has_agg in aggs): raise NotImplementedError("Nested aggs in groupby not supported") + + child_dtype = child.dtype.plc + req = agg.agg_request + is_median = agg.name == "median" + is_quantile = agg.name == "quantile" + + is_group_quantile_supported = plc.traits.is_integral( + child_dtype + ) or plc.traits.is_floating_point(child_dtype) + + unsupported = ( + (is_median or is_quantile) and not is_group_quantile_supported + ) or (not plc.aggregation.is_valid_aggregation(child_dtype, req)) + if unsupported: + return [], named_expr.reconstruct(expr.Literal(child.dtype, None)) if needs_masking: child = expr.UnaryFunction(child.dtype, "mask_nans", (), child) # The aggregation is just reconstructed with the new diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 38a1cee6ab7..36196522f34 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -330,3 +330,19 @@ def test_groupby_sum_all_null_group_returns_null(): q = df.group_by("key").agg(out=pl.col("null_groups").sum()) assert_gpu_result_equal(q, check_row_order=False) + + +@pytest.mark.parametrize( + "agg_expr", + [ + pl.all().sum(), + pl.all().mean(), + pl.all().median(), + pl.all().quantile(0.5), + ], + ids=["sum", "mean", "median", "quantile-0.5"], +) +def test_groupby_aggs_keep_unsupported_as_null(df: pl.LazyFrame, agg_expr) -> None: + lf = df.filter(pl.col("datetime") == date(2004, 12, 1)) + q = lf.group_by("datetime").agg(agg_expr) + assert_gpu_result_equal(q) diff --git a/python/pylibcudf/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx index 9bef36e5c06..87a39193d38 100644 --- 
a/python/pylibcudf/pylibcudf/aggregation.pyx +++ b/python/pylibcudf/pylibcudf/aggregation.pyx @@ -102,6 +102,7 @@ __all__ = [ "covariance", "ewma", "histogram", + "is_valid_aggregation", "lag", "lead", "m2", From 38223a57e4cf0b49c36e25f75d811b9a5f6c7cbd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:05:54 -0700 Subject: [PATCH 162/366] Move test_{io}.py files to new cudf classic test directory (#19709) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19709 --- python/cudf/cudf/io/orc.py | 7 + .../cudf/cudf/tests/input_output/test_csv.py | 38 + .../cudf/cudf/tests/input_output/test_hdf5.py | 141 +- .../tests/{ => input_output}/test_hdfs.py | 9 +- .../cudf/cudf/tests/input_output/test_orc.py | 2023 +++++++- .../cudf/tests/input_output/test_parquet.py | 4564 +++++++++++++++- .../cudf/tests/{ => input_output}/test_s3.py | 0 python/cudf/cudf/tests/test_gcs.py | 69 - python/cudf/cudf/tests/test_hdf.py | 142 - python/cudf/cudf/tests/test_orc.py | 2054 -------- python/cudf/cudf/tests/test_parquet.py | 4601 ----------------- 11 files changed, 6773 insertions(+), 6875 deletions(-) rename python/cudf/cudf/tests/{ => input_output}/test_hdfs.py (98%) rename python/cudf/cudf/tests/{ => input_output}/test_s3.py (100%) delete mode 100644 python/cudf/cudf/tests/test_gcs.py delete mode 100644 python/cudf/cudf/tests/test_hdf.py delete mode 100644 python/cudf/cudf/tests/test_orc.py delete mode 100644 python/cudf/cudf/tests/test_parquet.py diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index cbab80a9ee7..4b334587815 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -544,6 +544,13 @@ def __init__( self.stripe_size_rows = stripe_size_rows self.row_index_stride = row_index_stride self.initialized = False + self.writer = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() def write_table(self, table): """Writes a single table to the file""" diff --git a/python/cudf/cudf/tests/input_output/test_csv.py b/python/cudf/cudf/tests/input_output/test_csv.py index 9ab62b11c7b..24ea277ccbb 100644 --- a/python/cudf/cudf/tests/input_output/test_csv.py +++ b/python/cudf/cudf/tests/input_output/test_csv.py @@ -2251,3 +2251,41 @@ def test_empty_file_pandas_compat_raises(tmp_path): cudf.read_csv(empty_file) with pytest.raises(pd.errors.EmptyDataError): cudf.read_csv(str(empty_file)) + + +def test_read_csv_gcs(monkeypatch): + gcsfs = pytest.importorskip("gcsfs") + pdf = pd.DataFrame( + { + "Integer": np.array([2345, 11987, 9027, 9027]), + "Float": np.array([9.001, 8.343, 6, 2.781]), + "Integer2": np.array([2345, 106, 2088, 789277]), + "String": np.array(["Alpha", "Beta", "Gamma", "Delta"]), + "Boolean": np.array([True, False, True, False]), + } + ) + + # Write to buffer + fpath = "cudf-gcs-test-bucket/test_csv_reader.csv" + buffer = pdf.to_csv(index=False) + + def mock_open(*args, **kwargs): + return BytesIO(buffer.encode()) + + def mock_size(*args): + return len(buffer.encode()) + + monkeypatch.setattr(gcsfs.GCSFileSystem, "open", mock_open) + monkeypatch.setattr(gcsfs.GCSFileSystem, "size", mock_size) + + # Test read from explicit path. 
+ got = cudf.read_csv(f"gcs://{fpath}") + assert_eq(pdf, got) + + # AbstractBufferedFile -> PythonFile conversion + # will work fine with the monkey-patched FS if we + # pass in an fsspec file object + fs = gcsfs.GCSFileSystem() + with fs.open(f"gcs://{fpath}") as f: + got = cudf.read_csv(f) + assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/input_output/test_hdf5.py b/python/cudf/cudf/tests/input_output/test_hdf5.py index 06777c8e6af..dc48a9bdc4f 100644 --- a/python/cudf/cudf/tests/input_output/test_hdf5.py +++ b/python/cudf/cudf/tests/input_output/test_hdf5.py @@ -1 +1,140 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. + +import os +from string import ascii_letters + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES + +pytest.importorskip("tables") + + +@pytest.fixture(params=[0, 10]) +def pdf(request): + types = set([*NUMERIC_TYPES, "datetime64[ns]", "bool"]) - set( + UNSIGNED_TYPES + ) + typer = {"col_" + val: val for val in types} + ncols = len(types) + nrows = request.param + + rng = np.random.default_rng(1) + # Create a pandas dataframe with random data of mixed types + test_pdf = pd.DataFrame( + rng.integers(0, 50, size=(nrows, ncols)), + columns=pd.Index([f"col_{typ}" for typ in types]), + index=pd.RangeIndex(nrows, name="test_index"), + ) + # Cast all the column dtypes to objects, rename them, and then cast to + # appropriate types + test_pdf = test_pdf.astype(typer).rename( + {"col_datetime64[ns]": "col_datetime64"}, axis=1 + ) + + # Create non-numeric categorical data otherwise may be typecasted + data = rng.choice(list(ascii_letters), size=nrows) + test_pdf["col_category"] = pd.Series(data, dtype="category") + + return (test_pdf, nrows) + + +@pytest.fixture +def gdf(pdf): + pdf, nrows = pdf + return (cudf.DataFrame.from_pandas(pdf), nrows) + + +@pytest.fixture(params=["fixed", "table"]) +def hdf_files(request, tmp_path, pdf): + pdf, nrows = pdf + if request.param == "fixed": + pdf = pdf.drop("col_category", axis=1) + + fname_df = tmp_path / "test_df.hdf" + pdf.to_hdf(fname_df, key="hdf_df_tests", format=request.param) + + fname_series = {} + for column in pdf.columns: + fname_series[column] = tmp_path / "test_series.hdf" + pdf[column].to_hdf( + fname_series[column], key="hdf_series_tests", format=request.param + ) + return (fname_df, fname_series, request.param, nrows) + + +@pytest.mark.filterwarnings("ignore:Using CPU") +@pytest.mark.filterwarnings("ignore:Strings are not yet supported") +@pytest.mark.parametrize( + "columns", + [["col_int8"], ["col_category"], ["col_int32", "col_float32"], None], +) +def test_hdf_reader(hdf_files, columns): + hdf_df_file, hdf_series, format, nrows = hdf_files + if format == "fixed" and columns is not None: + pytest.skip("Can't use columns with format 'fixed'") + if format == "table" and nrows == 0: + pytest.skip("Can't read 0 row table with format 'table'") + expect_df = pd.read_hdf(hdf_df_file, columns=columns) + got_df = cudf.read_hdf(hdf_df_file, columns=columns) + + assert_eq( + expect_df, got_df, check_categorical=False, check_index_type=False + ) + + for column in hdf_series.keys(): + expect_series = pd.read_hdf(hdf_series[column]) + got_series = cudf.read_hdf(hdf_series[column]) + + assert_eq(expect_series, got_series, check_index_type=False) + + +@pytest.mark.parametrize("format", ["fixed", "table"]) +@pytest.mark.parametrize("complib", ["zlib", "bzip2", "lzo", 
"blosc"]) +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_hdf_writer(tmp_path, pdf, gdf, complib, format): + pdf, nrows = pdf + if format == "table" and nrows == 0: + pytest.skip("Can't read 0 row table with format 'table'") + gdf, _ = gdf + + if format == "fixed": + pdf = pdf.drop("col_category", axis=1) + gdf = gdf.drop("col_category", axis=1) + + pdf_df_fname = tmp_path / "pdf_df.hdf" + gdf_df_fname = tmp_path / "gdf_df.hdf" + + pdf.to_hdf(pdf_df_fname, key="hdf_tests", format=format, complib=complib) + gdf.to_hdf(gdf_df_fname, key="hdf_tests", format=format, complib=complib) + + assert os.path.exists(pdf_df_fname) + assert os.path.exists(gdf_df_fname) + + expect = pd.read_hdf(pdf_df_fname) + got = pd.read_hdf(gdf_df_fname) + + assert_eq(expect, got, check_index_type=False) + + for column in pdf.columns: + pdf_series_fname = tmp_path / (column + "_" + "pdf_series.hdf") + gdf_series_fname = tmp_path / (column + "_" + "gdf_series.hdf") + + pdf[column].to_hdf( + pdf_series_fname, key="hdf_tests", format=format, complib=complib + ) + gdf[column].to_hdf( + gdf_series_fname, key="hdf_tests", format=format, complib=complib + ) + + assert os.path.exists(pdf_series_fname) + assert os.path.exists(gdf_series_fname) + + expect_series = pd.read_hdf(pdf_series_fname) + got_series = pd.read_hdf(gdf_series_fname) + + assert_eq(expect_series, got_series, check_index_type=False) diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/input_output/test_hdfs.py similarity index 98% rename from python/cudf/cudf/tests/test_hdfs.py rename to python/cudf/cudf/tests/input_output/test_hdfs.py index 098b5192d4a..f7845e7cef0 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/input_output/test_hdfs.py @@ -1,6 +1,5 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. -import os from io import BytesIO import fastavro @@ -12,8 +11,10 @@ import cudf from cudf.testing import assert_eq -if not os.environ.get("RUN_HDFS_TESTS"): - pytestmark = pytest.mark.skip("Env not configured to run HDFS tests") +pytest.skip( + reason="https://github.com/rapidsai/cudf/issues/19633", + allow_module_level=True, +) basedir = "/tmp/test-hdfs" diff --git a/python/cudf/cudf/tests/input_output/test_orc.py b/python/cudf/cudf/tests/input_output/test_orc.py index 06777c8e6af..2590c41a315 100644 --- a/python/cudf/cudf/tests/input_output/test_orc.py +++ b/python/cudf/cudf/tests/input_output/test_orc.py @@ -1 +1,2022 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. + +import datetime +import decimal +import os +import random +from io import BytesIO +from string import ascii_letters, ascii_lowercase + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest +from pyarrow import orc + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.io.orc import ORCWriter +from cudf.testing import assert_eq, assert_frame_equal +from cudf.testing._utils import ( + expect_warning_if, + gen_rand_series, + supported_numpy_dtypes, +) + +# Removal of these deprecated features is no longer imminent. They will not be +# removed until a suitable alternative has been implemented. As a result, we +# also do not want to stop testing them yet. +# https://github.com/rapidsai/cudf/issues/11519 +pytestmark = pytest.mark.filterwarnings( + "ignore:(num_rows|skiprows) is deprecated and will be removed." 
+)
+
+
+@pytest.fixture(scope="module")
+def datadir(datadir):
+    return datadir / "orc"
+
+
+@pytest.fixture
+def path_or_buf(datadir):
+    fname = datadir / "TestOrcFile.test1.orc"
+    try:
+        with open(fname, "rb") as f:
+            buffer = BytesIO(f.read())
+    except Exception as excpr:
+        if type(excpr).__name__ == "FileNotFoundError":
+            pytest.skip(".orc file is not found")
+        raise excpr
+
+    def _make_path_or_buf(src):
+        if src == "filepath":
+            return str(fname)
+        if src == "pathobj":
+            return fname
+        if src == "bytes_io":
+            return buffer
+        if src == "bytes":
+            return buffer.getvalue()
+        if src == "url":
+            return fname.as_uri()
+
+        raise ValueError("Invalid source type")
+
+    yield _make_path_or_buf
+
+
+@pytest.fixture(params=[True, False])
+def use_index(request):
+    return request.param
+
+
+@pytest.fixture(params=["pyarrow", "cudf"])
+def engine(request):
+    return request.param
+
+
+@pytest.mark.filterwarnings("ignore:Using CPU")
+@pytest.mark.parametrize(
+    "inputfile, columns",
+    [
+        ("TestOrcFile.emptyFile.orc", ["boolean1"]),
+        (
+            "TestOrcFile.test1.orc",
+            [
+                "boolean1",
+                "byte1",
+                "short1",
+                "int1",
+                "long1",
+                "float1",
+                "double1",
+            ],
+        ),
+        ("TestOrcFile.RLEv2.orc", ["x", "y"]),
+        ("TestOrcFile.testSnappy.orc", None),
+        ("TestOrcFile.demo-12-zlib.orc", ["_col2", "_col3", "_col4", "_col5"]),
+    ],
+)
+def test_orc_reader_basic(datadir, inputfile, columns, use_index, engine):
+    path = datadir / inputfile
+
+    expect = pd.read_orc(path, columns=columns)
+    got = cudf.read_orc(
+        path, engine=engine, columns=columns, use_index=use_index
+    )
+
+    assert_frame_equal(cudf.from_pandas(expect), got, check_categorical=False)
+
+
+def test_orc_reader_filenotfound(tmpdir):
+    with pytest.raises(FileNotFoundError):
+        cudf.read_orc("TestMissingFile.orc")
+
+    with pytest.raises(FileNotFoundError):
+        cudf.read_orc(tmpdir.mkdir("cudf_orc"))
+
+
+def test_orc_reader_local_filepath():
+    path = "~/TestLocalFile.orc"
+    if not os.path.isfile(path):
+        pytest.skip("Local .orc file is not found")
+
+    cudf.read_orc(path)
+
+
+@pytest.mark.parametrize(
+    "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"]
+)
+def test_orc_reader_filepath_or_buffer(path_or_buf, src):
+    cols = ["int1", "long1", "float1", "double1"]
+
+    expect = pd.read_orc(path_or_buf("filepath"), columns=cols)
+    got = cudf.read_orc(path_or_buf(src), columns=cols)
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Bug in older version of pandas",
+)
+def test_orc_reader_trailing_nulls(datadir):
+    path = datadir / "TestOrcFile.nulls-at-end-snappy.orc"
+    expect = pd.read_orc(path)
+    got = cudf.read_orc(path)
+
+    assert_eq(expect, got, check_categorical=True)
+
+
+@pytest.mark.parametrize(
+    "inputfile",
+    ["TestOrcFile.testDate1900.orc", "TestOrcFile.testDate2038.orc"],
+)
+def test_orc_reader_datetimestamp(datadir, inputfile, use_index):
+    path = datadir / inputfile
+    try:
+        orcfile = orc.ORCFile(path)
+    except pa.ArrowIOError as e:
+        pytest.skip(".orc file is not found: %s" % e)
+
+    pdf = orcfile.read().to_pandas(date_as_object=False)
+    gdf = cudf.read_orc(path, use_index=use_index)
+
+    assert_eq(pdf, gdf, check_categorical=False, check_exact=False)
+
+
+def test_orc_reader_strings(datadir):
+    path = datadir / "TestOrcFile.testStringAndBinaryStatistics.orc"
+
+    expect = pd.read_orc(path, columns=["string1"])
+    got = cudf.read_orc(path, columns=["string1"])
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+def test_orc_read_statistics(datadir):
+    # Read in file
containing 2 columns ("int1" and "string1") and 3 stripes + # (sizes 5000, 5000 and 1000 respectively). Each stripe has the same value + # in every one of its rows. The values the stripes have are 1, 2, and 3 in + # "int1" and "one", "two", and "three" in "string1". + path = datadir / "TestOrcFile.testStripeLevelStats.orc" + try: + ( + file_statistics, + stripes_statistics, + ) = cudf.io.orc.read_orc_statistics([path, path]) + except pa.ArrowIOError as e: + pytest.skip(".orc file is not found: %s" % e) + + # Check numberOfValues + assert_eq(file_statistics[0]["int1"].number_of_values, 11_000) + assert_eq( + file_statistics[0]["int1"].number_of_values, + sum( + [ + stripes_statistics[0]["int1"].number_of_values, + stripes_statistics[1]["int1"].number_of_values, + stripes_statistics[2]["int1"].number_of_values, + ] + ), + ) + assert_eq( + stripes_statistics[1]["int1"].number_of_values, + stripes_statistics[1]["string1"].number_of_values, + ) + assert_eq(stripes_statistics[2]["string1"].number_of_values, 1_000) + + # Check other statistics + assert_eq(stripes_statistics[2]["string1"].has_null, False) + assert_eq( + file_statistics[0]["int1"]["minimum"], + min( + stripes_statistics[0]["int1"]["minimum"], + stripes_statistics[1]["int1"]["minimum"], + stripes_statistics[2]["int1"]["minimum"], + ), + ) + assert_eq(file_statistics[0]["int1"]["minimum"], 1) + assert_eq(file_statistics[0]["string1"]["minimum"], "one") + + +@pytest.mark.filterwarnings("ignore:Using CPU") +@pytest.mark.parametrize( + "predicate,expected_len", + [ + (("int1", "==", 1), 5000), + (("int1", "<=", 2), 10000), + (("int1", "==", -1), 0), + (("int1", "in", range(3)), 10000), + (("int1", "in", {1, 3}), 6000), + (("int1", "not in", {1, 3}), 5000), + ], +) +def test_orc_read_filtered(datadir, engine, predicate, expected_len): + predicate = [[predicate]] + path = datadir / "TestOrcFile.testStripeLevelStats.orc" + try: + df_filtered = cudf.read_orc(path, engine=engine, filters=predicate) + except pa.ArrowIOError as e: + pytest.skip(".orc file is not found: %s" % e) + + # Assert # of rows after filtering + assert len(df_filtered) == expected_len + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_orc_read_stripes(datadir, engine): + path = datadir / "TestOrcFile.testDate1900.orc" + try: + pdf = cudf.read_orc(path, engine=engine) + except pa.ArrowIOError as e: + pytest.skip(".orc file is not found: %s" % e) + + num_rows, stripes, col_names = cudf.io.read_orc_metadata(path) + + # Read stripes one at a time + gdf = [ + cudf.read_orc(path, engine=engine, stripes=[[i]]) + for i in range(stripes) + ] + gdf = cudf.concat(gdf).reset_index(drop=True) + assert_eq(pdf, gdf, check_categorical=False, check_index_type=True) + + # Read stripes all at once + gdf = cudf.read_orc( + path, engine=engine, stripes=[[int(x) for x in range(stripes)]] + ) + assert_eq(pdf, gdf, check_categorical=False) + + # Read only some stripes + gdf = cudf.read_orc(path, engine=engine, stripes=[[0, 1]]) + assert_eq(gdf, pdf.head(25000)) + gdf = cudf.read_orc(path, engine=engine, stripes=[[0, stripes - 1]]) + assert_eq( + gdf, + cudf.concat([pdf.head(15000), pdf.tail(10000)], ignore_index=True), + check_index_type=True, + ) + + +@pytest.mark.parametrize("num_rows", [1, 100, 3000]) +@pytest.mark.parametrize("skiprows", [0, 1, 3000]) +def test_orc_read_rows(datadir, skiprows, num_rows): + path = datadir / "TestOrcFile.decimal.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path, skiprows=skiprows, num_rows=num_rows) + + # Slice rows out of the whole 
dataframe for comparison as PyArrow doesn't + # have an API to read a subsection of rows from the file + pdf = pdf[skiprows : skiprows + num_rows] + pdf = pdf.reset_index(drop=True) + + assert_eq(pdf, gdf) + + +def test_orc_read_skiprows(): + buff = BytesIO() + df = pd.DataFrame( + { + "a": [ + True, + False, + True, + False, + None, + True, + True, + True, + False, + None, + False, + False, + True, + True, + True, + True, + ] + } + ) + df.to_orc(buff) + # testing 10 skiprows due to a boolean specific bug fix that didn't + # repro for other sizes of data + skiprows = 10 + + expected = ( + pd.read_orc(buff)[skiprows:].reset_index(drop=True).astype("bool") + ) + got = cudf.read_orc(buff, skiprows=skiprows) + assert_eq(expected, got) + + +def test_orc_reader_uncompressed_block(datadir): + path = datadir / "uncompressed_snappy.orc" + + expect = pd.read_orc(path) + got = cudf.read_orc(path) + + assert_eq(expect, got, check_categorical=False) + + +def test_orc_reader_nodata_block(datadir): + path = datadir / "nodata.orc" + + expect = pd.read_orc(path) + got = cudf.read_orc(path, num_rows=1) + + assert_eq(expect, got, check_categorical=False) + + +@pytest.mark.parametrize("compression", [None, "snappy"]) +@pytest.mark.parametrize( + "reference_file, columns", + [ + ( + "TestOrcFile.test1.orc", + [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + ], + ), + ("TestOrcFile.demo-12-zlib.orc", ["_col1", "_col3", "_col5"]), + ], +) +def test_orc_writer(datadir, tmp_path, reference_file, columns, compression): + pdf_fname = datadir / reference_file + gdf_fname = tmp_path / "gdf.orc" + + expect = cudf.from_pandas(pd.read_orc(pdf_fname, columns=columns)) + expect.to_orc(gdf_fname, compression=compression) + got = cudf.from_pandas(pd.read_orc(gdf_fname, columns=columns)) + + assert_frame_equal(expect, got) + + +@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) +def test_orc_writer_statistics_frequency(datadir, tmp_path, stats_freq): + reference_file = "TestOrcFile.demo-12-zlib.orc" + pdf_fname = datadir / reference_file + gdf_fname = tmp_path / "gdf.orc" + + expect = cudf.from_pandas(pd.read_orc(pdf_fname)) + expect.to_orc(gdf_fname, statistics=stats_freq) + got = cudf.from_pandas(pd.read_orc(gdf_fname)) + + assert_frame_equal(expect, got) + + +@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) +def test_chunked_orc_writer_statistics_frequency( + datadir, tmp_path, stats_freq +): + reference_file = "TestOrcFile.test1.orc" + pdf_fname = datadir / reference_file + gdf_fname = tmp_path / "chunked_gdf.orc" + + columns = [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + ] + pdf = pd.read_orc(pdf_fname, columns=columns) + gdf = cudf.from_pandas(pdf) + expect = pd.concat([pdf, pdf]).reset_index(drop=True) + + with ORCWriter(gdf_fname, statistics=stats_freq) as writer: + writer.write_table(gdf) + writer.write_table(gdf) + + got = pd.read_orc(gdf_fname) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("compression", [None, "snappy"]) +@pytest.mark.parametrize( + "reference_file, columns", + [ + ( + "TestOrcFile.test1.orc", + [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + ], + ), + ("TestOrcFile.demo-12-zlib.orc", ["_col1", "_col3", "_col5"]), + ], +) +def test_chunked_orc_writer( + datadir, tmp_path, reference_file, columns, compression +): + pdf_fname = datadir / reference_file + gdf_fname = tmp_path / "chunked_gdf.orc" + + pdf = pd.read_orc(pdf_fname, 
columns=columns) + gdf = cudf.from_pandas(pdf) + expect = pd.concat([pdf, pdf]).reset_index(drop=True) + + with ORCWriter(gdf_fname, compression=compression) as writer: + writer.write_table(gdf) + writer.write_table(gdf) + + got = pd.read_orc(gdf_fname, columns=columns) + assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) + + +@pytest.mark.parametrize( + "dtypes", + [ + {"c": str, "a": int}, + {"c": int, "a": str}, + {"c": int, "a": str, "b": float}, + {"c": str, "a": object}, + ], +) +def test_orc_writer_strings(tmp_path, dtypes): + gdf_fname = tmp_path / "gdf_strings.orc" + + expect = cudf.datasets.randomdata(nrows=10, dtypes=dtypes, seed=1) + expect.to_orc(gdf_fname) + got = pd.read_orc(gdf_fname) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "dtypes", + [ + {"c": str, "a": int}, + {"c": int, "a": str}, + {"c": int, "a": str, "b": float}, + {"c": str, "a": object}, + ], +) +def test_chunked_orc_writer_strings(tmp_path, dtypes): + gdf_fname = tmp_path / "chunked_gdf_strings.orc" + + gdf = cudf.datasets.randomdata(nrows=10, dtypes=dtypes, seed=1) + pdf = gdf.to_pandas() + expect = pd.concat([pdf, pdf]).reset_index(drop=True) + with ORCWriter(gdf_fname) as writer: + writer.write_table(gdf) + writer.write_table(gdf) + + got = pd.read_orc(gdf_fname) + + assert_eq(expect, got) + + +def test_orc_writer_sliced(tmp_path): + cudf_path = tmp_path / "cudf.orc" + + df = pd.DataFrame() + df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) + df = cudf.from_pandas(df) + + df_select = df.iloc[1:3] + + df_select.to_orc(cudf_path) + assert_eq(cudf.read_orc(cudf_path), df_select) + + +@pytest.mark.parametrize( + "orc_file", + [ + "TestOrcFile.decimal.orc", + "TestOrcFile.decimal.same.values.orc", + "TestOrcFile.decimal.multiple.values.orc", + # For additional information take look at PR 7034 + "TestOrcFile.decimal.runpos.issue.orc", + ], +) +def test_orc_reader_decimal_type(datadir, orc_file): + file_path = datadir / orc_file + + pdf = pd.read_orc(file_path) + df = cudf.read_orc(file_path) + + assert_eq(pdf, df) + + +def test_orc_decimal_precision_fail(datadir): + file_path = datadir / "TestOrcFile.int_decimal.precision_19.orc" + + # Shouldn't cause failure if decimal column is not chosen to be read. 
+    pdf = pd.read_orc(file_path, columns=["int"])
+    gdf = cudf.read_orc(file_path, columns=["int"])
+
+    assert_eq(pdf, gdf)
+
+
+# For additional information take a look at PR 6636 and 6702
+@pytest.mark.parametrize(
+    "orc_file",
+    [
+        "TestOrcFile.boolean_corruption_PR_6636.orc",
+        "TestOrcFile.boolean_corruption_PR_6702.orc",
+    ],
+)
+def test_orc_reader_boolean_type(datadir, orc_file):
+    file_path = datadir / orc_file
+
+    pdf = pd.read_orc(file_path)
+    df = cudf.read_orc(file_path).to_pandas()
+
+    assert_eq(pdf, df)
+
+
+def test_orc_reader_tzif_timestamps(datadir):
+    # Contains timestamps in the range covered by the TZif file
+    # Other datetime tests only cover "future" times
+    path = datadir / "TestOrcFile.lima_timezone.orc"
+
+    pdf = pd.read_orc(path)
+    gdf = cudf.read_orc(path)
+
+    assert_eq(pdf, gdf)
+
+
+def test_int_overflow(tmp_path):
+    file_path = tmp_path / "gdf_overflow.orc"
+
+    # The number of rows and the large element trigger delta encoding
+    num_rows = 513
+    df = cudf.DataFrame({"a": [None] * num_rows}, dtype="int64")
+    df["a"][0] = 1024 * 1024 * 1024
+    df["a"][num_rows - 1] = 1
+    df.to_orc(file_path)
+
+    assert_eq(cudf.read_orc(file_path), df)
+
+
+def normalized_equals(value1, value2):
+    # need naive time object for numpy to convert to datetime64
+    if isinstance(value1, datetime.datetime):
+        value1 = value1.replace(tzinfo=None)
+    if isinstance(value2, datetime.datetime):
+        value2 = value2.replace(tzinfo=None)
+
+    if isinstance(value1, (datetime.datetime, np.datetime64)):
+        value1 = np.datetime64(value1, "ms")
+    if isinstance(value2, (datetime.datetime, np.datetime64)):
+        value2 = np.datetime64(value2, "ms")
+
+    # Compare integers with floats now
+    if isinstance(value1, float) or isinstance(value2, float):
+        return np.isclose(value1, value2)
+
+    return value1 == value2
+
+
+@pytest.mark.parametrize("stats_freq", ["STRIPE", "ROWGROUP"])
+@pytest.mark.parametrize("nrows", [1, 100, 100000])
+def test_orc_write_statistics(tmp_path, datadir, nrows, stats_freq):
+    supported_stat_types = [*supported_numpy_dtypes, "str"]
+    # Writing bool columns to multiple row groups is disabled
+    # until #6763 is fixed
+    if nrows == 100000:
+        supported_stat_types.remove("bool")
+
+    # Make a dataframe
+    gdf = cudf.DataFrame(
+        {
+            "col_" + str(dtype): gen_rand_series(dtype, nrows, has_nulls=True)
+            for dtype in supported_stat_types
+        }
+    )
+    fname = tmp_path / "gdf.orc"
+
+    # Write said dataframe to ORC with cuDF
+    gdf.to_orc(fname, statistics=stats_freq, stripe_size_rows=30000)
+
+    # Read back written ORC's statistics
+    orc_file = orc.ORCFile(fname)
+    (
+        file_stats,
+        stripes_stats,
+    ) = cudf.io.orc.read_orc_statistics([fname])
+
+    # check file stats
+    for col in gdf:
+        if "minimum" in file_stats[0][col]:
+            stats_min = file_stats[0][col]["minimum"]
+            if stats_min is not None:
+                actual_min = gdf[col].min()
+                assert normalized_equals(actual_min, stats_min)
+        if "maximum" in file_stats[0][col]:
+            stats_max = file_stats[0][col]["maximum"]
+            if stats_max is not None:
+                actual_max = gdf[col].max()
+                assert normalized_equals(actual_max, stats_max)
+        if "number_of_values" in file_stats[0][col]:
+            stats_num_vals = file_stats[0][col]["number_of_values"]
+            if stats_num_vals is not None:
+                actual_num_vals = gdf[col].count()
+                assert stats_num_vals == actual_num_vals
+
+    # compare stripe statistics with actual min/max
+    for stripe_idx in range(0, orc_file.nstripes):
+        stripe = orc_file.read_stripe(stripe_idx)
+        # pandas is unable to handle min/max of string col with nulls
+        stripe_df = cudf.DataFrame(stripe.to_pandas())
cudf.DataFrame(stripe.to_pandas()) + for col in stripe_df: + if "minimum" in stripes_stats[stripe_idx][col]: + stats_min = stripes_stats[stripe_idx][col]["minimum"] + if stats_min is not None: + actual_min = stripe_df[col].min() + assert normalized_equals(actual_min, stats_min) + + if "maximum" in stripes_stats[stripe_idx][col]: + stats_max = stripes_stats[stripe_idx][col]["maximum"] + if stats_max is not None: + actual_max = stripe_df[col].max() + assert normalized_equals(actual_max, stats_max) + + if "number_of_values" in stripes_stats[stripe_idx][col]: + stats_num_vals = stripes_stats[stripe_idx][col][ + "number_of_values" + ] + if stats_num_vals is not None: + actual_num_vals = stripe_df[col].count() + assert stats_num_vals == actual_num_vals + + +@pytest.mark.parametrize("stats_freq", ["STRIPE", "ROWGROUP"]) +@pytest.mark.parametrize("nrows", [2, 100, 1024]) +def test_orc_chunked_write_statistics(tmp_path, datadir, nrows, stats_freq): + supported_stat_types = [*supported_numpy_dtypes, "str"] + # Writing bool columns to multiple row groups is disabled + # until #6763 is fixed + if nrows == 1024: + supported_stat_types.remove("bool") + + gdf_fname = tmp_path / "chunked_stats.orc" + with ORCWriter( + gdf_fname, statistics=stats_freq, stripe_size_rows=512 + ) as writer: + max_char_length = 100 if nrows < 1000 else 10 + + # Make a dataframe + gdf = cudf.DataFrame( + { + "col_" + str(dtype): gen_rand_series( + dtype, + nrows // 2, + has_nulls=True, + low=0, + high=max_char_length, + seed=0, + ) + for dtype in supported_stat_types + } + ) + + pdf1 = gdf.to_pandas() + writer.write_table(gdf) + # gdf is deliberately rebound here so that the original table is + # destroyed before the next write_table call; this verifies that the + # data is persisted inside write and that no pointers into the + # original table are kept + gdf = cudf.DataFrame( + { + "col_" + str(dtype): gen_rand_series( + dtype, + nrows // 2, + has_nulls=True, + low=0, + high=max_char_length, + ) + for dtype in supported_stat_types + } + ) + pdf2 = gdf.to_pandas() + writer.write_table(gdf) + + # pandas is unable to handle min/max of string col with nulls + expect = cudf.DataFrame(pd.concat([pdf1, pdf2]).reset_index(drop=True)) + + # Read back written ORC's statistics + orc_file = orc.ORCFile(gdf_fname) + ( + file_stats, + stripes_stats, + ) = cudf.io.orc.read_orc_statistics([gdf_fname]) + + # check file stats + for col in expect: + if "minimum" in file_stats[0][col]: + stats_min = file_stats[0][col]["minimum"] + if stats_min is not None: + actual_min = expect[col].min() + assert normalized_equals(actual_min, stats_min) + if "maximum" in file_stats[0][col]: + stats_max = file_stats[0][col]["maximum"] + if stats_max is not None: + actual_max = expect[col].max() + assert normalized_equals(actual_max, stats_max) + if "number_of_values" in file_stats[0][col]: + stats_num_vals = file_stats[0][col]["number_of_values"] + if stats_num_vals is not None: + actual_num_vals = expect[col].count() + assert stats_num_vals == actual_num_vals + + # compare stripe statistics with actual min/max + for stripe_idx in range(0, orc_file.nstripes): + stripe = orc_file.read_stripe(stripe_idx) + # pandas is unable to handle min/max of string col with nulls + stripe_df = cudf.DataFrame(stripe.to_pandas()) + for col in stripe_df: + if "minimum" in stripes_stats[stripe_idx][col]: + stats_min = stripes_stats[stripe_idx][col]["minimum"] + if stats_min is not None: + actual_min = stripe_df[col].min() + assert normalized_equals(actual_min, stats_min) + + if "maximum" in 
stripes_stats[stripe_idx][col]: + stats_max = stripes_stats[stripe_idx][col]["maximum"] + if stats_max is not None: + actual_max = stripe_df[col].max() + assert normalized_equals(actual_max, stats_max) + + if "number_of_values" in stripes_stats[stripe_idx][col]: + stats_num_vals = stripes_stats[stripe_idx][col][ + "number_of_values" + ] + if stats_num_vals is not None: + actual_num_vals = stripe_df[col].count() + assert stats_num_vals == actual_num_vals + + +@pytest.mark.parametrize("nrows", [1, 100, 100000]) +def test_orc_write_bool_statistics(tmp_path, datadir, nrows): + # Make a dataframe + gdf = cudf.DataFrame({"col_bool": gen_rand_series("bool", nrows)}) + fname = tmp_path / "gdf.orc" + + # Write said dataframe to ORC with cuDF + gdf.to_orc(fname, stripe_size_rows=30000) + + # Read back written ORC's statistics + orc_file = orc.ORCFile(fname) + ( + file_stats, + stripes_stats, + ) = cudf.io.orc.read_orc_statistics([fname]) + + # check file stats + col = "col_bool" + if "true_count" in file_stats[0][col]: + stats_true_count = file_stats[0][col]["true_count"] + actual_true_count = gdf[col].sum() + assert normalized_equals(actual_true_count, stats_true_count) + + if "number_of_values" in file_stats[0][col]: + stats_valid_count = file_stats[0][col]["number_of_values"] + actual_valid_count = len(gdf[col]) - gdf[col].null_count + assert normalized_equals(actual_valid_count, stats_valid_count) + + # compare stripe statistics with the actual true/valid counts + for stripe_idx in range(0, orc_file.nstripes): + stripe = orc_file.read_stripe(stripe_idx) + # read the stripe back as a cudf DataFrame for comparison + stripe_df = cudf.DataFrame(stripe.to_pandas()) + + if "true_count" in stripes_stats[stripe_idx][col]: + actual_true_count = stripe_df[col].sum() + stats_true_count = stripes_stats[stripe_idx][col]["true_count"] + assert normalized_equals(actual_true_count, stats_true_count) + + if "number_of_values" in stripes_stats[stripe_idx][col]: + actual_valid_count = ( + len(stripe_df[col]) - stripe_df[col].null_count + ) + stats_valid_count = stripes_stats[stripe_idx][col][ + "number_of_values" + ] + assert normalized_equals(actual_valid_count, stats_valid_count) + + +def test_orc_reader_gmt_timestamps(datadir): + path = datadir / "TestOrcFile.gmt.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + assert_eq(pdf, gdf) + + +def test_orc_bool_encode_fail(): + buffer = BytesIO() + + # Generate a boolean column longer than a single row group + fail_df = cudf.DataFrame({"col": gen_rand_series("bool", 20000)}) + # Invalidate a row in the first row group + fail_df["col"][5000] = None + + # Should throw instead of generating a file that is incompatible + # with other readers (see issue #6763) + with pytest.raises(RuntimeError): + fail_df.to_orc(buffer) + + # Generate a boolean column longer than a single row group + okay_df = cudf.DataFrame({"col": gen_rand_series("bool", 20000)}) + okay_df["col"][15000] = None + # Invalid row is in the last row group; encoding is assumed to be correct + okay_df.to_orc(buffer) + + # Also validate data + pdf = pd.read_orc(buffer) + + assert_eq(okay_df.to_pandas(nullable=True), pdf) + + +def test_nanoseconds_overflow(): + buffer = BytesIO() + # Use nanosecond values that take more than 32 bits to encode + s = cudf.Series([710424008, -1338482640], dtype="datetime64[ns]") + expected = cudf.DataFrame({"s": s}) + expected.to_orc(buffer) + + cudf_got = cudf.read_orc(buffer) + assert_eq(expected, cudf_got) + + pandas_got = pd.read_orc(buffer) + assert_eq(expected, pandas_got)
+ + +def test_empty_dataframe(): + buffer = BytesIO() + expected = cudf.DataFrame() + expected.to_orc(buffer) + + # Should raise an error if a requested column does not exist. + with pytest.raises(RuntimeError): + cudf.read_orc(buffer, columns=["a"]) + + got_df = cudf.read_orc(buffer) + expected_pdf = pd.read_orc(buffer) + + assert_eq(expected, got_df) + assert_eq(expected_pdf, got_df) + + +@pytest.mark.parametrize( + "data", [[None, ""], ["", None], [None, None], ["", ""]] +) +def test_empty_string_columns(data): + buffer = BytesIO() + + expected = cudf.DataFrame({"string": data}, dtype="str") + expected.to_orc(buffer) + + expected_pdf = pd.read_orc(buffer) + got_df = cudf.read_orc(buffer) + + assert_eq(expected, got_df) + assert_eq( + expected_pdf, + got_df.to_pandas(nullable=True) + if expected_pdf["string"].dtype == pd.StringDtype() + else got_df, + ) + + +@pytest.mark.parametrize("scale", [-3, 0, 3]) +@pytest.mark.parametrize( + "decimal_type", + [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], +) +def test_orc_writer_decimal(tmp_path, scale, decimal_type): + fname = tmp_path / "decimal.orc" + + expected = cudf.DataFrame({"dec_val": gen_rand_series("i", 100)}) + expected["dec_val"] = expected["dec_val"].astype(decimal_type(7, scale)) + + expected.to_orc(fname) + + got = pd.read_orc(fname) + assert_eq(expected.to_pandas()["dec_val"], got["dec_val"]) + + +@pytest.mark.parametrize("num_rows", [1, 100, 3000]) +def test_orc_reader_multiple_files(datadir, num_rows): + path = datadir / "TestOrcFile.testSnappy.orc" + + df_1 = pd.read_orc(path) + df_2 = pd.read_orc(path) + df = pd.concat([df_1, df_2], ignore_index=True) + + gdf = cudf.read_orc([path, path], num_rows=num_rows).to_pandas() + + # Slice rows out of the whole dataframe for comparison as PyArrow doesn't + # have an API to read a subsection of rows from the file + df = df[:num_rows] + df = df.reset_index(drop=True) + + assert_eq(df, gdf) + + +def test_orc_reader_multi_file_single_stripe(datadir): + path = datadir / "TestOrcFile.testSnappy.orc" + + # should raise an exception + with pytest.raises(ValueError): + cudf.read_orc([path, path], stripes=[0]) + + +def test_orc_reader_multi_file_multi_stripe(datadir): + path = datadir / "TestOrcFile.testStripeLevelStats.orc" + gdf = cudf.read_orc([path, path], stripes=[[0, 1], [2]]) + pdf = pd.read_orc(path) + assert_eq(pdf, gdf) + + +def test_orc_string_stream_offset_issue(): + size = 30000 + vals = { + str(x): [decimal.Decimal(1)] * size if x != 0 else ["XYZ"] * size + for x in range(0, 5) + } + df = cudf.DataFrame(vals) + + buffer = BytesIO() + df.to_orc(buffer) + + assert_eq(df, cudf.read_orc(buffer)) + + +@pytest.fixture(scope="module") +def list_struct_buff(): + size = 100 + rd = random.Random(1) + rng = np.random.default_rng(seed=1) + + buff = BytesIO() + + lvl3_list = [ + rd.choice( + [ + None, + [ + [ + [ + rd.choice([None, rng.integers(1, 3)]) + for _ in range(rng.integers(1, 3)) + ] + for _ in range(rng.integers(0, 3)) + ] + for _ in range(rng.integers(0, 3)) + ], + ] + ) + for _ in range(size) + ] + lvl1_list = [ + [ + rd.choice([None, rng.integers(0, 3)]) + for _ in range(rng.integers(1, 4)) + ] + for _ in range(size) + ] + lvl1_struct = [ + rd.choice( + [ + None, + {"a": rng.integers(0, 3), "b": rng.integers(0, 3)}, + ] + ) + for _ in range(size) + ] + lvl2_struct = [ + rd.choice( + [ + None, + {"a": rd.choice([None, rng.integers(0, 3)])}, + { + "lvl1_struct": { + "c": rd.choice([None, rng.integers(0, 3)]), + "d": rng.integers(0, 3), + }, + }, + ] + ) + for _ 
in range(size) + ] + list_nests_struct = [ + [ + {"a": rd.choice(lvl1_struct), "b": rd.choice(lvl1_struct)} + for _ in range(rng.integers(1, 4)) + ] + for _ in range(size) + ] + struct_nests_list = [ + {"struct": lvl1_struct[x], "list": lvl1_list[x]} for x in range(size) + ] + + pa_table = pa.table( + { + "lvl3_list": lvl3_list, + "lvl1_list": lvl1_list, + "lvl1_struct": lvl1_struct, + "lvl2_struct": lvl2_struct, + "list_nests_struct": list_nests_struct, + "struct_nests_list": struct_nests_list, + } + ) + with orc.ORCWriter(buff, stripe_size=16) as writer: + writer.write(pa_table) + return buff + + +@pytest.mark.parametrize( + "columns", + [ + None, + ["lvl3_list", "list_nests_struct", "lvl2_struct", "struct_nests_list"], + ["lvl2_struct", "lvl1_struct"], + ], +) +@pytest.mark.parametrize("num_rows", [0, 15, 100]) +def test_lists_struct_nests(columns, num_rows, use_index, list_struct_buff): + gdf = cudf.read_orc( + list_struct_buff, + columns=columns, + num_rows=num_rows, + use_index=use_index, + ) + + pyarrow_tbl = orc.ORCFile(list_struct_buff).read() + + pyarrow_tbl = ( + pyarrow_tbl[:num_rows] + if columns is None + else pyarrow_tbl.select(columns)[:num_rows] + ) + + if num_rows > 0: + assert pyarrow_tbl.equals(gdf.to_arrow()) + else: + assert_eq(pyarrow_tbl.to_pandas(), gdf) + + +@pytest.mark.parametrize("columns", [None, ["lvl1_struct"], ["lvl1_list"]]) +def test_skip_rows_for_nested_types(columns, list_struct_buff): + with pytest.raises( + RuntimeError, match="skip_rows is not supported by nested column" + ): + cudf.read_orc( + list_struct_buff, + columns=columns, + use_index=True, + skiprows=5, + ) + + +def test_pyspark_struct(datadir): + path = datadir / "TestOrcFile.testPySparkStruct.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + + assert_eq(pdf, gdf) + + +@pytest.fixture +def map_buff(): + size = 100 + rd = random.Random(1) + rng = np.random.default_rng(seed=1) + + buff = BytesIO() + + lvl1_map = pa.array( + [ + rd.choice( + [ + None, + { + rd.choice(ascii_letters): rd.choice( + [None, rng.integers(1, 1500)] + ), + }, + ] + ) + for _ in range(size) + ], + type=pa.map_(pa.string(), pa.int64()), + ) + lvl2_map = pa.array( + [ + rd.choice( + [ + None, + *( + { + rd.choice(ascii_letters): rd.choice( + [ + None, + [ + rd.choice( + [None, rng.integers(1, 1500)] + ) + for _ in range(5) + ], + ] + ) + } + for _ in range(2) + ), + ] + ) + for _ in range(size) + ], + type=pa.map_(pa.string(), pa.list_(pa.int64())), + ) + lvl2_struct_map = pa.array( + [ + rd.choice( + [ + None, + *( + { + rd.choice(ascii_letters): rd.choice( + [ + None, + { + "a": rd.choice( + [None, rng.integers(1, 1500)] + ), + "b": rd.choice( + [None, rng.integers(1, 1500)] + ), + }, + ] + ) + } + for _ in range(2) + ), + ] + ) + for _ in range(size) + ], + type=pa.map_( + pa.string(), pa.struct({"a": pa.int64(), "b": pa.int64()}) + ), + ) + + pa_table = pa.Table.from_arrays( + [lvl1_map, lvl2_map, lvl2_struct_map], + ["lvl1_map", "lvl2_map", "lvl2_struct_map"], + ) + + orc.write_table(pa_table, buff, stripe_size=16, compression="UNCOMPRESSED") + return buff + + +@pytest.mark.parametrize( + "columns", + [None, ["lvl1_map", "lvl2_struct_map"], ["lvl2_struct_map", "lvl2_map"]], +) +@pytest.mark.parametrize("num_rows", [0, 15, 100]) +def test_map_type_read(columns, num_rows, use_index, map_buff): + tbl = orc.read_table(map_buff) + + lvl1_map = ( + tbl["lvl1_map"] + .combine_chunks() + .view(pa.list_(pa.struct({"key": pa.string(), "value": pa.int64()}))) + ) + lvl2_map = ( + tbl["lvl2_map"] + 
.combine_chunks() + .view( + pa.list_( + pa.struct({"key": pa.string(), "value": pa.list_(pa.int64())}) + ) + ) + ) + lvl2_struct_map = ( + tbl["lvl2_struct_map"] + .combine_chunks() + .view( + pa.list_( + pa.struct( + { + "key": pa.string(), + "value": pa.struct({"a": pa.int64(), "b": pa.int64()}), + } + ) + ) + ) + ) + + expected_tbl = pa.table( + { + "lvl1_map": lvl1_map, + "lvl2_map": lvl2_map, + "lvl2_struct_map": lvl2_struct_map, + } + ) + gdf = cudf.read_orc( + map_buff, columns=columns, num_rows=num_rows, use_index=use_index + ) + + expected_tbl = ( + expected_tbl[:num_rows] + if columns is None + else expected_tbl.select(columns)[:num_rows] + ) + + if num_rows > 0: + assert expected_tbl.equals(gdf.to_arrow()) + else: + assert_eq(expected_tbl.to_pandas(), gdf) + + +def test_orc_reader_decimal(datadir): + path = datadir / "TestOrcFile.decimal.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + + assert_eq(pdf, gdf) + + +# This test case validates the issue raised in #8665; +# please check the issue for more details. +def test_orc_timestamp_read(datadir): + path = datadir / "TestOrcFile.timestamp.issue.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + + assert_eq(pdf, gdf) + + +def dec(num): + return decimal.Decimal(str(num)) + + +@pytest.mark.parametrize( + "data", + [ + # basic + nested strings + { + "lls": [[["a"], ["bb"]] * 5 for i in range(12345)], + "lls2": [[["ccc", "dddd"]] * 6 for i in range(12345)], + "ls_dict": [["X"] * 7 for i in range(12345)], + "ls_direct": [[str(i)] * 9 for i in range(12345)], + "li": [[i] * 11 for i in range(12345)], + "lf": [[i * 0.5] * 13 for i in range(12345)], + "ld": [[dec(i / 2)] * 15 for i in range(12345)], + }, + # with nulls + { + "ls": [ + [str(i) if i % 5 else None, str(2 * i)] if i % 2 else None + for i in range(12345) + ], + "li": [[i, i * i, i % 2] if i % 3 else None for i in range(12345)], + "ld": [ + [dec(i), dec(i / 2) if i % 7 else None] if i % 5 else None + for i in range(12345) + ], + }, + # with empty elements + { + "ls": [ + [str(i), str(2 * i)] if i % 2 else [] for i in range(12345) + ], + "lls": [ + [[str(i), str(2 * i)]] if i % 2 else [[], []] + for i in range(12345) + ], + "li": [[i, i * i, i % 2] if i % 3 else [] for i in range(12345)], + "lli": [ + [[i], [i * i], [i % 2]] if i % 3 else [[]] + for i in range(12345) + ], + "ld": [ + [dec(i), dec(i / 2)] if i % 5 else [] for i in range(12345) + ], + }, + # variable list lengths + { + "ls": [[str(i)] * i for i in range(123)], + "li": [[i, i * i] * i for i in range(123)], + "ld": [[dec(i), dec(i / 2)] * i for i in range(123)], + }, + # many child elements (more than max_stripe_rows) + {"li": [[i] * 1100 for i in range(11000)]}, + ], +) +def test_orc_writer_lists(data): + buffer = BytesIO() + cudf.DataFrame(data).to_orc( + buffer, stripe_size_rows=2048, row_index_stride=512 + ) + # Read in as pandas but compare with pyarrow + # since pandas doesn't have a list type + pa_out = pa.Table.from_pandas(pd.read_orc(buffer)) + pa_in = pa.table(data) + assert pa_out.equals(pa_in) + + +def test_chunked_orc_writer_lists(): + num_rows = 3000 + pdf_in = pd.DataFrame( + { + "ls": [[str(i), str(2 * i)] for i in range(num_rows)], + "ld": [[dec(i / 2)] * 5 for i in range(num_rows)], + } + ) + + gdf = cudf.from_pandas(pdf_in) + expect = pd.concat([pdf_in, pdf_in]).reset_index(drop=True) + + buffer = BytesIO() + with ORCWriter(buffer) as writer: + writer.write_table(gdf) + writer.write_table(gdf) + + got = pd.read_orc(buffer) + assert_eq(expect, got) + + +def 
test_writer_timestamp_stream_size(datadir, tmp_path): + pdf_fname = datadir / "TestOrcFile.largeTimestamps.orc" + gdf_fname = tmp_path / "gdf.orc" + + expect = pd.read_orc(pdf_fname) + cudf.from_pandas(expect).to_orc(gdf_fname) + got = pd.read_orc(gdf_fname) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "fname", + [ + "TestOrcFile.NoIndStrm.StructWithNoNulls.orc", + "TestOrcFile.NoIndStrm.StructAndIntWithNulls.orc", + "TestOrcFile.NoIndStrm.StructAndIntWithNulls.TwoStripes.orc", + "TestOrcFile.NoIndStrm.IntWithNulls.orc", + ], +) +def test_no_row_group_index_orc_read(datadir, fname): + fpath = datadir / fname + + expect = orc.ORCFile(fpath).read() + got = cudf.read_orc(fpath) + + assert expect.equals(got.to_arrow()) + + +def test_names_in_struct_dtype_nesting(datadir): + fname = datadir / "TestOrcFile.NestedStructDataFrame.orc" + + expect = orc.ORCFile(fname).read() + got = cudf.read_orc(fname) + + # test dataframes + assert expect.equals(got.to_arrow()) + + edf = cudf.DataFrame(expect.to_pandas()) + # test schema + assert edf.dtypes.equals(got.dtypes) + + +def test_writer_lists_structs(list_struct_buff): + df_in = cudf.read_orc(list_struct_buff) + + buff = BytesIO() + df_in.to_orc(buff) + + pyarrow_tbl = orc.ORCFile(buff).read() + + assert pyarrow_tbl.equals(df_in.to_arrow()) + + +def test_orc_writer_lists_empty_rg(): + data = { + "with_pd": [ + [i if i % 3 else None] if i < 25 or i > 30 else None + for i in range(50) + ], + "no_pd": [ + [i if i % 3 else None] if i < 25 or i > 30 else [] + for i in range(50) + ], + } + pdf_in = pd.DataFrame(data) + buffer = BytesIO() + cudf_in = cudf.from_pandas(pdf_in) + + cudf_in.to_orc(buffer) + + df = cudf.read_orc(buffer) + assert_eq(df, cudf_in) + + pdf_out = pd.read_orc(buffer) + assert_eq(pdf_in, pdf_out) + + +def test_statistics_sum_overflow(): + maxint64 = np.iinfo(np.int64).max + minint64 = np.iinfo(np.int64).min + + buff = BytesIO() + df = pd.DataFrame( + {"a": [maxint64, 1], "b": [minint64, -1], "c": [minint64, 1]} + ) + df.to_orc(buff) + + file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff]) + assert file_stats[0]["a"].get("sum") is None + assert file_stats[0]["b"].get("sum") is None + assert file_stats[0]["c"].get("sum") == minint64 + 1 + + assert stripe_stats[0]["a"].get("sum") is None + assert stripe_stats[0]["b"].get("sum") is None + assert stripe_stats[0]["c"].get("sum") == minint64 + 1 + + +def test_empty_statistics(): + buff = BytesIO() + pa_table = pa.Table.from_arrays( + [ + pa.array([None], type=pa.int64()), + pa.array([None], type=pa.float64()), + pa.array([None], type=pa.string()), + pa.array([None], type=pa.decimal128(11, 2)), + pa.array([None], type=pa.timestamp("ns")), + pa.array([None], type=pa.date64()), + pa.array([None], type=pa.bool_()), + pa.array([None], type=pa.binary()), + pa.array([1], type=pa.int64()), + ], + ["a", "b", "c", "d", "e", "f", "g", "h", "i"], + ) + orc.write_table(pa_table, buff) + + got = cudf.io.orc.read_orc_statistics([buff]) + + # Check for both file and stripe stats + for stats in got: + # Similar expected stats for the first 6 columns in this case + for col_name in ascii_lowercase[:6]: + assert stats[0][col_name].number_of_values == 0 + assert stats[0][col_name].has_null is True + assert stats[0][col_name].get("minimum") is None + assert stats[0][col_name].get("maximum") is None + for col_name in ascii_lowercase[:3]: + assert stats[0][col_name].get("sum") == 0 + # Sum for decimal column is a string + assert stats[0]["d"].get("sum") == "0" + + assert 
stats[0]["g"].number_of_values == 0 + assert stats[0]["g"].has_null is True + assert stats[0]["g"].get("true_count") == 0 + assert stats[0]["g"].get("false_count") == 0 + + assert stats[0]["h"].number_of_values == 0 + assert stats[0]["h"].has_null is True + assert stats[0]["h"].get("sum") == 0 + + assert stats[0]["i"].number_of_values == 1 + assert stats[0]["i"].has_null is False + assert stats[0]["i"].get("minimum") == 1 + assert stats[0]["i"].get("maximum") == 1 + assert stats[0]["i"].get("sum") == 1 + + +@pytest.mark.parametrize( + "equivalent_columns", + [ + (["lvl1_struct.a", "lvl1_struct.b"], ["lvl1_struct"]), + (["lvl1_struct", "lvl1_struct.a"], ["lvl1_struct"]), + (["lvl1_struct.a", "lvl1_struct"], ["lvl1_struct"]), + (["lvl1_struct.b", "lvl1_struct.a"], ["lvl1_struct.b", "lvl1_struct"]), + (["lvl2_struct.lvl1_struct", "lvl2_struct"], ["lvl2_struct"]), + ( + ["lvl2_struct.a", "lvl2_struct.lvl1_struct.c", "lvl2_struct"], + ["lvl2_struct"], + ), + ], +) +def test_select_nested(list_struct_buff, equivalent_columns): + # The two column selections should be equivalent + df_cols1 = cudf.read_orc(list_struct_buff, columns=equivalent_columns[0]) + df_cols2 = cudf.read_orc(list_struct_buff, columns=equivalent_columns[1]) + assert_eq(df_cols1, df_cols2) + + +def test_orc_writer_rle_stream_size(datadir, tmp_path): + original = datadir / "TestOrcFile.int16.rle.size.orc" + reencoded = tmp_path / "int16_map.orc" + + df = cudf.read_orc(original) + df.to_orc(reencoded) + + # Segfaults when RLE stream sizes don't account for varint length + pa_out = orc.ORCFile(reencoded).read() + assert df.to_arrow().equals(pa_out) + + +def test_empty_columns(): + buffer = BytesIO() + # string and decimal columns have additional steps that need to be skipped + expected = cudf.DataFrame( + { + "string": cudf.Series([], dtype="str"), + "decimal": cudf.Series([], dtype=cudf.Decimal64Dtype(10, 1)), + } + ) + expected.to_orc(buffer, compression="snappy") + + got_df = cudf.read_orc(buffer) + assert_eq(expected, got_df) + + +def test_orc_reader_zstd_compression(list_struct_buff): + expected = cudf.read_orc(list_struct_buff) + # save with ZSTD compression + buffer = BytesIO() + pyarrow_tbl = orc.ORCFile(list_struct_buff).read() + with orc.ORCWriter(buffer, compression="zstd") as writer: + writer.write(pyarrow_tbl) + got = cudf.read_orc(buffer) + # compare with pyarrow since pandas doesn't + # have a list or struct + assert expected.to_arrow().equals(got.to_arrow()) + + +def test_writer_protobuf_large_rowindexentry(): + s = [ + "Length of the two strings needs to add up to at least ~120", + "So that the encoded statistics are larger than 128 bytes", + ] * 5001 # generate more than 10K rows to have two row groups + df = cudf.DataFrame({"s1": s}) + + buff = BytesIO() + df.to_orc(buff) + + got = cudf.read_orc(buff) + assert_frame_equal(df, got) + + +@pytest.mark.parametrize("compression", ["ZLIB", "ZSTD"]) +def test_orc_writer_nvcomp(compression): + expected = cudf.datasets.randomdata( + nrows=12345, dtypes={"a": int, "b": str, "c": float}, seed=1 + ) + + buff = BytesIO() + try: + expected.to_orc(buff, compression=compression) + except RuntimeError: + pytest.skip(reason="Newer nvCOMP version is required") + else: + got = pd.read_orc(buff) + assert_eq(expected, got) + + +def run_orc_columns_and_index_param(index_obj, index, columns): + buffer = BytesIO() + df = cudf.DataFrame( + {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=index_obj + ) + df.to_orc(buffer, index=index) + + expected = pd.read_orc(buffer, columns=columns) + got 
= cudf.read_orc(buffer, columns=columns) + + assert_eq(expected, got, check_index_type=True) + + +@pytest.mark.parametrize("index_obj", [None, [10, 11, 12], ["x", "y", "z"]]) +@pytest.mark.parametrize("index", [True, False, None]) +@pytest.mark.parametrize( + "columns", + [ + None, + pytest.param( + [], + marks=pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Bug in older version of pandas", + ), + ), + ], +) +def test_orc_columns_and_index_param(index_obj, index, columns): + run_orc_columns_and_index_param(index_obj, index, columns) + + +@pytest.mark.parametrize( + "columns,index,index_obj", + [ + ( + ["a", "b"], + True, + None, + ), + ( + ["a", "b"], + True, + [10, 11, 12], + ), + ( + ["a", "b"], + True, + ["x", "y", "z"], + ), + ( + ["a", "b"], + None, + [10, 11, 12], + ), + ( + ["a", "b"], + None, + ["x", "y", "z"], + ), + ], +) +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12026") +def test_orc_columns_and_index_param_read_index(index_obj, index, columns): + run_orc_columns_and_index_param(index_obj, index, columns) + + +@pytest.mark.parametrize( + "columns,index,index_obj", + [ + (["a", "b"], False, None), + (["a", "b"], False, [10, 11, 12]), + (["a", "b"], False, ["x", "y", "z"]), + (["a", "b"], None, None), + ], +) +def test_orc_columns_and_index_param_no_read_index(index_obj, index, columns): + run_orc_columns_and_index_param(index_obj, index, columns) + + +@pytest.mark.parametrize( + "df_data,cols_as_map_type,expected_data", + [ + ( + {"a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]]}, + ["a"], + {"a": [[(10, 20)], [(1, 21)]]}, + ), + ( + { + "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + "b": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + }, + ["b"], + { + "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + "b": [[(10, 20)], [(1, 21)]], + }, + ), + ( + { + "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + "b": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + "c": [ + [{"a": {"a": 10}, "b": 20}], + [{"a": {"a": 12}, "b": 21}], + ], + }, + ["b", "c"], + { + "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], + "b": [[(10, 20)], [(1, 21)]], + "c": [[({"a": 10}, 20)], [({"a": 12}, 21)]], + }, + ), + ], +) +def test_orc_writer_cols_as_map_type(df_data, cols_as_map_type, expected_data): + df = cudf.DataFrame(df_data) + buffer = BytesIO() + df.to_orc(buffer, cols_as_map_type=cols_as_map_type) + + got = pd.read_orc(buffer) + expected = pd.DataFrame(expected_data) + + assert_eq(got, expected) + + +def test_orc_writer_cols_as_map_type_error(): + df = cudf.DataFrame( + {"a": cudf.Series([[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]])} + ) + buffer = BytesIO() + with pytest.raises( + TypeError, match="cols_as_map_type must be a list of column names." + ): + df.to_orc(buffer, cols_as_map_type=1) + + +def test_orc_reader_negative_timestamp(engine): + negative_timestamp_df = cudf.DataFrame( + { + "a": [ + pd.Timestamp("1969-12-31 23:59:59.000123"), + pd.Timestamp("1969-12-31 23:59:58.000999"), + pd.Timestamp("1969-12-31 23:59:58.001001"), + pd.Timestamp("1839-12-24 03:58:56.000826"), + ] + } + ) + buffer = BytesIO() + negative_timestamp_df.to_orc(buffer) + + # We warn the user that this function will fall back to the CPU for reading + # when the engine is pyarrow. 
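+ # (expect_warning_if asserts that the UserWarning is raised only when its + # condition is truthy; with the default "cudf" engine no warning is expected)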
+ with expect_warning_if(engine == "pyarrow", UserWarning): + got = cudf.read_orc(buffer, engine=engine) + + assert_eq(negative_timestamp_df, got, check_dtype=False) + + +def test_orc_writer_negative_timestamp(): + negative_timestamp_df = cudf.DataFrame( + { + "a": [ + pd.Timestamp("1969-12-31 23:59:59.000123"), + pd.Timestamp("1969-12-31 23:59:58.000999"), + pd.Timestamp("1969-12-31 23:59:58.001001"), + pd.Timestamp("1839-12-24 03:58:56.000826"), + ] + } + ) + buffer = BytesIO() + negative_timestamp_df.to_orc(buffer) + + assert_eq(negative_timestamp_df, pd.read_orc(buffer), check_dtype=False) + assert_eq( + negative_timestamp_df, orc.ORCFile(buffer).read(), check_dtype=False + ) + + +@pytest.mark.skip( + reason="Bug specific to rockylinux8: https://github.com/rapidsai/cudf/issues/15802", +) +def test_orc_reader_apache_negative_timestamp(datadir): + path = datadir / "TestOrcFile.apache_timestamp.orc" + + pdf = pd.read_orc(path) + gdf = cudf.read_orc(path) + + assert_eq(pdf, gdf) + + +def test_statistics_string_sum(): + strings = ["a string", "another string!"] + buff = BytesIO() + df = cudf.DataFrame({"str": strings}) + df.to_orc(buff) + + file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff]) + assert_eq(file_stats[0]["str"].get("sum"), sum(len(s) for s in strings)) + + +@pytest.mark.parametrize( + "fname", + [ + "TestOrcFile.Hive.OneEmptyMap.orc", + "TestOrcFile.Hive.OneEmptyList.orc", + "TestOrcFile.Hive.OneNullStruct.orc", + "TestOrcFile.Hive.EmptyListStripe.orc", + "TestOrcFile.Hive.NullStructStripe.orc", + "TestOrcFile.Hive.AllNulls.orc", + ], +) +def test_reader_empty_stripe(datadir, fname): + path = datadir / fname + + expected = pd.read_orc(path) + got = cudf.read_orc(path) + assert_eq(expected, got) + + +# needs enough data for multiple row groups +@pytest.mark.parametrize("data", [["*"] * 10001, ["**", None] * 5001]) +def test_reader_row_index_order(data): + expected = cudf.DataFrame({"str": data}, dtype="string") + + buffer = BytesIO() + expected.to_pandas().to_orc(buffer) + got = cudf.read_orc(buffer) + assert_eq(expected, got) + + +# Test the corner case where empty blocks are compressed +# Decompressed data size is zero, even though compressed data size is non-zero +# For more information see https://github.com/rapidsai/cudf/issues/13608 +def test_orc_reader_empty_decomp_data(datadir): + path = datadir / "TestOrcFile.Spark.EmptyDecompData.orc" + + expect = pd.read_orc(path) + got = cudf.read_orc(path) + + assert_eq(expect, got) + + +def test_orc_reader_empty_deeply_nested_level(datadir): + # Test the case where top level struct has nulls, but the nested struct is + # not nullable. In this case there is no data in the second level, but we + # still need to pass the parent null mask to the third level. 
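+ # Without propagating that mask, rows that are null at the top level could + # be misread as valid entries at the innermost level.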
+ path = datadir / "TestOrcFile.Spark.NestedNotNullableStruct.orc" + + expect = pd.read_orc(path) + got = cudf.read_orc(path) + + assert_eq(expect, got) + + +def test_orc_chunked_writer_stripe_size(datadir): + df = cudf.DataFrame({"col": gen_rand_series("int", 100000)}) + + buffer = BytesIO() + with ORCWriter(buffer, stripe_size_bytes=64 * 1024) as writer: + writer.write_table(df) + + orc_file = orc.ORCFile(buffer) + assert_eq(orc_file.nstripes, 10) + + buffer = BytesIO() + with ORCWriter(buffer, stripe_size_rows=20000) as writer: + writer.write_table(df) + + orc_file = orc.ORCFile(buffer) + assert_eq(orc_file.nstripes, 5) + + +def test_reader_lz4(): + pdf = pd.DataFrame({"ints": [1, 2] * 5001}) + pa_table = pa.Table.from_pandas(pdf) + + buffer = BytesIO() + with orc.ORCWriter(buffer, compression="LZ4") as writer: + writer.write(pa_table) + + got = cudf.read_orc(buffer) + assert_eq(pdf, got) + + +def test_writer_lz4(): + gdf = cudf.DataFrame({"ints": [1, 2] * 5001}) + + buffer = BytesIO() + gdf.to_orc(buffer, compression="LZ4") + + got = pd.read_orc(buffer) + assert_eq(gdf, got) + + +def test_row_group_alignment(datadir): + path = datadir / "TestOrcFile.MapManyNulls.parquet" + + expected = cudf.read_parquet(path) + + buffer = BytesIO() + expected.to_orc(buffer) + + got = cudf.read_orc(buffer) + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "inputfile", + [ + # These sample files have a single column my_timestamp of the TIMESTAMP type, + # 2660 rows, and 1536 rows per row group. + "TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc", + "TestOrcFile.timestamp.desynced.snappy.RLEv2.orc", + # These two files are the same as the above, except that every 100 rows start + # with a null value. + "TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc", + "TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc", + ], +) +def test_orc_reader_desynced_timestamp(datadir, inputfile): + # Test a special case where the DATA stream (seconds) in a TIMESTAMP column + # progresses faster than the SECONDARY stream (nanoseconds) at the start of a row + # group. In this case, the "run cache manager" in the decoder kernel is used to + # orchestrate the dual-stream processing. + # For more information, see https://github.com/rapidsai/cudf/issues/17155.
+ + path = datadir / inputfile + + expect = pd.read_orc(path) + got = cudf.read_orc(path) + + assert_frame_equal(cudf.from_pandas(expect), got) + + +@pytest.mark.parametrize("compression", ["LZ4", "SNAPPY", "ZLIB", "ZSTD"]) +def test_orc_decompression(set_decomp_env_vars, compression): + # Write the DataFrame to an ORC file + buffer = BytesIO() + rng = np.random.default_rng(seed=0) + types = [ + "bool", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "datetime64[ns]", + "str", + ] + nrows = 123 + + # Create a pandas dataframe with random data of mixed types + pd_data = { + f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) + for typ in types + } + pd_data["col_datetime64[ns]"] = pd.Series( + np.asarray( + rng.integers(0, (0x7FFFFFFFFFFFFFFF / 1000), size=nrows), + dtype="datetime64[ns]", + ) + ) + + # Create non-numeric str data + pd_data["col_str"] = pd.Series( + rng.choice(list(ascii_letters), size=nrows), dtype="str" + ) + + non_nested_pdf = pd.DataFrame(pd_data) + non_nested_pdf.to_orc(buffer, engine_kwargs={"compression": compression}) + + # Read the ORC file back into a DataFrame + got = cudf.read_orc(buffer) + + assert_eq(non_nested_pdf, got) + + +def test_write_orc_gcs(monkeypatch, tmp_path): + gcsfs = pytest.importorskip("gcsfs") + pdf = pd.DataFrame( + { + "Integer": np.array([2345, 11987, 9027, 9027]), + "Float": np.array([9.001, 8.343, 6, 2.781]), + "Integer2": np.array([2345, 106, 2088, 789277]), + "String": np.array(["Alpha", "Beta", "Gamma", "Delta"]), + "Boolean": np.array([True, False, True, False]), + } + ) + gcs_fname = "cudf-gcs-test-bucket/test_orc_writer.orc" + local_filepath = tmp_path / "test_orc.orc" + gdf = cudf.from_pandas(pdf) + + def mock_open(*args, **kwargs): + return local_filepath.open("wb") + + monkeypatch.setattr(gcsfs.GCSFileSystem, "open", mock_open) + gdf.to_orc(f"gcs://{gcs_fname}") + + got = pd.read_orc(local_filepath) + assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/input_output/test_parquet.py b/python/cudf/cudf/tests/input_output/test_parquet.py index 13acf6825b4..c57f91e6c42 100644 --- a/python/cudf/cudf/tests/input_output/test_parquet.py +++ b/python/cudf/cudf/tests/input_output/test_parquet.py @@ -1,14 +1,4572 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. +import datetime +import decimal +import glob +import hashlib +import math +import os +import pathlib +import random +import string +from contextlib import contextmanager from io import BytesIO +from string import ascii_letters +import cupy +import numpy as np import pandas as pd import pyarrow as pa -import pyarrow.parquet as pq import pytest +from fsspec.core import get_fs_token_paths +from packaging import version +from pyarrow import parquet as pq import cudf -from cudf.testing import assert_eq +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.io.parquet import ( + ParquetDatasetWriter, + ParquetWriter, + merge_parquet_filemetadata, +) +from cudf.testing import assert_eq, dataset_generator as dg +from cudf.testing._utils import TIMEDELTA_TYPES, set_random_null_mask_inplace + + +@contextmanager +def _hide_pyarrow_parquet_cpu_warnings(engine): + if engine == "pyarrow": + with pytest.warns( + UserWarning, + match="Using CPU via PyArrow to read Parquet dataset. 
This option " + "is both inefficient and unstable!", + ): + yield + else: + yield + + +@pytest.fixture(scope="module") +def datadir(datadir): + return datadir / "parquet" + + +@pytest.fixture +def simple_pdf(): + nrows = 10 + rng = np.random.default_rng(seed=0) + types = [ + "bool", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + # "uint32", pandas promotes uint32 to int64 + # https://issues.apache.org/jira/browse/ARROW-9215 + "uint64", + "float32", + "float64", + ] + # Create a pandas dataframe with random data of mixed types + test_pdf = pd.DataFrame( + { + f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) + for typ in types + }, + # Need to ensure that this index is not a RangeIndex to get the + # expected round-tripping behavior from Parquet reader/writer. + index=pd.Index(list(range(nrows))), + ) + # Delete the name of the column index, and rename the row index + test_pdf.columns.name = None + test_pdf.index.name = "test_index" + + return test_pdf + + +@pytest.fixture +def simple_gdf(simple_pdf): + return cudf.DataFrame.from_pandas(simple_pdf) + + +def build_pdf(num_columns, day_resolution_timestamps): + rng = np.random.default_rng(seed=0) + types = [ + "bool", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + # "uint32", pandas promotes uint32 to int64 + # https://issues.apache.org/jira/browse/ARROW-9215 + "uint64", + "float32", + "float64", + "datetime64[ms]", + "datetime64[us]", + "str", + ] + nrows = num_columns + + # Create a pandas dataframe with random data of mixed types + test_pdf = pd.DataFrame( + { + f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) + for typ in types + }, + # Need to ensure that this index is not a RangeIndex to get the + # expected round-tripping behavior from Parquet reader/writer. + index=pd.Index(list(range(nrows))), + ) + # Delete the name of the column index, and rename the row index + test_pdf.columns.name = None + test_pdf.index.name = "test_index" + + # make datetime64's a little more interesting by increasing the range of + # dates note that pandas will convert these to ns timestamps, so care is + # taken to avoid overflowing a ns timestamp. There is also the ability to + # request timestamps be whole days only via `day_resolution_timestamps`. 
+ for t in [ + { + "name": "datetime64[ms]", + "nsDivisor": 1000000, + "dayModulus": 86400000, + }, + { + "name": "datetime64[us]", + "nsDivisor": 1000, + "dayModulus": 86400000000, + }, + ]: + data = [ + rng.integers(0, (0x7FFFFFFFFFFFFFFF / t["nsDivisor"])) + for i in range(nrows) + ] + if day_resolution_timestamps: + data = [int(d / t["dayModulus"]) * t["dayModulus"] for d in data] + test_pdf["col_" + t["name"]] = pd.Series( + np.asarray(data, dtype=t["name"]) + ) + + # Create non-numeric categorical data otherwise parquet may typecast it + data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)] + test_pdf["col_category"] = pd.Series(data, dtype="category") + + # Create non-numeric str data + data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)] + test_pdf["col_str"] = pd.Series(data, dtype="str") + + return test_pdf + + +@pytest.fixture(params=[0, 10]) +def pdf(request): + return build_pdf(request.param, False) + + +@pytest.fixture(params=[0, 10]) +def pdf_day_timestamps(request): + return build_pdf(request.param, True) + + +@pytest.fixture +def gdf(pdf): + return cudf.DataFrame.from_pandas(pdf) + + +@pytest.fixture +def gdf_day_timestamps(pdf_day_timestamps): + return cudf.DataFrame.from_pandas(pdf_day_timestamps) + + +@pytest.fixture +def parquet_path_or_buf(datadir): + fname = datadir / "spark_timestamp.snappy.parquet" + try: + with open(fname, "rb") as f: + buffer = BytesIO(f.read()) + except Exception as excpr: + if type(excpr).__name__ == "FileNotFoundError": + pytest.skip(".parquet file is not found") + raise excpr + + def _make_parquet_path_or_buf(src): + if src == "filepath": + return str(fname) + if src == "pathobj": + return fname + if src == "bytes_io": + return buffer + if src == "bytes": + return buffer.getvalue() + if src == "url": + return fname.as_uri() + + raise ValueError("Invalid source type") + + yield _make_parquet_path_or_buf + + +@pytest.fixture(scope="module") +def large_int64_gdf(): + return cudf.DataFrame.from_pandas(pd.DataFrame({"col": range(0, 1 << 20)})) + + +@pytest.fixture(params=["pyarrow", "cudf"]) +def engine(request): + return request.param + + +@pytest.mark.filterwarnings("ignore:Using CPU") +@pytest.mark.parametrize( + "compression", ["snappy", "gzip", "brotli", None, np.str_("snappy")] +) +@pytest.mark.parametrize( + "columns", + [ + ["col_int8"], + ["col_category"], + ["col_int32", "col_float32"], + ["col_int16", "col_float64", "col_int8"], + None, + ], +) +def test_parquet_reader_basic(tmp_path, pdf, columns, engine, compression): + parquet_file = tmp_path / f"{compression}_test.parquet" + pdf.to_parquet(parquet_file, engine="pyarrow", compression=compression) + expect = pd.read_parquet(parquet_file, columns=columns) + got = cudf.read_parquet(parquet_file, engine=engine, columns=columns) + + # PANDAS returns category objects whereas cuDF returns hashes + if engine == "cudf": + if "col_category" in expect.columns: + expect = expect.drop(columns=["col_category"]) + if "col_category" in got.columns: + got = got.drop(columns=["col_category"]) + + assert_eq(expect, got) + + +@pytest.mark.filterwarnings("ignore:Using CPU") +def test_parquet_reader_empty_pandas_dataframe(tmp_path): + df = pd.DataFrame() + fname = tmp_path / "test_pq_reader_empty_pandas_dataframe.parquet" + df.to_parquet(fname) + assert os.path.exists(fname) + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname, engine="cudf") + expect = expect.reset_index(drop=True) + got = got.reset_index(drop=True) + + assert_eq(expect, got) + + 
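+# cuDF reads string columns back as object dtype; the has_null case below +# checks that a null entry does not change that.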
+@pytest.mark.parametrize("has_null", [False, True]) +def test_parquet_reader_strings(tmp_path, has_null): + df = pd.DataFrame( + [(1, "aaa", 9.0), (2, "bbb", 8.0), (3, "ccc", 7.0)], + columns=pd.Index(list("abc")), + ) + if has_null: + df.at[1, "b"] = None + fname = tmp_path / "test_pq_reader_strings.parquet" + df.to_parquet(fname) + assert os.path.exists(fname) + + gdf = cudf.read_parquet(fname, engine="cudf") + + assert gdf["b"].dtype == np.dtype("object") + assert_eq(gdf["b"], df["b"]) + + +@pytest.mark.parametrize("columns", [None, ["b"]]) +@pytest.mark.parametrize("index_col", ["b", "Nameless", None]) +def test_parquet_reader_index_col(tmp_path, index_col, columns): + df = pd.DataFrame({"a": range(3), "b": range(3, 6), "c": range(6, 9)}) + + if index_col is None: + # No index column + df.reset_index(drop=True, inplace=True) + elif index_col == "Nameless": + # Index column but no name + df.set_index("a", inplace=True) + df.index.name = None + else: + # Index column as normal + df.set_index(index_col, inplace=True) + + fname = tmp_path / "test_pq_reader_index_col.parquet" + + # PANDAS' PyArrow backend always writes the index unless disabled + df.to_parquet(fname, index=(index_col is not None)) + assert os.path.exists(fname) + + pdf = pd.read_parquet(fname, columns=columns) + gdf = cudf.read_parquet(fname, engine="cudf", columns=columns) + + assert_eq(pdf, gdf, check_categorical=False) + + +@pytest.mark.parametrize("pandas_compat", [True, False]) +@pytest.mark.parametrize( + "columns", [["a"], ["d"], ["a", "b"], ["a", "d"], None] +) +def test_parquet_reader_pandas_metadata(tmp_path, columns, pandas_compat): + df = pd.DataFrame( + { + "a": range(6, 9), + "b": range(3, 6), + "c": range(6, 9), + "d": ["abc", "def", "xyz"], + } + ) + df.set_index("b", inplace=True) + + fname = tmp_path / "test_pq_reader_pandas_metadata.parquet" + df.to_parquet(fname) + assert os.path.exists(fname) + + # PANDAS `read_parquet()` and PyArrow `read_pandas()` always includes index + # Instead, directly use PyArrow to optionally omit the index + expect = pa.parquet.read_table( + fname, columns=columns, use_pandas_metadata=pandas_compat + ).to_pandas() + got = cudf.read_parquet( + fname, columns=columns, use_pandas_metadata=pandas_compat + ) + + if pandas_compat or columns is None or "b" in columns: + assert got.index.name == "b" + else: + assert got.index.name is None + assert_eq(expect, got, check_categorical=False) + + +@pytest.mark.parametrize("pandas_compat", [True, False]) +@pytest.mark.parametrize("as_bytes", [True, False]) +def test_parquet_range_index_pandas_metadata( + tmp_path, pandas_compat, as_bytes +): + df = pd.DataFrame( + {"a": range(6, 9), "b": ["abc", "def", "xyz"]}, + index=pd.RangeIndex(3, 6, 1, name="c"), + ) + + fname = tmp_path / "test_parquet_range_index_pandas_metadata.parquet" + df.to_parquet(fname) + assert os.path.exists(fname) + + # PANDAS `read_parquet()` and PyArrow `read_pandas()` always includes index + # Instead, directly use PyArrow to optionally omit the index + expect = pa.parquet.read_table( + fname, use_pandas_metadata=pandas_compat + ).to_pandas() + if as_bytes: + # Make sure we can handle RangeIndex parsing + # in pandas when the input is `bytes` + with open(fname, "rb") as f: + got = cudf.read_parquet( + f.read(), use_pandas_metadata=pandas_compat + ) + else: + got = cudf.read_parquet(fname, use_pandas_metadata=pandas_compat) + + assert_eq(expect, got) + + +def test_parquet_read_metadata(tmp_path, pdf): + def num_row_groups(rows, group_size): + return max(1, (rows + 
(group_size - 1)) // group_size) + + fname = tmp_path / "metadata.parquet" + row_group_size = 5 + pdf.to_parquet(fname, compression="snappy", row_group_size=row_group_size) + + ( + num_rows, + row_groups, + col_names, + num_columns, + _, # rowgroup_metadata + ) = cudf.io.read_parquet_metadata(fname) + + assert num_columns == len(pdf.columns) + assert num_rows == len(pdf.index) + assert row_groups == num_row_groups(num_rows, row_group_size) + for a, b in zip(col_names, pdf.columns, strict=True): + assert a == b + + +def test_parquet_read_filtered(set_decomp_env_vars, tmp_path): + # Generate data + fname = tmp_path / "filtered.parquet" + dg.generate( + fname, + dg.Parameters( + num_rows=100, + column_parameters=[ + dg.ColumnParameters( + cardinality=40, + null_frequency=0.05, + generator=lambda: [ + "".join( + random.sample( + string.ascii_letters, random.randint(4, 8) + ) + ) + for _ in range(10) + ], + is_sorted=False, + ), + dg.ColumnParameters( + 40, + 0.2, + lambda: np.random.default_rng(seed=0).integers( + 0, 100, size=10 + ), + True, + ), + ], + seed=42, + ), + format={"name": "parquet", "row_group_size": 10}, + use_threads=False, + ) + + # Get dataframes to compare + df = cudf.read_parquet(fname) + df_filtered = cudf.read_parquet(fname, filters=[("1", ">", 60)]) + # PyArrow's read_table function does row-group-level filtering in addition + # to applying given filters once the table has been read into memory. + # Because of this, we aren't using PyArrow as a reference for testing our + # row-group selection method since the only way to only select row groups + # with PyArrow is with the method we use and intend to test. + tbl_filtered = pq.read_table(fname, filters=[("1", ">", 60)]) + + assert_eq(cudf.io.read_parquet_metadata(fname)[1], 10) + assert len(df_filtered) < len(df) + assert len(tbl_filtered) <= len(df_filtered) + + +def test_parquet_read_filtered_everything(tmp_path): + # Generate data + fname = tmp_path / "filtered_everything.parquet" + df = pd.DataFrame({"x": range(10), "y": list("aabbccddee")}) + df.to_parquet(fname, row_group_size=2) + + # Check filter + df_filtered = cudf.read_parquet(fname, filters=[("x", "==", 12)]) + assert_eq(len(df_filtered), 0) + assert_eq(df_filtered["x"].dtype, "int64") + assert_eq(df_filtered["y"].dtype, "object") + + +def test_parquet_read_filtered_multiple_files(tmp_path): + # Generate data + fname_0 = tmp_path / "filtered_multiple_files_0.parquet" + df = pd.DataFrame({"x": range(10), "y": list("aabbccddee")}) + df.to_parquet(fname_0, row_group_size=2) + fname_1 = tmp_path / "filtered_multiple_files_1.parquet" + df = pd.DataFrame({"x": range(10), "y": list("aaccccddee")}) + df.to_parquet(fname_1, row_group_size=2) + fname_2 = tmp_path / "filtered_multiple_files_2.parquet" + df = pd.DataFrame( + {"x": [0, 1, 9, 9, 4, 5, 6, 7, 8, 9], "y": list("aabbzzddee")} + ) + df.to_parquet(fname_2, row_group_size=2) + + # Check filter + filtered_df = cudf.read_parquet( + [fname_0, fname_1, fname_2], filters=[("x", "==", 2)] + ) + assert_eq( + filtered_df, + cudf.DataFrame({"x": [2, 2], "y": list("bc")}, index=[2, 2]), + ) + + +@pytest.mark.parametrize( + "predicate,expected_len", + [ + ([[("x", "==", 0)], [("z", "==", 0)]], 2), + ([("x", "==", 0), ("z", "==", 0)], 0), + ([("x", "==", 0), ("z", "!=", 0)], 1), + ([("y", "==", "c"), ("x", ">", 8)], 0), + ([("y", "==", "c"), ("x", ">=", 5)], 1), + ([[("y", "==", "c")], [("x", "<", 3)]], 5), + ([[("x", "not in", (0, 9)), ("z", "not in", (4, 5))]], 6), + ([[("y", "==", "c")], [("x", "in", (0, 9)), ("z", "in", 
(0, 9))]], 4), + ([[("x", "==", 0)], [("x", "==", 1)], [("x", "==", 2)]], 3), + ([[("x", "==", 0), ("z", "==", 9), ("y", "==", "a")]], 1), + ], +) +def test_parquet_read_filtered_complex_predicate( + tmp_path, predicate, expected_len +): + # Generate data + fname = tmp_path / "filtered_complex_predicate.parquet" + df = pd.DataFrame( + { + "x": range(10), + "y": list("aabbccddee"), + "z": reversed(range(10)), + } + ) + df.to_parquet(fname, row_group_size=2) + + # Check filters + df_filtered = cudf.read_parquet(fname, filters=predicate) + assert_eq(cudf.io.read_parquet_metadata(fname)[1], 10 / 2) + assert_eq(len(df_filtered), expected_len) + + +@pytest.mark.parametrize("row_group_size", [1, 5]) +def test_parquet_read_row_groups(tmp_path, pdf, row_group_size): + if "col_category" in pdf.columns: + pdf = pdf.drop(columns=["col_category"]) + fname = tmp_path / "row_group.parquet" + pdf.to_parquet(fname, compression="gzip", row_group_size=row_group_size) + + num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata( + fname + ) + + gdf = [cudf.read_parquet(fname, row_groups=[i]) for i in range(row_groups)] + gdf = cudf.concat(gdf) + assert_eq(pdf.reset_index(drop=True), gdf.reset_index(drop=True)) + + # first half rows come from the first source, rest from the second + gdf = cudf.read_parquet( + [fname, fname], + row_groups=[ + list(range(row_groups // 2)), + list(range(row_groups // 2, row_groups)), + ], + ) + assert_eq(pdf.reset_index(drop=True), gdf.reset_index(drop=True)) + + +@pytest.mark.parametrize("row_group_size", [1, 5]) +def test_parquet_read_row_groups_non_contiguous(tmp_path, pdf, row_group_size): + fname = tmp_path / "row_group.parquet" + pdf.to_parquet(fname, compression="gzip", row_group_size=row_group_size) + + num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata( + fname + ) + + # alternate rows between the two sources + gdf = cudf.read_parquet( + [fname, fname], + row_groups=[ + list(range(0, row_groups, 2)), + list(range(1, row_groups, 2)), + ], + ) + + ref_df = [ + cudf.read_parquet(fname, row_groups=i) + for i in list(range(0, row_groups, 2)) + list(range(1, row_groups, 2)) + ] + ref_df = cudf.concat(ref_df) + + assert_eq(ref_df, gdf) + + +def test_parquet_reader_spark_timestamps(datadir): + fname = datadir / "spark_timestamp.snappy.parquet" + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got) + + +def test_parquet_reader_spark_decimals(datadir): + fname = datadir / "spark_decimal.parquet" + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got) + + +@pytest.mark.parametrize("columns", [["a"], ["b", "a"], None]) +def test_parquet_reader_decimal128(datadir, columns): + fname = datadir / "nested_decimal128_file.parquet" + got = cudf.read_parquet(fname, columns=columns) + expect = cudf.read_parquet(fname, columns=columns) + + assert_eq(expect, got) + + +def test_parquet_reader_microsecond_timestamps(datadir): + fname = datadir / "usec_timestamp.parquet" + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got) + + +def test_parquet_reader_mixedcompression(datadir): + fname = datadir / "mixed_compression.parquet" + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got) + + +def test_parquet_reader_select_columns(datadir): + fname = datadir / "nested_column_map.parquet" + + expect = cudf.read_parquet(fname).to_pandas()[["value"]] + got = cudf.read_parquet(fname, columns=["value"]) + + 
assert_eq(expect, got) + + +def test_parquet_reader_invalids(tmp_path): + test_pdf = cudf.DataFrame( + {"a": np.array([1, np.nan, np.nan, 2])}, index=cudf.Index([0, 1, 2, 3]) + ) + + fname = tmp_path / "invalids.parquet" + test_pdf.to_parquet(fname, engine="pyarrow") + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got.to_pandas(nullable=True)) + + +def test_parquet_reader_filenotfound(tmpdir): + with pytest.raises(FileNotFoundError): + cudf.read_parquet("TestMissingFile.parquet") + + with pytest.raises(FileNotFoundError): + cudf.read_parquet(tmpdir.mkdir("cudf_parquet")) + + +def test_parquet_reader_local_filepath(): + fname = "~/TestLocalFile.parquet" + if not os.path.isfile(fname): + pytest.skip("Local .parquet file is not found") + + cudf.read_parquet(fname) + + +@pytest.mark.parametrize( + "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"] +) +def test_parquet_reader_filepath_or_buffer(parquet_path_or_buf, src): + expect = pd.read_parquet(parquet_path_or_buf("filepath")) + got = cudf.read_parquet(parquet_path_or_buf(src)) + + assert_eq(expect, got) + + +def test_parquet_reader_file_types(parquet_path_or_buf): + expect = cudf.read_parquet(parquet_path_or_buf("filepath")) + fs, _, paths = get_fs_token_paths(parquet_path_or_buf("filepath")) + + # Pass open fsspec file + with fs.open(paths[0], mode="rb") as fil: + got1 = cudf.read_parquet(fil) + assert_eq(expect, got1) + + # Pass path only + got2 = cudf.read_parquet(paths[0]) + assert_eq(expect, got2) + + +def create_parquet_source(df, src_type, fname): + if src_type == "filepath": + df.to_parquet(fname, engine="pyarrow") + return str(fname) + if src_type == "pathobj": + df.to_parquet(fname, engine="pyarrow") + return fname + if src_type == "bytes_io": + buffer = BytesIO() + df.to_parquet(buffer, engine="pyarrow") + return buffer + if src_type == "bytes": + buffer = BytesIO() + df.to_parquet(buffer, engine="pyarrow") + return buffer.getvalue() + if src_type == "url": + df.to_parquet(fname, engine="pyarrow") + return pathlib.Path(fname).as_uri() + + +@pytest.mark.parametrize( + "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"] +) +def test_parquet_reader_multiple_files(tmp_path, src): + test_pdf1 = pd.DataFrame( + { + "a": np.concatenate( + [np.arange(10, dtype="float64"), np.full(10, np.nan)] + ) + }, + index=pd.Index(np.arange(20)), + ) + test_pdf2 = pd.DataFrame( + {"a": np.arange(10, dtype="float64")}, index=pd.Index(np.arange(10)) + ) + expect = pd.concat([test_pdf1, test_pdf2]) + + src1 = create_parquet_source(test_pdf1, src, tmp_path / "multi1.parquet") + src2 = create_parquet_source(test_pdf2, src, tmp_path / "multi2.parquet") + got = cudf.read_parquet([src1, src2]) + + assert_eq(expect, got) + + +def test_parquet_reader_reordered_columns(tmp_path): + src = pd.DataFrame( + {"name": ["cow", None, "duck", "fish", None], "id": [0, 1, 2, 3, 4]} + ) + fname = tmp_path / "test_parquet_reader_reordered_columns.parquet" + src.to_parquet(fname) + assert os.path.exists(fname) + expect = pd.DataFrame( + {"id": [0, 1, 2, 3, 4], "name": ["cow", None, "duck", "fish", None]} + ) + got = cudf.read_parquet(fname, columns=["id", "name"]) + assert_eq(expect, got, check_dtype=False) + + +def test_parquet_reader_reordered_columns_mixed(tmp_path): + src = pd.DataFrame( + { + "name": ["cow", None, "duck", "fish", None], + "list0": [ + [[1, 2], [3, 4]], + None, + [[5, 6], None], + [[1]], + [[5], [6, None, 8]], + ], + "id": [0, 1, 2, 3, 4], + "list1": [ + [[1, 2], [3, 4]], + [[0, 0]], + [[5, 6], 
[10, 12]], + [[1]], + [[5], [6, 8]], + ], + } + ) + fname = tmp_path / "test_parquet_reader_reordered_columns.parquet" + src.to_parquet(fname) + assert os.path.exists(fname) + expect = pd.DataFrame( + { + "list1": [ + [[1, 2], [3, 4]], + [[0, 0]], + [[5, 6], [10, 12]], + [[1]], + [[5], [6, 8]], + ], + "id": [0, 1, 2, 3, 4], + "list0": [ + [[1, 2], [3, 4]], + None, + [[5, 6], None], + [[1]], + [[5], [6, None, 8]], + ], + "name": ["cow", None, "duck", "fish", None], + } + ) + got = cudf.read_parquet(fname, columns=["list1", "id", "list0", "name"]) + assert_eq(expect, got, check_dtype=False) + + +def test_parquet_reader_list_basic(tmp_path): + expect = pd.DataFrame({"a": [[[1, 2], [3, 4]], None, [[5, 6], None]]}) + fname = tmp_path / "test_parquet_reader_list_basic.parquet" + expect.to_parquet(fname) + assert os.path.exists(fname) + got = cudf.read_parquet(fname) + assert_eq(expect, got) + + +def test_parquet_reader_list_table(tmp_path): + expect = pd.DataFrame( + { + "a": [[[1, 2], [3, 4]], None, [[5, 6], None]], + "b": [[None, None], None, [None, None]], + "c": [[[1, 2, 3]], [[None]], [[], None]], + "d": [[[]], [[None]], [[1, 2, 3], None]], + "e": [[["cows"]], [["dogs"]], [["cats", "birds", "owls"], None]], + } + ) + fname = tmp_path / "test_parquet_reader_list_table.parquet" + expect.to_parquet(fname) + assert os.path.exists(fname) + got = cudf.read_parquet(fname) + assert pa.Table.from_pandas(expect).equals(got.to_arrow()) + + +def int_gen(first_val, i): + """ + Returns an integer based on an absolute index and a starting value. Used + as input to `list_gen`. + """ + return int(i + first_val) + + +strings = [ + "cats", + "dogs", + "cows", + "birds", + "fish", + "sheep", + "owls", + "bears", + "ants", +] + + +def string_gen(first_val, i): + """ + Returns a string based on an absolute index and a starting value. Used as + input to `list_gen`. + """ + return strings[int_gen(first_val, i) % len(strings)] + + +def list_row_gen( + gen, first_val, list_size, lists_per_row, include_validity=False +): + """ + Generate a single row for a List<List<>> column based on input parameters. + + Parameters + ---------- + gen : A callable which generates an individual leaf element based on an + absolute index. + first_val : Generate the column as if it had started at 'first_val' + instead of 0. + list_size : Size of each generated list. + lists_per_row : Number of lists to generate per row. + include_validity : Whether or not to include nulls as part of the + column. If true, it will add a selection of nulls at both the + topmost row level and at the leaf level. + + Returns + ------- + The generated list column. + """ + + def L(list_size, first_val): + return [ + (gen(first_val, i) if i % 2 == 0 else None) + if include_validity + else (gen(first_val, i)) + for i in range(list_size) + ] + + return [ + (L(list_size, first_val + (list_size * i)) if i % 2 == 0 else None) + if include_validity + else L(list_size, first_val + (list_size * i)) + for i in range(lists_per_row) + ] + + +def list_gen(gen, num_rows, lists_per_row, list_size, include_validity=False): + """ + Generate a list column based on input parameters. + + Parameters + ---------- + gen : A callable which generates an individual leaf element based on an + absolute index. + num_rows : Number of rows to generate. + lists_per_row : Number of lists to generate per row. + list_size : Size of each generated list. + include_validity : Whether or not to include nulls as part of the + column. 
+def list_gen(gen, num_rows, lists_per_row, list_size, include_validity=False):
+    """
+    Generate a list column based on input parameters.
+
+    Parameters
+    ----------
+    gen : A callable which generates an individual leaf element based on an
+        absolute index.
+    num_rows : Number of rows to generate.
+    lists_per_row : Number of lists to generate per row.
+    list_size : Size of each generated list.
+    include_validity : Whether or not to include nulls as part of the
+        column. If true, it will add a selection of nulls at both the
+        topmost row level and at the leaf level.
+
+    Returns
+    -------
+    The generated list column.
+    """
+
+    def L(list_size, first_val):
+        return [
+            (gen(first_val, i) if i % 2 == 0 else None)
+            if include_validity
+            else (gen(first_val, i))
+            for i in range(list_size)
+        ]
+
+    def R(first_val, lists_per_row, list_size):
+        return [
+            L(list_size, first_val + (list_size * i))
+            for i in range(lists_per_row)
+        ]
+
+    return [
+        (
+            R(
+                lists_per_row * list_size * i,
+                lists_per_row,
+                list_size,
+            )
+            if i % 2 == 0
+            else None
+        )
+        if include_validity
+        else R(
+            lists_per_row * list_size * i,
+            lists_per_row,
+            list_size,
+        )
+        for i in range(num_rows)
+    ]
+
+
+def test_parquet_reader_list_large(tmp_path):
+    expect = pd.DataFrame({"a": list_gen(int_gen, 64, 40, 25)})
+    fname = tmp_path / "test_parquet_reader_list_large.parquet"
+    expect.to_parquet(fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert_eq(expect, got, check_dtype=False)
+
+
+def test_parquet_reader_list_validity(tmp_path):
+    expect = pd.DataFrame(
+        {"a": list_gen(int_gen, 64, 40, 25, include_validity=True)}
+    )
+    fname = tmp_path / "test_parquet_reader_list_validity.parquet"
+    expect.to_parquet(fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert_eq(expect, got, check_dtype=False)
+
+
+def test_parquet_reader_list_large_mixed(tmp_path):
+    expect = pd.DataFrame(
+        {
+            "a": list_gen(string_gen, 64, 40, 25),
+            "b": list_gen(int_gen, 64, 40, 25),
+            "c": list_gen(int_gen, 64, 40, 25, include_validity=True),
+            "d": list_gen(string_gen, 64, 40, 25, include_validity=True),
+        }
+    )
+    fname = tmp_path / "test_parquet_reader_list_large_mixed.parquet"
+    expect.to_parquet(fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert pa.Table.from_pandas(expect).equals(got.to_arrow())
+
+
+def test_parquet_reader_list_large_multi_rowgroup(tmp_path):
+    # > 40 row groups
+    num_rows = 10000
+    num_docs = num_rows / 2
+    num_categories = 100
+    row_group_size = 100
+
+    cupy.random.seed(0)
+
+    # generate a random pairing of doc: category
+    documents = cudf.DataFrame(
+        {
+            "document_id": cupy.random.randint(num_docs, size=num_rows),
+            "category_id": cupy.random.randint(num_categories, size=num_rows),
+        }
+    )
+
+    # group categories by document_id to create a list column
+    expect = documents.groupby("document_id").agg({"category_id": ["collect"]})
+    expect.columns = expect.columns.get_level_values(0)
+    expect.reset_index(inplace=True)
+
+    # round trip the dataframe to/from parquet
+    fname = tmp_path / "test_parquet_reader_list_large_multi_rowgroup.parquet"
+    expect.to_pandas().to_parquet(fname, row_group_size=row_group_size)
+    got = cudf.read_parquet(fname)
+
+    assert_eq(expect, got)
+
+
+def test_parquet_reader_list_large_multi_rowgroup_nulls(tmp_path):
+    # 25 row groups
+    num_rows = 2500
+    row_group_size = 100
+
+    expect = cudf.DataFrame(
+        {"a": list_gen(int_gen, num_rows, 3, 2, include_validity=True)}
+    )
+
+    # round trip the dataframe to/from parquet
+    fname = (
+        tmp_path
+        / "test_parquet_reader_list_large_multi_rowgroup_nulls.parquet"
+    )
+    expect.to_pandas().to_parquet(fname, row_group_size=row_group_size)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert_eq(expect, got)
+
+
+def struct_gen(gen, skip_rows, num_rows, include_validity=False):
+    """
+    Generate a struct column based on input parameters.
+
+    Parameters
+    ----------
+    gen : An array of callables which generate an individual row based on an
+        absolute index.
+    skip_rows : Generate the column as if it had started at 'skip_rows'
+        instead of 0. The intent here is to emulate the skip_rows
+        parameter of the parquet reader.
+    num_rows : Number of rows to generate. The number of fields in the
+        struct is implied by the length of `gen`.
+    include_validity : Whether or not to include nulls as part of the
+        column. If true, it will add a selection of nulls at both the
+        field level and at the value level.
+
+    Returns
+    -------
+    The generated struct column.
+    """
+
+    def R(first_val, num_fields):
+        return {
+            "col" + str(f): (
+                gen[f](first_val, first_val) if f % 4 != 0 else None
+            )
+            if include_validity
+            else (gen[f](first_val, first_val))
+            for f in range(len(gen))
+        }
+
+    return [
+        (R((i + skip_rows), len(gen)) if (i + skip_rows) % 4 != 0 else None)
+        if include_validity
+        else R((i + skip_rows), len(gen))
+        for i in range(num_rows)
+    ]
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # struct
+        [
+            {"a": 1, "b": 2},
+            {"a": 10, "b": 20},
+            {"a": None, "b": 22},
+            {"a": None, "b": None},
+            {"a": 15, "b": None},
+        ],
+        # struct-of-list
+        [
+            {"a": 1, "b": 2, "c": [1, 2, 3]},
+            {"a": 10, "b": 20, "c": [4, 5]},
+            {"a": None, "b": 22, "c": [6]},
+            {"a": None, "b": None, "c": None},
+            {"a": 15, "b": None, "c": [-1, -2]},
+            None,
+            {"a": 100, "b": 200, "c": [-10, None, -20]},
+        ],
+        # list-of-struct
+        [
+            [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}],
+            None,
+            [{"a": 10, "b": 20}],
+            [{"a": 100, "b": 200}, {"a": None, "b": 300}, None],
+        ],
+        # struct-of-struct
+        [
+            {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2},
+            {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4},
+            {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6},
+            {"a": 7, "b": None, "c": 8},
+            {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None},
+            None,
+            {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10},
+        ],
+    ],
+)
+def test_parquet_reader_struct_basic(tmp_path, data):
+    expect = pa.Table.from_pydict({"struct": data})
+    fname = tmp_path / "test_parquet_reader_struct_basic.parquet"
+    pa.parquet.write_table(expect, fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert expect.equals(got.to_arrow())
+
+
+def select_columns_params():
+    dfs = [
+        # struct
+        (
+            [
+                {"a": 1, "b": 2},
+                {"a": 10, "b": 20},
+                {"a": None, "b": 22},
+                {"a": None, "b": None},
+                {"a": 15, "b": None},
+            ],
+            [["struct"], ["struct.a"], ["struct.b"], ["c"]],
+        ),
+        # struct-of-list
+        (
+            [
+                {"a": 1, "b": 2, "c": [1, 2, 3]},
+                {"a": 10, "b": 20, "c": [4, 5]},
+                {"a": None, "b": 22, "c": [6]},
+                {"a": None, "b": None, "c": None},
+                {"a": 15, "b": None, "c": [-1, -2]},
+                None,
+                {"a": 100, "b": 200, "c": [-10, None, -20]},
+            ],
+            [
+                ["struct"],
+                ["struct.c"],
+                ["struct.c.list"],
+                ["struct.c.list.item"],
+                ["struct.b", "struct.c"],
+                ["struct.b", "struct.d", "struct.c"],
+            ],
+        ),
+        # list-of-struct
+        (
+            [
+                [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}],
+                None,
+                [{"a": 10, "b": 20}],
+                [{"a": 100, "b": 200}, {"a": None, "b": 300}, None],
+            ],
+            [
+                ["struct"],
+                ["struct.list"],
+                ["struct.list.item"],
+                ["struct.list.item.a", "struct.list.item.b"],
+                ["struct.list.item.c"],
+            ],
+        ),
+        # struct with "." in field names
+        (
+            [
+                {"a.b": 1, "b.a": 2},
+                {"a.b": 10, "b.a": 20},
+                {"a.b": None, "b.a": 22},
+                {"a.b": None, "b.a": None},
+                {"a.b": 15, "b.a": None},
+            ],
+            [["struct"], ["struct.a"], ["struct.b.a"]],
+        ),
+    ]
+    for df_col_pair in dfs:
+        for cols in df_col_pair[1]:
+            yield df_col_pair[0], cols
+
+
+@pytest.mark.parametrize("data, columns", select_columns_params())
+def test_parquet_reader_struct_select_columns(data, columns):
+    table = pa.Table.from_pydict({"struct": data})
+    buff = BytesIO()
+
+    pa.parquet.write_table(table, buff)
+
+    expect = pq.ParquetFile(buff).read(columns=columns)
+    got = cudf.read_parquet(buff, columns=columns)
+    assert expect.equals(got.to_arrow())
+
+
+def test_parquet_reader_struct_los_large(tmp_path):
+    num_rows = 256
+    list_size = 64
+    data = [
+        struct_gen([string_gen, int_gen, string_gen], 0, list_size, False)
+        if i % 2 == 0
+        else None
+        for i in range(num_rows)
+    ]
+    expect = pa.Table.from_pydict({"los": data})
+    fname = tmp_path / "test_parquet_reader_struct_los_large.parquet"
+    pa.parquet.write_table(expect, fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert expect.equals(got.to_arrow())
+
+
+@pytest.mark.parametrize(
+    "params", [[3, 4, 32, False], [3, 4, 32, True], [50, 10, 64, True]]
+)
+def test_parquet_reader_struct_sol_table(tmp_path, params):
+    # Struct<List<List>>
+    lists_per_row = params[0]
+    list_size = params[1]
+    num_rows = params[2]
+    include_validity = params[3]
+
+    def list_gen_wrapped(x, y):
+        return list_row_gen(
+            int_gen, x * list_size * lists_per_row, list_size, lists_per_row
+        )
+
+    def string_list_gen_wrapped(x, y):
+        return list_row_gen(
+            string_gen,
+            x * list_size * lists_per_row,
+            list_size,
+            lists_per_row,
+            include_validity,
+        )
+
+    data = struct_gen(
+        [int_gen, string_gen, list_gen_wrapped, string_list_gen_wrapped],
+        0,
+        num_rows,
+        include_validity,
+    )
+    expect = pa.Table.from_pydict({"sol": data})
+    fname = tmp_path / "test_parquet_reader_struct_sol_table.parquet"
+    pa.parquet.write_table(expect, fname)
+    assert os.path.exists(fname)
+    got = cudf.read_parquet(fname)
+    assert expect.equals(got.to_arrow())
+
+
+def test_parquet_reader_v2(tmp_path, simple_pdf):
+    pdf_fname = tmp_path / "pdfv2.parquet"
+    simple_pdf.to_parquet(pdf_fname, data_page_version="2.0")
+    assert_eq(cudf.read_parquet(pdf_fname), simple_pdf)
+
+    cudf.from_pandas(simple_pdf).to_parquet(pdf_fname, header_version="2.0")
+    assert_eq(cudf.read_parquet(pdf_fname), simple_pdf)
+
+
+def test_parquet_delta_byte_array(datadir):
+    fname = datadir / "delta_byte_arr.parquet"
+    assert_eq(cudf.read_parquet(fname), pd.read_parquet(fname))
+
+
+# values chosen to exercise:
+# 1 - header only, no bitpacked values
+# 2 - one bitpacked value
+# 23 - one partially filled miniblock
+# 32 - almost full miniblock
+# 33 - one full miniblock
+# 34 - one full miniblock plus one value in new miniblock
+# 128 - almost full block
+# 129 - one full block
+# 130 - one full block plus one value in new block
+# 129 * 3 - multiple blocks
+@pytest.fixture(params=[1, 2, 23, 32, 33, 34, 128, 129, 130, 129 * 3])
+def delta_num_rows(request):
+    return request.param
+
+
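+# Background (illustrative): DELTA_BINARY_PACKED encodes deltas in blocks
+# of 128 values made of four 32-value miniblocks (the common writer
+# defaults), so the row counts above deliberately straddle the 32/33/34
+# miniblock and the 128/129/130 block boundaries.
+
+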
"cardinality": delta_num_rows, + }, + ], + rows=delta_num_rows, + seed=0, + use_threads=False, + ) + # Roundabout conversion to pandas to preserve nulls/data types + cudf_table = cudf.DataFrame.from_arrow(arrow_table) + test_pdf = cudf_table.to_pandas(nullable=True) + pdf_fname = tmp_path / "pdfv2.parquet" + test_pdf.to_parquet( + pdf_fname, + version="2.6", + column_encoding="DELTA_BINARY_PACKED", + data_page_version="2.0", + data_page_size=64 * 1024, + engine="pyarrow", + use_dictionary=False, + ) + cdf = cudf.read_parquet(pdf_fname) + pcdf = cudf.from_pandas(test_pdf) + assert_eq(cdf, pcdf) + + # Write back out with cudf and make sure pyarrow can read it + cudf_fname = tmp_path / "cudfv2.parquet" + pcdf.to_parquet( + cudf_fname, + compression=None, + header_version="2.0", + use_dictionary=False, + ) + + cdf2 = cudf.from_pandas(pd.read_parquet(cudf_fname)) + assert_eq(cdf2, cdf) + + +@pytest.mark.parametrize("add_nulls", [True, False]) +@pytest.mark.parametrize("max_string_length", [12, 48, 96, 128]) +@pytest.mark.parametrize( + "str_encoding", ["DELTA_BYTE_ARRAY", "DELTA_LENGTH_BYTE_ARRAY"] +) +def test_delta_byte_array_roundtrip( + delta_num_rows, add_nulls, max_string_length, str_encoding, tmp_path +): + null_frequency = 0.25 if add_nulls else 0 + + # Create a pandas dataframe with random data of mixed lengths + test_pdf = dg.rand_dataframe( + dtypes_meta=[ + { + "dtype": "str", + "null_frequency": null_frequency, + "cardinality": delta_num_rows, + "max_string_length": max_string_length, + }, + ], + rows=delta_num_rows, + seed=0, + use_threads=False, + ).to_pandas() + + pdf_fname = tmp_path / "pdfdeltaba.parquet" + test_pdf.to_parquet( + pdf_fname, + version="2.6", + column_encoding=str_encoding, + data_page_version="2.0", + data_page_size=64 * 1024, + engine="pyarrow", + use_dictionary=False, + ) + cdf = cudf.read_parquet(pdf_fname) + pcdf = cudf.from_pandas(test_pdf) + assert_eq(cdf, pcdf) + + # Write back out with cudf and make sure pyarrow can read it + cudf_fname = tmp_path / "cdfdeltaba.parquet" + pcdf.to_parquet( + cudf_fname, + compression="snappy", + header_version="2.0", + use_dictionary=False, + ) + cdf2 = cudf.from_pandas(pd.read_parquet(cudf_fname)) + assert_eq(cdf2, cdf) + + +@pytest.mark.parametrize("add_nulls", [True, False]) +@pytest.mark.parametrize( + "str_encoding", ["DELTA_BYTE_ARRAY", "DELTA_LENGTH_BYTE_ARRAY"] +) +def test_delta_struct_list(tmp_path, delta_num_rows, add_nulls, str_encoding): + # Struct> + lists_per_row = 3 + list_size = 4 + num_rows = delta_num_rows + include_validity = add_nulls + + def list_gen_wrapped(x, y): + return list_row_gen( + int_gen, x * list_size * lists_per_row, list_size, lists_per_row + ) + + def string_list_gen_wrapped(x, y): + return list_row_gen( + string_gen, + x * list_size * lists_per_row, + list_size, + lists_per_row, + include_validity, + ) + + data = struct_gen( + [int_gen, string_gen, list_gen_wrapped, string_list_gen_wrapped], + 0, + num_rows, + include_validity, + ) + test_pdf = pa.Table.from_pydict({"sol": data}).to_pandas() + pdf_fname = tmp_path / "pdfdeltaba.parquet" + test_pdf.to_parquet( + pdf_fname, + version="2.6", + column_encoding={ + "sol.col0": "DELTA_BINARY_PACKED", + "sol.col1": str_encoding, + "sol.col2.list.element.list.element": "DELTA_BINARY_PACKED", + "sol.col3.list.element.list.element": str_encoding, + }, + data_page_version="2.0", + data_page_size=64 * 1024, + engine="pyarrow", + use_dictionary=False, + ) + # sanity check to verify file is written properly + assert_eq(test_pdf, 
+@pytest.mark.parametrize(
+    "data",
+    [
+        # Structs
+        {
+            "being": [
+                None,
+                {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}},
+                {"human?": None, "Deets": {"Name": "Angua", "Age": 25}},
+                {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}},
+                {"human?": False, "Deets": None},
+                {"human?": None, "Deets": {"Name": "Mr", "Age": None}},
+            ]
+        },
+        # List of Structs
+        {
+            "family": [
+                [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}],
+                [
+                    {"human?": None, "deets": {"weight": 5.3, "age": 25}},
+                    {"human?": False, "deets": {"weight": 8.0, "age": 31}},
+                    {"human?": False, "deets": None},
+                ],
+                [],
+                [{"human?": None, "deets": {"weight": 6.9, "age": None}}],
+            ]
+        },
+        # Struct of Lists
+        {
+            "Real estate records": [
+                None,
+                {
+                    "Status": "NRI",
+                    "Ownerships": {
+                        "land_unit": [None, 2, None],
+                        "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]],
+                    },
+                },
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [4, 5],
+                        "flats": [[7, 8], []],
+                    },
+                },
+                {
+                    "Status": "RI",
+                    "Ownerships": {"land_unit": None, "flats": [[]]},
+                },
+                {"Status": "RI", "Ownerships": None},
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [7, 8, 9],
+                        "flats": [[], [], []],
+                    },
+                },
+            ]
+        },
+    ],
+)
+def test_parquet_reader_nested_v2(tmp_path, data):
+    expect = pd.DataFrame(data)
+    pdf_fname = tmp_path / "pdfv2.parquet"
+    expect.to_parquet(pdf_fname, data_page_version="2.0")
+    assert_eq(cudf.read_parquet(pdf_fname), expect)
+
+
+@pytest.mark.filterwarnings("ignore:Using CPU")
+def test_parquet_writer_cpu_pyarrow(
+    tmp_path, pdf_day_timestamps, gdf_day_timestamps
+):
+    pdf_fname = tmp_path / "pdf.parquet"
+    gdf_fname = tmp_path / "gdf.parquet"
+
+    if len(pdf_day_timestamps) == 0:
+        pdf_day_timestamps = pdf_day_timestamps.reset_index(drop=True)
+        gdf_day_timestamps = gdf_day_timestamps.reset_index(drop=True)
+
+    pdf_day_timestamps.to_parquet(pdf_fname)
+    gdf_day_timestamps.to_parquet(gdf_fname, engine="pyarrow")
+
+    assert os.path.exists(pdf_fname)
+    assert os.path.exists(gdf_fname)
+
+    expect = pa.parquet.read_pandas(pdf_fname)
+    got = pa.parquet.read_pandas(gdf_fname)
+
+    assert_eq(expect, got)
+
+    def clone_field(table, name, datatype):
+        f = table.schema.field(name)
+        return pa.field(f.name, datatype, f.nullable, f.metadata)
+
+    # Pandas uses a datetime64[ns] while we use a datetime64[ms]
+    for t in [expect, got]:
+        for t_col in ["col_datetime64[ms]", "col_datetime64[us]"]:
+            idx = t.schema.get_field_index(t_col)
+            field = clone_field(t, t_col, pa.timestamp("ms"))
+            t = t.set_column(idx, field, t.column(idx).cast(field.type))
+            t = t.replace_schema_metadata()
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.filterwarnings("ignore:Using CPU")
+def test_parquet_writer_int96_timestamps(tmp_path, pdf, gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+
+    if len(pdf) == 0:
+        pdf = pdf.reset_index(drop=True)
+        gdf = gdf.reset_index(drop=True)
+
+    if "col_category" in pdf.columns:
+        pdf = pdf.drop(columns=["col_category"])
+    if "col_category" in gdf.columns:
+        gdf = gdf.drop(columns=["col_category"])
+
+    assert_eq(pdf, gdf)
+
+    # Write out the gdf using the GPU accelerated writer with INT96 timestamps
+    gdf.to_parquet(
+        gdf_fname,
+        index=None,
+        int96_timestamps=True,
+    )
+
+    assert os.path.exists(gdf_fname)
+
+    expect = pdf
+    got = pd.read_parquet(gdf_fname)
+
+    # verify INT96 timestamps were converted back to the same data.
+    assert_eq(expect, got, check_categorical=False, check_dtype=False)
+
+
+def test_multifile_parquet_folder(tmp_path):
+    test_pdf1 = pd.DataFrame(
+        {
+            "a": np.concatenate(
+                [np.arange(10, dtype="float64"), np.full(10, np.nan)]
+            )
+        },
+        index=pd.Index(np.arange(20)),
+    )
+    test_pdf2 = pd.DataFrame(
+        {"a": np.arange(10, dtype="float64")}, index=pd.Index(np.arange(10))
+    )
+    expect = pd.concat([test_pdf1, test_pdf2])
+
+    par_dir = tmp_path / "multi_part"
+    par_dir.mkdir()
+
+    create_parquet_source(test_pdf1, "filepath", par_dir / "multi1.parquet")
+    create_parquet_source(test_pdf2, "filepath", par_dir / "multi2.parquet")
+
+    got1 = cudf.read_parquet(tmp_path / "multi_part/*.parquet")
+    assert_eq(expect, got1)
+
+    got2 = cudf.read_parquet(tmp_path / "multi_part")
+    assert_eq(expect, got2)
+
+
+# Validates the metadata return path of the parquet writer
+def test_parquet_writer_return_metadata(tmp_path, simple_gdf):
+    gdf_fname = tmp_path / "data1.parquet"
+
+    # Write out the gdf using the GPU accelerated writer
+    df_metadata = simple_gdf.to_parquet(
+        gdf_fname, index=None, metadata_file_path="test/data1.parquet"
+    )
+    # Verify that we got a valid parquet signature in the initial metadata blob
+    assert df_metadata.tobytes()[0:4] == b"PAR1"
+
+    df_metadata_list1 = [df_metadata]
+    df_metadata_list2 = [df_metadata, df_metadata]
+    merged_metadata1 = merge_parquet_filemetadata(df_metadata_list1)
+    merged_metadata2 = merge_parquet_filemetadata(df_metadata_list2)
+
+    # Verify that we got a valid parquet signature in the final metadata blob
+    assert merged_metadata1.tobytes()[0:4] == b"PAR1"
+    assert merged_metadata2.tobytes()[0:4] == b"PAR1"
+
+    # Make sure aggregation is combining metadata correctly
+    fmd1 = pa.parquet.ParquetFile(BytesIO(merged_metadata1.tobytes())).metadata
+    fmd2 = pa.parquet.ParquetFile(BytesIO(merged_metadata2.tobytes())).metadata
+    assert fmd2.num_columns == fmd1.num_columns
+    assert fmd2.num_rows == 2 * fmd1.num_rows
+    assert fmd2.num_row_groups == 2 * fmd1.num_row_groups
+
+
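+# Usage sketch (illustrative; the path below is hypothetical): the merged
+# footer bytes produced above are what a dataset-level "_metadata" sidecar
+# file would contain, e.g.
+#     with open("dataset_dir/_metadata", "wb") as f:
+#         f.write(merged_metadata2.tobytes())
+
+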
+# Validates the integrity of the GPU accelerated parquet writer.
+def test_parquet_writer_gpu_none_index(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+    pdf_fname = tmp_path / "pdf.parquet"
+
+    assert_eq(simple_pdf, simple_gdf)
+
+    # Write out the gdf using the GPU accelerated writer
+    simple_gdf.to_parquet(gdf_fname, index=None)
+    simple_pdf.to_parquet(pdf_fname, index=None)
+
+    assert os.path.exists(gdf_fname)
+    assert os.path.exists(pdf_fname)
+
+    expect = pd.read_parquet(pdf_fname)
+    got = pd.read_parquet(gdf_fname)
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+def test_parquet_writer_gpu_true_index(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+    pdf_fname = tmp_path / "pdf.parquet"
+
+    assert_eq(simple_pdf, simple_gdf)
+
+    # Write out the gdf using the GPU accelerated writer
+    simple_gdf.to_parquet(gdf_fname, index=True)
+    simple_pdf.to_parquet(pdf_fname, index=True)
+
+    assert os.path.exists(gdf_fname)
+    assert os.path.exists(pdf_fname)
+
+    expect = pd.read_parquet(pdf_fname)
+    got = pd.read_parquet(gdf_fname)
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+def test_parquet_writer_gpu_false_index(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+    pdf_fname = tmp_path / "pdf.parquet"
+
+    assert_eq(simple_pdf, simple_gdf)
+
+    # Write out the gdf using the GPU accelerated writer
+    simple_gdf.to_parquet(gdf_fname, index=False)
+    simple_pdf.to_parquet(pdf_fname, index=False)
+
+    assert os.path.exists(gdf_fname)
+    assert os.path.exists(pdf_fname)
+
+    expect = pd.read_parquet(pdf_fname)
+    got = pd.read_parquet(gdf_fname)
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+def test_parquet_writer_gpu_multi_index(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+    pdf_fname = tmp_path / "pdf.parquet"
+
+    simple_pdf = simple_pdf.set_index(["col_bool", "col_int8"])
+    simple_gdf = simple_gdf.set_index(["col_bool", "col_int8"])
+
+    assert_eq(simple_pdf, simple_gdf)
+
+    # Write out the gdf using the GPU accelerated writer
+    simple_gdf.to_parquet(gdf_fname, index=None)
+    simple_pdf.to_parquet(pdf_fname, index=None)
+
+    assert os.path.exists(gdf_fname)
+    assert os.path.exists(pdf_fname)
+
+    expect = pd.read_parquet(pdf_fname)
+    got = pd.read_parquet(gdf_fname)
+
+    assert_eq(expect, got, check_categorical=False)
+
+
+def test_parquet_writer_gpu_chunked(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+
+    writer = ParquetWriter(gdf_fname)
+    writer.write_table(simple_gdf)
+    writer.write_table(simple_gdf)
+    writer.close()
+
+    assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
+
+
+def test_parquet_writer_gpu_chunked_context(tmp_path, simple_pdf, simple_gdf):
+    gdf_fname = tmp_path / "gdf.parquet"
+
+    with ParquetWriter(gdf_fname) as writer:
+        writer.write_table(simple_gdf)
+        writer.write_table(simple_gdf)
+
+    got = pd.read_parquet(gdf_fname)
+    expect = pd.concat([simple_pdf, simple_pdf])
+    assert_eq(got, expect)
+
+
+def test_parquet_write_bytes_io(simple_gdf):
+    output = BytesIO()
+    simple_gdf.to_parquet(output)
+    assert_eq(cudf.read_parquet(output), simple_gdf)
+
+
+@pytest.mark.parametrize("store_schema", [True, False])
+def test_parquet_writer_bytes_io(simple_gdf, store_schema):
+    output = BytesIO()
+
+    writer = ParquetWriter(output, store_schema=store_schema)
+    writer.write_table(simple_gdf)
+    writer.write_table(simple_gdf)
+    writer.close()
+
+    assert_eq(cudf.read_parquet(output), cudf.concat([simple_gdf, simple_gdf]))
+
+
{"row_group_size_bytes": 4 * 1024}, + {"row_group_size_rows": 5000}, + ], +) +def test_parquet_writer_row_group_size(tmp_path, row_group_size_kwargs): + # Check that row_group_size options are exposed in Python + # See https://github.com/rapidsai/cudf/issues/10978 + + size = 20000 + gdf = cudf.DataFrame({"a": range(size), "b": [1] * size}) + + fname = tmp_path / "gdf.parquet" + with ParquetWriter(fname, **row_group_size_kwargs) as writer: + writer.write_table(gdf) + + # Simple check for multiple row-groups + nrows, nrow_groups, columns, _, _ = cudf.io.parquet.read_parquet_metadata( + fname + ) + assert nrows == size + assert nrow_groups > 1 + assert columns == ["a", "b"] + + # Know the specific row-group count for row_group_size_rows + if "row_group_size_rows" in row_group_size_kwargs: + assert ( + nrow_groups == size // row_group_size_kwargs["row_group_size_rows"] + ) + + assert_eq(cudf.read_parquet(fname), gdf) + + +def test_parquet_writer_column_index(tmp_path): + # Simple test for presence of indices. validity is checked + # in libcudf tests. + # Write 2 files, one with column index set, one without. + # Make sure the former is larger in size. + + size = 20000 + gdf = cudf.DataFrame({"a": range(size), "b": [1] * size}) + + fname = tmp_path / "gdf.parquet" + with ParquetWriter(fname, statistics="ROWGROUP") as writer: + writer.write_table(gdf) + s1 = os.path.getsize(fname) + + fname = tmp_path / "gdfi.parquet" + with ParquetWriter(fname, statistics="COLUMN") as writer: + writer.write_table(gdf) + s2 = os.path.getsize(fname) + assert s2 > s1 + + +@pytest.mark.parametrize( + "max_page_size_kwargs", + [ + {"max_page_size_bytes": 4 * 1024}, + {"max_page_size_rows": 5000}, + ], +) +def test_parquet_writer_max_page_size(tmp_path, max_page_size_kwargs): + # Check that max_page_size options are exposed in Python + # Since we don't have access to page metadata, instead check that + # file written with more pages will be slightly larger + + size = 20000 + gdf = cudf.DataFrame({"a": range(size), "b": [1] * size}) + + fname = tmp_path / "gdf.parquet" + with ParquetWriter(fname, **max_page_size_kwargs) as writer: + writer.write_table(gdf) + s1 = os.path.getsize(fname) + + assert_eq(cudf.read_parquet(fname), gdf) + + fname = tmp_path / "gdf0.parquet" + with ParquetWriter(fname) as writer: + writer.write_table(gdf) + s2 = os.path.getsize(fname) + + assert_eq(cudf.read_parquet(fname), gdf) + assert s1 > s2 + + +@pytest.mark.parametrize("use_dict", [False, True]) +@pytest.mark.parametrize("max_dict_size", [0, 1048576]) +def test_parquet_writer_dictionary_setting(use_dict, max_dict_size): + # Simple test for checking the validity of dictionary encoding setting + # and behavior of ParquetWriter in cudf. + # Write a table with repetitive data with varying dictionary settings. + # Make sure the written columns are dictionary-encoded accordingly. 
+@pytest.mark.parametrize("use_dict", [False, True])
+@pytest.mark.parametrize("max_dict_size", [0, 1048576])
+def test_parquet_writer_dictionary_setting(use_dict, max_dict_size):
+    # Simple test for checking the validity of dictionary encoding setting
+    # and behavior of ParquetWriter in cudf.
+    # Write a table with repetitive data with varying dictionary settings.
+    # Make sure the written columns are dictionary-encoded accordingly.
+
+    # Table with repetitive data
+    table = cudf.DataFrame(
+        {
+            "int32": cudf.Series([1024] * 1024, dtype="int64"),
+        }
+    )
+
+    # Write to Parquet using ParquetWriter
+    buffer = BytesIO()
+    writer = ParquetWriter(
+        buffer,
+        use_dictionary=use_dict,
+        max_dictionary_size=max_dict_size,
+    )
+    writer.write_table(table)
+    writer.close()
+
+    # Read encodings from parquet file
+    got = pq.ParquetFile(buffer)
+    encodings = got.metadata.row_group(0).column(0).encodings
+
+    # Check for `PLAIN_DICTIONARY` encoding if dictionary encoding enabled
+    # and dictionary page limit > 0
+    if use_dict is True and max_dict_size > 0:
+        assert "PLAIN_DICTIONARY" in encodings
+    else:
+        assert "PLAIN_DICTIONARY" not in encodings
+
+
+@pytest.mark.parametrize("filename", ["myfile.parquet", None])
+@pytest.mark.parametrize("cols", [["b"], ["c", "b"]])
+def test_parquet_partitioned(tmpdir_factory, cols, filename):
+    rng = np.random.default_rng(seed=0)
+    # Checks that write_to_dataset is wrapping to_parquet
+    # as expected
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    size = 100
+    pdf = pd.DataFrame(
+        {
+            "a": np.arange(0, stop=size, dtype="int64"),
+            "b": rng.choice(list("abcd"), size=size),
+            "c": rng.choice(np.arange(4), size=size),
+        }
+    )
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=cols)
+    gdf = cudf.from_pandas(pdf)
+    gdf.to_parquet(
+        gdf_dir, index=False, partition_cols=cols, partition_file_name=filename
+    )
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+    if isinstance(got_pd["c"].dtype, pd.CategoricalDtype):
+        # Work-around for pandas bug:
+        # https://github.com/pandas-dev/pandas/issues/53345
+        got_pd["c"] = got_pd["c"].astype(
+            pd.CategoricalDtype(
+                categories=got_pd["c"].dtype.categories.astype("int64"),
+                ordered=got_pd["c"].dtype.ordered,
+            )
+        )
+    assert_eq(got_pd, got_cudf)
+
+    # If filename is specified, check that it is correct
+    if filename:
+        for _, _, files in os.walk(gdf_dir):
+            for fn in files:
+                assert fn == filename
+
+
+@pytest.mark.parametrize("kwargs", [{"nrows": 1}, {"skip_rows": 1}])
+def test_parquet_partitioned_notimplemented(tmpdir_factory, kwargs):
+    rng = np.random.default_rng(seed=0)
+    # Checks that write_to_dataset is wrapping to_parquet
+    # as expected
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    size = 100
+    pdf = pd.DataFrame(
+        {
+            "a": np.arange(0, stop=size, dtype="int64"),
+            "b": rng.choice(list("abcd"), size=size),
+            "c": rng.choice(np.arange(4), size=size),
+        }
+    )
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["b"])
+
+    with pytest.raises(NotImplementedError):
+        cudf.read_parquet(pdf_dir, **kwargs)
+
+
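+# Background (illustrative): to_parquet with partition_cols writes a
+# hive-style directory tree, e.g. gdf_dir/c=0/b=a/<file>.parquet, and
+# read_parquet reconstructs the partition columns from those paths; the
+# partitioned tests above and below rely on this layout.
+
+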
+@pytest.mark.parametrize("return_meta", [True, False])
+def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
+    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
+
+    cw = ParquetDatasetWriter(gdf_dir, partition_cols=["a"], index=False)
+    cw.write_table(df1)
+    cw.write_table(df2)
+    meta_byte_array = cw.close(return_metadata=return_meta)
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    if return_meta:
+        fmd = pq.ParquetFile(BytesIO(meta_byte_array)).metadata
+        assert fmd.num_rows == len(pdf)
+        assert fmd.num_row_groups == 4
+        files = {
+            os.path.join(directory, files[0])
+            for directory, _, files in os.walk(gdf_dir)
+            if files
+        }
+        meta_files = {
+            os.path.join(gdf_dir, fmd.row_group(i).column(c).file_path)
+            for i in range(fmd.num_row_groups)
+            for c in range(fmd.row_group(i).num_columns)
+        }
+        assert files == meta_files
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+
+    # Work-around for pandas bug:
+    # https://github.com/pandas-dev/pandas/issues/53345
+    got_pd["a"] = got_pd["a"].astype(
+        pd.CategoricalDtype(
+            categories=got_pd["a"].dtype.categories.astype("int64"),
+            ordered=got_pd["a"].dtype.ordered,
+        )
+    )
+    assert_eq(got_pd, got_cudf)
+
+
+@pytest.mark.parametrize(
+    "max_file_size,max_file_size_in_bytes",
+    [("500KB", 500000), ("MB", 1000000)],
+)
+def test_parquet_writer_chunked_max_file_size(
+    tmpdir_factory, max_file_size, max_file_size_in_bytes
+):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1] * 1000, "b": range(0, 5000)})
+    df2 = cudf.DataFrame(
+        {"a": [1, 3, 3, 1, 3] * 1000, "b": range(5000, 10000)}
+    )
+
+    cw = ParquetDatasetWriter(
+        gdf_dir,
+        partition_cols=["a"],
+        max_file_size=max_file_size,
+        file_name_prefix="sample",
+    )
+    cw.write_table(df1)
+    cw.write_table(df2)
+    cw.close()
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+
+    assert_eq(
+        expect_pd.sort_values(["b"]).reset_index(drop=True),
+        got_pd.sort_values(["b"]).reset_index(drop=True),
+    )
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+
+    # Work-around for pandas bug:
+    # https://github.com/pandas-dev/pandas/issues/53345
+    got_pd["a"] = got_pd["a"].astype(
+        pd.CategoricalDtype(
+            categories=got_pd["a"].dtype.categories.astype("int64"),
+            ordered=got_pd["a"].dtype.ordered,
+        )
+    )
+    assert_eq(
+        got_pd.sort_values(["b"]).reset_index(drop=True),
+        got_cudf.sort_values(["b"]).reset_index(drop=True),
+    )
+
+    all_files = glob.glob(gdf_dir + "/**/*.parquet", recursive=True)
+    for each_file in all_files:
+        # Validate that no output file exceeds the requested max_file_size
+        assert os.path.getsize(each_file) <= (max_file_size_in_bytes), (
+            "File exceeded max_file_size"
+        )
+
+
+def test_parquet_writer_chunked_max_file_size_error():
+    with pytest.raises(
+        ValueError,
+        match="file_name_prefix cannot be None if max_file_size is passed",
+    ):
+        ParquetDatasetWriter("sample", partition_cols=["a"], max_file_size=100)
+
+
+def test_parquet_writer_chunked_partitioned_context(tmpdir_factory):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
+    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
+
+    with ParquetDatasetWriter(
+        gdf_dir, partition_cols=["a"], index=False
+    ) as cw:
+        cw.write_table(df1)
+        cw.write_table(df2)
+
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+
+    # Work-around for pandas bug:
+    # https://github.com/pandas-dev/pandas/issues/53345
+    got_pd["a"] = got_pd["a"].astype(
+        pd.CategoricalDtype(
+            categories=got_pd["a"].dtype.categories.astype("int64"),
+            ordered=got_pd["a"].dtype.ordered,
+        )
+    )
+    assert_eq(got_pd, got_cudf)
+
+
+@pytest.mark.parametrize("cols", [None, ["b"]])
+@pytest.mark.parametrize("store_schema", [True, False])
+def test_parquet_write_to_dataset(tmpdir_factory, cols, store_schema):
+    rng = np.random.default_rng(seed=0)
+    dir1 = tmpdir_factory.mktemp("dir1")
+    dir2 = tmpdir_factory.mktemp("dir2")
+    if cols is None:
+        dir1 = dir1.join("file.pq")
+        dir2 = dir2.join("file.pq")
+    dir1 = str(dir1)
+    dir2 = str(dir2)
+
+    size = 100
+    gdf = cudf.DataFrame(
+        {
+            "a": np.arange(0, stop=size),
+            "b": rng.choice(np.arange(4), size=size),
+        }
+    )
+    gdf.to_parquet(dir1, partition_cols=cols, store_schema=store_schema)
+    cudf.io.write_to_dataset(gdf, dir2, partition_cols=cols)
+
+    # Read back with cudf
+    expect = cudf.read_parquet(dir1)
+    got = cudf.read_parquet(dir2)
+    assert_eq(expect, got)
+
+    gdf = cudf.DataFrame(
+        {
+            "a": cudf.Series([1, 2, 3]),
+            "b": cudf.Series([1, 2, 3]),
+            "c": cudf.Series(["a", "b", "c"], dtype="category"),
+        }
+    )
+    with pytest.raises(ValueError):
+        gdf.to_parquet(dir1, partition_cols=cols, store_schema=store_schema)
+
+
+@pytest.mark.parametrize(
+    "pfilters",
+    [[("b", "==", "b")], [("b", "==", "a"), ("c", "==", 1)]],
+)
+@pytest.mark.parametrize("selection", ["directory", "files", "row-groups"])
+@pytest.mark.parametrize("use_cat", [True, False])
+def test_read_parquet_partitioned_filtered(
+    tmp_path, pfilters, selection, use_cat
+):
+    rng = np.random.default_rng(2)
+    path = str(tmp_path)
+    size = 100
+    df = cudf.DataFrame(
+        {
+            "a": np.arange(0, stop=size, dtype="int64"),
+            "b": rng.choice(list("abcd"), size=size),
+            "c": rng.choice(np.arange(4), size=size),
+        }
+    )
+    df.to_parquet(path, partition_cols=["c", "b"])
+
+    if selection == "files":
+        # Pass in a list of paths
+        fs = get_fs_token_paths(path)[0]
+        read_path = fs.find(path)
+        row_groups = None
+    elif selection == "row-groups":
+        # Pass in a list of paths AND row-group ids
+        fs = get_fs_token_paths(path)[0]
+        read_path = fs.find(path)
+        row_groups = [[0] for p in read_path]
+    else:
+        # Pass in a directory path
+        # (row-group selection not allowed in this case)
+        read_path = path
+        row_groups = None
+
+    # Filter on partitioned columns
+    expect = pd.read_parquet(read_path, filters=pfilters)
+    got = cudf.read_parquet(
+        read_path,
+        filters=pfilters,
+        row_groups=row_groups,
+        categorical_partitions=use_cat,
+    )
+    expect["b"] = expect["b"].astype(str)
+    expect["c"] = expect["c"].astype(int)
+    if use_cat:
+        assert got.dtypes["b"] == "category"
+        assert got.dtypes["c"] == "category"
+        got["b"] = got["b"].astype(str)
+        got["c"] = got["c"].astype(int)
+    else:
+        # Check that we didn't get categorical columns; the partition
+        # keys were normalized to str/int above for comparison with pandas
+        assert got.dtypes["b"] == "object"
+        assert got.dtypes["c"] == "int"
+    assert_eq(expect, got)
+
+
rng.choice(list("abcd"), size=size), + "c": rng.choice(np.arange(4), size=size), + } + ) + df.to_parquet(path, partition_cols=["c", "b"]) + got = cudf.read_parquet(path, filters=filters) + expect = pd.read_parquet(path, filters=filters) + + # Work-around for pandas bug: + # https://github.com/pandas-dev/pandas/issues/53345 + expect["c"] = expect["c"].astype( + pd.CategoricalDtype( + categories=expect["c"].dtype.categories.astype("int64"), + ordered=expect["c"].dtype.ordered, + ) + ) + assert_eq(expect, got) + + +def test_parquet_writer_chunked_metadata(tmp_path, simple_pdf, simple_gdf): + gdf_fname = tmp_path / "gdf.parquet" + test_path = "test/path" + + writer = ParquetWriter(gdf_fname) + writer.write_table(simple_gdf) + writer.write_table(simple_gdf) + meta_byte_array = writer.close(metadata_file_path=test_path) + fmd = pq.ParquetFile(BytesIO(meta_byte_array)).metadata + + assert fmd.num_rows == 2 * len(simple_gdf) + assert fmd.num_row_groups == 2 + + for r in range(fmd.num_row_groups): + for c in range(fmd.num_columns): + assert fmd.row_group(r).column(c).file_path == test_path + + +def test_write_read_cudf(tmp_path, pdf): + file_path = tmp_path / "cudf.parquet" + if "col_category" in pdf.columns: + pdf = pdf.drop(columns=["col_category"]) + + gdf = cudf.from_pandas(pdf) + gdf.to_parquet(file_path) + gdf = cudf.read_parquet(file_path) + + assert_eq(gdf, pdf, check_index_type=not pdf.empty) + + +def test_write_cudf_read_pandas_pyarrow(tmp_path, pdf): + cudf_path = tmp_path / "cudf.parquet" + pandas_path = tmp_path / "pandas.parquet" + + if "col_category" in pdf.columns: + pdf = pdf.drop(columns=["col_category"]) + + df = cudf.from_pandas(pdf) + + df.to_parquet(cudf_path) + pdf.to_parquet(pandas_path) + + cudf_res = pd.read_parquet(cudf_path) + pd_res = pd.read_parquet(pandas_path) + + assert_eq(pd_res, cudf_res, check_index_type=not pdf.empty) + + cudf_res = pa.parquet.read_table( + cudf_path, use_pandas_metadata=True + ).to_pandas() + pd_res = pa.parquet.read_table( + pandas_path, use_pandas_metadata=True + ).to_pandas() + + assert_eq(cudf_res, pd_res, check_index_type=not pdf.empty) + + +def test_parquet_writer_criteo(tmp_path): + # To run this test, download the day 0 of criteo dataset from + # http://labs.criteo.com/2013/12/download-terabyte-click-logs/ + # and place the uncompressed dataset in the home directory + fname = os.path.expanduser("~/day_0") + if not os.path.isfile(fname): + pytest.skip("Local criteo day 0 tsv file is not found") + + cudf_path = tmp_path / "cudf.parquet" + + cont_names = ["I" + str(x) for x in range(1, 14)] + cat_names = ["C" + str(x) for x in range(1, 27)] + cols = ["label", *cont_names, *cat_names] + + df = cudf.read_csv(fname, sep="\t", names=cols, byte_range=(0, 1000000000)) + df = df.drop(columns=cont_names) + + df.to_parquet(cudf_path) + + +def test_trailing_nans(datadir, tmp_path): + fname = "trailing_nans.parquet" + file_path = datadir / fname + cu_df = cudf.read_parquet(file_path) + + tmp_file_path = tmp_path / fname + cu_df.to_parquet(tmp_file_path) + + pd.read_parquet(tmp_file_path) + + +def test_parquet_writer_sliced(tmp_path): + cudf_path = tmp_path / "cudf.parquet" + + df = pd.DataFrame() + df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) + df = cudf.from_pandas(df) + + df_select = df.iloc[1:3] + + df_select.to_parquet(cudf_path) + assert_eq(cudf.read_parquet(cudf_path), df_select) + + +def test_parquet_writer_list_basic(tmp_path): + expect = pd.DataFrame({"a": [[[1, 2], [3, 4]], None, [[5, 6], None]]}) + fname = tmp_path / 
"test_parquet_writer_list_basic.parquet" + + gdf = cudf.from_pandas(expect) + + gdf.to_parquet(fname) + assert os.path.exists(fname) + + got = pd.read_parquet(fname) + assert_eq(expect, got) + + +def test_parquet_writer_list_large(tmp_path): + gdf = cudf.DataFrame({"a": list_gen(int_gen, 128, 40, 25)}) + fname = tmp_path / "test_parquet_writer_list_large.parquet" + + gdf.to_parquet(fname) + assert os.path.exists(fname) + + got = pd.read_parquet(fname) + assert gdf.to_arrow().equals(pa.Table.from_pandas(got)) + + +def test_parquet_writer_list_large_mixed(tmp_path): + expect = pd.DataFrame( + { + "a": list_gen(string_gen, 64, 40, 25), + "b": list_gen(int_gen, 64, 40, 25), + "c": list_gen(int_gen, 64, 40, 25, include_validity=True), + "d": list_gen(string_gen, 64, 40, 25, include_validity=True), + } + ) + fname = tmp_path / "test_parquet_writer_list_large_mixed.parquet" + gdf = cudf.from_pandas(expect) + + gdf.to_parquet(fname) + assert os.path.exists(fname) + + got = pd.read_parquet(fname) + assert_eq(expect, got) + + +@pytest.mark.parametrize("store_schema", [True, False]) +def test_parquet_writer_list_chunked(tmp_path, store_schema): + if store_schema and version.parse(pa.__version__) < version.parse( + "15.0.0" + ): + pytest.skip("https://github.com/apache/arrow/pull/37792") + table1 = cudf.DataFrame( + { + "a": list_gen(string_gen, 64, 40, 25), + "b": list_gen(int_gen, 64, 40, 25), + "c": list_gen(int_gen, 64, 40, 25, include_validity=True), + "d": list_gen(string_gen, 64, 40, 25, include_validity=True), + } + ) + table2 = cudf.DataFrame( + { + "a": list_gen(string_gen, 64, 40, 25), + "b": list_gen(int_gen, 64, 40, 25), + "c": list_gen(int_gen, 64, 40, 25, include_validity=True), + "d": list_gen(string_gen, 64, 40, 25, include_validity=True), + } + ) + fname = tmp_path / "test_parquet_writer_list_chunked.parquet" + expect = cudf.concat([table1, table2]) + expect = expect.reset_index(drop=True) + + with ParquetWriter(fname, store_schema=store_schema) as writer: + writer.write_table(table1) + writer.write_table(table2) + + assert os.path.exists(fname) + got = pq.read_table(fname) + # compare with pyarrow since pandas doesn't + # have a list or struct dtype + assert expect.to_arrow().equals(got) + + +def test_parquet_nullable_boolean(tmp_path, engine): + pandas_path = tmp_path / "pandas_bools.parquet" + + pdf = pd.DataFrame( + { + "a": pd.Series( + [True, False, None, True, False], dtype=pd.BooleanDtype() + ) + } + ) + expected_gdf = cudf.DataFrame({"a": [True, False, None, True, False]}) + + pdf.to_parquet(pandas_path) + with _hide_pyarrow_parquet_cpu_warnings(engine): + actual_gdf = cudf.read_parquet(pandas_path, engine=engine) + + assert_eq(actual_gdf, expected_gdf) + + +def run_parquet_index(pdf, index): + pandas_buffer = BytesIO() + cudf_buffer = BytesIO() + + gdf = cudf.from_pandas(pdf) + + pdf.to_parquet(pandas_buffer, index=index) + gdf.to_parquet(cudf_buffer, index=index) + + expected = pd.read_parquet(cudf_buffer) + actual = cudf.read_parquet(pandas_buffer) + + if expected.empty and actual.empty: + # We return RangeIndex columns compared + # to pandas' Index[object] columns + actual.columns = expected.columns + + assert_eq(expected, actual, check_index_type=True) + + expected = pd.read_parquet(pandas_buffer) + actual = cudf.read_parquet(cudf_buffer) + + if expected.empty and actual.empty: + # We return RangeIndex columns compared + # to pandas' Index[object] columns + actual.columns = expected.columns + + assert_eq( + expected, + actual, + check_index_type=True, + ) + + 
+@pytest.mark.parametrize(
+    "pdf",
+    [
+        pd.DataFrame(index=[1, 2, 3]),
+        pd.DataFrame({"a": [1, 2, 3]}, index=[0.43534, 345, 0.34534]),
+        pd.DataFrame(
+            {"b": [11, 22, 33], "c": ["a", "b", "c"]},
+            index=pd.Index(["a", "b", "c"], name="custom name"),
+        ),
+        pd.DataFrame(
+            {"a": [10, 11, 12], "b": [99, 88, 77]},
+            index=pd.RangeIndex(12, 17, 2),
+        ),
+        pd.DataFrame(
+            {"b": [99, 88, 77]},
+            index=pd.RangeIndex(22, 27, 2, name="hello index"),
+        ),
+        pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")),
+        pd.DataFrame(
+            {"a": ["a", "bb", "cc"], "b": [10, 21, 32]},
+            index=pd.MultiIndex.from_tuples([[1, 2], [10, 11], [15, 16]]),
+        ),
+        pd.DataFrame(
+            {"a": ["a", "bb", "cc"], "b": [10, 21, 32]},
+            index=pd.MultiIndex.from_tuples(
+                [[1, 2], [10, 11], [15, 16]], names=["first", "second"]
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize("index", [None, True, False])
+def test_parquet_index(pdf, index):
+    run_parquet_index(pdf, index)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        pytest.param(
+            None,
+            marks=pytest.mark.xfail(
+                reason="https://github.com/apache/arrow/issues/40743"
+            ),
+        ),
+        True,
+    ],
+)
+def test_parquet_index_empty(index):
+    pdf = pd.DataFrame(index=pd.RangeIndex(0, 10, 1))
+    run_parquet_index(pdf, index)
+
+
+def test_parquet_no_index_empty():
+    pdf = pd.DataFrame(index=pd.RangeIndex(0, 10, 1))
+    run_parquet_index(pdf, index=False)
+
+
+def test_parquet_allnull_str(tmp_path, engine):
+    pandas_path = tmp_path / "pandas_allnulls.parquet"
+
+    pdf = pd.DataFrame(
+        {"a": pd.Series([None, None, None, None, None], dtype="str")}
+    )
+    expected_gdf = cudf.DataFrame(
+        {"a": cudf.Series([None, None, None, None, None], dtype="str")}
+    )
+
+    pdf.to_parquet(pandas_path)
+    with _hide_pyarrow_parquet_cpu_warnings(engine):
+        actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
+
+    assert_eq(actual_gdf, expected_gdf)
+
+
+def normalized_equals(value1, value2):
+    # Normalize missing values, datetime-likes, and timedeltas to
+    # comparable Python objects before comparing.
+    if value1 is pd.NA or value1 is pd.NaT:
+        value1 = None
+    if value2 is pd.NA or value2 is pd.NaT:
+        value2 = None
+    if isinstance(value1, np.datetime64):
+        value1 = pd.Timestamp(value1).to_pydatetime()
+    if isinstance(value2, np.datetime64):
+        value2 = pd.Timestamp(value2).to_pydatetime()
+    if isinstance(value1, pd.Timestamp):
+        value1 = value1.to_pydatetime()
+    if isinstance(value2, pd.Timestamp):
+        value2 = value2.to_pydatetime()
+    if isinstance(value1, datetime.datetime):
+        value1 = value1.replace(tzinfo=None)
+    if isinstance(value2, datetime.datetime):
+        value2 = value2.replace(tzinfo=None)
+    if isinstance(value1, pd.Timedelta):
+        unit = "ms" if value1.unit == "s" else value1.unit
+        value2 = pd.Timedelta(value2, unit=unit)
+
+    # if one is datetime then both values are datetimes now
+    if isinstance(value1, datetime.datetime):
+        return value1 == value2
+
+    # Compare integers with floats now
+    if isinstance(value1, float) or isinstance(value2, float):
+        return math.isclose(value1, value2)
+
+    return value1 == value2
+
+
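+# For instance (illustrative):
+#     normalized_equals(np.datetime64("2020-01-01"),
+#                       pd.Timestamp("2020-01-01"))  # -> True
+# both sides normalize to the same naive datetime.datetime, which is what
+# lets pyarrow row-group statistics be compared against pandas aggregates
+# in the statistics tests below.
+
+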
+@pytest.mark.parametrize("add_nulls", [True, False])
+@pytest.mark.parametrize("store_schema", [True, False])
+def test_parquet_writer_statistics(tmp_path, pdf, add_nulls, store_schema):
+    if store_schema and version.parse(pa.__version__) < version.parse(
+        "15.0.0"
+    ):
+        pytest.skip("https://github.com/apache/arrow/pull/37792")
+    file_path = tmp_path / "cudf.parquet"
+    if "col_category" in pdf.columns:
+        pdf = pdf.drop(columns=["col_category", "col_bool"])
+
+    if not add_nulls:
+        # Timedelta types convert NaT to None when reading from parquet into
+        # pandas, which interferes with series.max()/min()
+        for t in TIMEDELTA_TYPES:
+            pdf["col_" + t] = pd.Series(np.arange(len(pdf.index))).astype(t)
+        # pyarrow can't read values with non-zero nanoseconds
+        pdf["col_timedelta64[ns]"] = pdf["col_timedelta64[ns]"] * 1000
+
+    gdf = cudf.from_pandas(pdf)
+    if add_nulls:
+        for col in gdf:
+            set_random_null_mask_inplace(gdf[col])
+    gdf.to_parquet(file_path, index=False, store_schema=store_schema)
+
+    # Read back from pyarrow
+    pq_file = pq.ParquetFile(file_path)
+    # verify each row group's statistics
+    for rg in range(0, pq_file.num_row_groups):
+        pd_slice = pq_file.read_row_group(rg).to_pandas()
+
+        # Statistics are per-column, so verify each column independently
+        for i, col in enumerate(pd_slice):
+            stats = pq_file.metadata.row_group(rg).column(i).statistics
+
+            actual_min = pd_slice[col].min()
+            stats_min = stats.min
+            assert normalized_equals(actual_min, stats_min)
+
+            actual_max = pd_slice[col].max()
+            stats_max = stats.max
+            assert normalized_equals(actual_max, stats_max)
+
+            assert stats.null_count == pd_slice[col].isna().sum()
+            assert stats.num_values == pd_slice[col].count()
+
+
+def test_parquet_writer_list_statistics(tmp_path):
+    df = pd.DataFrame(
+        {
+            "a": list_gen(string_gen, 64, 40, 25),
+            "b": list_gen(int_gen, 64, 40, 25),
+            "c": list_gen(int_gen, 64, 40, 25, include_validity=True),
+            "d": list_gen(string_gen, 64, 40, 25, include_validity=True),
+        }
+    )
+    fname = tmp_path / "test_parquet_writer_list_statistics.parquet"
+    gdf = cudf.from_pandas(df)
+
+    gdf.to_parquet(fname)
+    assert os.path.exists(fname)
+
+    # Read back from pyarrow
+    pq_file = pq.ParquetFile(fname)
+    # verify each row group's statistics
+    for rg in range(0, pq_file.num_row_groups):
+        pd_slice = pq_file.read_row_group(rg).to_pandas()
+
+        # Statistics are per-column, so verify each column independently
+        for i, col in enumerate(pd_slice):
+            stats = pq_file.metadata.row_group(rg).column(i).statistics
+
+            actual_min = pd_slice[col].explode().explode().dropna().min()
+            stats_min = stats.min
+            assert normalized_equals(actual_min, stats_min)
+
+            actual_max = pd_slice[col].explode().explode().dropna().max()
+            stats_max = stats.max
+            assert normalized_equals(actual_max, stats_max)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # Structs
+        {
+            "being": [
+                None,
+                {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}},
+                {"human?": None, "Deets": {"Name": "Angua", "Age": 25}},
+                {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}},
+                {"human?": False, "Deets": None},
+                {"human?": None, "Deets": {"Name": "Mr", "Age": None}},
+            ]
+        },
+        # List of Structs
+        {
+            "family": [
+                [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}],
+                [
+                    {"human?": None, "deets": {"weight": 5.3, "age": 25}},
+                    {"human?": False, "deets": {"weight": 8.0, "age": 31}},
+                    {"human?": False, "deets": None},
+                ],
+                [],
+                [{"human?": None, "deets": {"weight": 6.9, "age": None}}],
+            ]
+        },
+        # Struct of Lists
+        {
+            "Real estate records": [
+                None,
+                {
+                    "Status": "NRI",
+                    "Ownerships": {
+                        "land_unit": [None, 2, None],
+                        "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]],
+                    },
+                },
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [4, 5],
+                        "flats": [[7, 8], []],
+                    },
+                },
+                {
+                    "Status": "RI",
+                    "Ownerships": {"land_unit": None, "flats": [[]]},
+                },
+                {"Status": "RI", "Ownerships": None},
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [7, 8, 9],
+                        "flats": [[], [], []],
+                    },
+                },
+            ]
+        },
+    ],
+)
+def test_parquet_writer_nested(tmp_path, data):
+    expect = pd.DataFrame(data)
+    gdf = cudf.from_pandas(expect)
+
+    fname = tmp_path / "test_parquet_writer_nested.parquet"
+    gdf.to_parquet(fname)
+    assert os.path.exists(fname)
+
+    got = pd.read_parquet(fname)
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "decimal_type",
+    [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype],
+)
+@pytest.mark.parametrize("data", [[1, 2, 3], [0.00, 0.01, None, 0.5]])
+def test_parquet_writer_decimal(decimal_type, data):
+    gdf = cudf.DataFrame({"val": data})
+
+    gdf["dec_val"] = gdf["val"].astype(decimal_type(7, 2))
+
+    buff = BytesIO()
+    gdf.to_parquet(buff)
+
+    got = pd.read_parquet(buff, dtype_backend="numpy_nullable")
+    assert_eq(gdf["val"].to_pandas(nullable=True), got["val"])
+    assert_eq(gdf["dec_val"].to_pandas(), got["dec_val"])
+
+
+def test_parquet_writer_column_validation():
+    cudf_parquet = BytesIO()
+    pandas_parquet = BytesIO()
+    df = cudf.DataFrame({1: [1, 2, 3], "a": ["a", "b", "c"]})
+    pdf = df.to_pandas()
+
+    with cudf.option_context("mode.pandas_compatible", True):
+        with pytest.warns(UserWarning):
+            df.to_parquet(cudf_parquet)
+
+    with pytest.warns(UserWarning):
+        pdf.to_parquet(pandas_parquet)
+
+    assert_eq(
+        pd.read_parquet(cudf_parquet),
+        cudf.read_parquet(pandas_parquet),
+    )
+    assert_eq(
+        cudf.read_parquet(cudf_parquet),
+        pd.read_parquet(pandas_parquet),
+    )
+
+    with cudf.option_context("mode.pandas_compatible", False):
+        with pytest.raises(ValueError):
+            df.to_parquet(cudf_parquet)
+
+
+def test_parquet_writer_nulls_pandas_read(tmp_path, pdf):
+    if "col_bool" in pdf.columns:
+        pdf.drop(columns="col_bool", inplace=True)
+    if "col_category" in pdf.columns:
+        pdf.drop(columns="col_category", inplace=True)
+    gdf = cudf.from_pandas(pdf)
+
+    num_rows = len(gdf)
+
+    if num_rows > 0:
+        for col in gdf.columns:
+            gdf[col][random.randint(0, num_rows - 1)] = None
+
+    fname = tmp_path / "test_parquet_writer_nulls_pandas_read.parquet"
+    gdf.to_parquet(fname)
+    assert os.path.exists(fname)
+
+    got = pd.read_parquet(fname)
+    nullable = num_rows > 0
+
+    if nullable:
+        gdf = gdf.drop(columns="col_datetime64[ms]")
+        gdf = gdf.drop(columns="col_datetime64[us]")
+        got = got.drop(columns="col_datetime64[ms]")
+        got = got.drop(columns="col_datetime64[us]")
+
+    assert_eq(gdf.to_pandas(nullable=nullable), got)
+
+
+@pytest.mark.parametrize(
+    "decimal_type",
+    [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype],
+)
+def test_parquet_decimal_precision(tmp_path, decimal_type):
+    df = cudf.DataFrame({"val": ["3.5", "4.2"]}).astype(decimal_type(5, 2))
+    assert df.val.dtype.precision == 5
+
+    fname = tmp_path / "decimal_test.parquet"
+    df.to_parquet(fname)
+    df = cudf.read_parquet(fname)
+    assert df.val.dtype.precision == 5
+
+
+def test_parquet_decimal_precision_empty(tmp_path):
+    df = (
+        cudf.DataFrame({"val": ["3.5", "4.2"]})
+        .astype(cudf.Decimal64Dtype(5, 2))
+        .iloc[:0]
+    )
+    assert df.val.dtype.precision == 5
+
+    fname = tmp_path / "decimal_test.parquet"
+    df.to_parquet(fname)
+    df = cudf.read_parquet(fname)
+    assert df.val.dtype.precision == 5
+
+
+def test_parquet_reader_brotli(datadir):
+    fname = datadir / "brotli_int16.parquet"
+
+    expect = pd.read_parquet(fname)
+    got = cudf.read_parquet(fname).to_pandas(nullable=True)
+
+    assert_eq(expect, got)
+
+
+def test_parquet_reader_one_level_list(datadir):
+    fname = datadir / "one_level_list.parquet"
+
+    expect = pd.read_parquet(fname)
+    got = cudf.read_parquet(fname)
+
+    assert_eq(expect, got)
+
+
+def test_parquet_reader_binary_decimal(datadir):
+    fname = datadir / "binary_decimal.parquet"
+
+    expect = pd.read_parquet(fname)
+    got = cudf.read_parquet(fname).to_pandas()
+
+    assert_eq(expect, got)
+
+
+def test_parquet_reader_fixed_bin(datadir):
+    fname = datadir / "fixed_len_byte_array.parquet"
+
+    expect = pd.read_parquet(fname)
+    got = cudf.read_parquet(fname)
+
+    assert_eq(expect, got)
+
+
+def test_parquet_reader_fixed_len_with_dict(tmp_path):
+    def flba(i):
+        hasher = hashlib.sha256()
+        hasher.update(i.to_bytes(4, "little"))
+        return hasher.digest()
+
+    # use pyarrow to write table of fixed_len_byte_array
+    num_rows = 200
+    data = pa.array([flba(i) for i in range(num_rows)], type=pa.binary(32))
+    padf = pa.Table.from_arrays([data], names=["flba"])
+    padf_fname = tmp_path / "padf.parquet"
+    pq.write_table(padf, padf_fname, use_dictionary=True)
+
+    expect = pd.read_parquet(padf_fname)
+    got = cudf.read_parquet(padf_fname)
+    assert_eq(expect, got)
+
+
+def test_parquet_flba_round_trip(tmp_path):
+    def flba(i):
+        hasher = hashlib.sha256()
+        hasher.update(i.to_bytes(4, "little"))
+        return hasher.digest()
+
+    # use pyarrow to write table of fixed_len_byte_array
+    num_rows = 200
+    data = pa.array([flba(i) for i in range(num_rows)], type=pa.binary(32))
+    padf = pa.Table.from_arrays([data], names=["flba"])
+    padf_fname = tmp_path / "padf.parquet"
+    pq.write_table(padf, padf_fname)
+
+    # round trip data with cudf
+    cdf = cudf.read_parquet(padf_fname)
+    cdf_fname = tmp_path / "cdf.parquet"
+    cdf.to_parquet(cdf_fname, column_type_length={"flba": 32})
+
+    # now read back in with pyarrow to test it was written properly by cudf
+    padf2 = pq.read_table(padf_fname)
+    padf3 = pq.read_table(cdf_fname)
+    assert_eq(padf2, padf3)
+    assert_eq(padf2.schema[0].type, padf3.schema[0].type)
+
+
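+# Note (illustrative): cudf reads FIXED_LEN_BYTE_ARRAY data back as a
+# plain variable-width binary/string column, so the
+# column_type_length={"flba": 32} hint in the round trip above is what
+# asks the writer to emit FIXED_LEN_BYTE_ARRAY(32) again; the schema
+# assertion then confirms the fixed-width type survived the round trip.
+
+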
"USE_DEFAULT", + ], +) +def test_per_column_encoding_option(encoding): + pdf = pd.DataFrame({"ilist": [[1, 2, 3, 1, 2, 3]], "i1": [1]}) + cdf = cudf.from_pandas(pdf) + buffer = BytesIO() + cdf.to_parquet( + buffer, + column_encoding={"ilist.list.element": encoding}, + ) + # DICTIONARY and USE_DEFAULT should both result in a PLAIN_DICTIONARY encoding in parquet + encoding_name = ( + "PLAIN_DICTIONARY" + if encoding == "DICTIONARY" or encoding == "USE_DEFAULT" + else encoding + ) + pf = pq.ParquetFile(buffer) + fmd = pf.metadata + assert encoding_name in fmd.row_group(0).column(0).encodings + + +@pytest.mark.parametrize("compression", ["SNAPPY", "ZSTD"]) +def test_per_column_compression_option(set_decomp_env_vars, compression): + pdf = pd.DataFrame( + {"ilist": [[1, 2, 3, 1, 2, 3]], "i1": [[1, 2, 3, 1, 2, 3]]} + ) + cdf = cudf.from_pandas(pdf) + buffer = BytesIO() + cdf.to_parquet( + buffer, + compression=compression, + skip_compression={"ilist.list.element"}, + use_dictionary=False, # to make sure that data is compressible + ) + + pf = pq.ParquetFile(buffer) + fmd = pf.metadata + assert fmd.row_group(0).column(0).compression == "UNCOMPRESSED" + assert fmd.row_group(0).column(1).compression == compression + + +@pytest.mark.parametrize( + "encoding", + ["DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY"], +) +def test_per_column_options_string_col(tmp_path, encoding): + pdf = pd.DataFrame({"s": ["a string"], "i1": [1]}) + cdf = cudf.from_pandas(pdf) + fname = tmp_path / "strcol.parquet" + cdf.to_parquet( + fname, + column_encoding={"s": encoding}, + compression="SNAPPY", + ) + pf = pq.ParquetFile(fname) + fmd = pf.metadata + assert encoding in fmd.row_group(0).column(0).encodings + + +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("16.0.0"), + reason="https://github.com/apache/arrow/pull/39748", +) +def test_parquet_bss_round_trip(tmp_path): + num_rows = 200 + + def flba(i): + hasher = hashlib.sha256() + hasher.update(i.to_bytes(4, "little")) + return hasher.digest() + + # use pyarrow to write table of types that support BYTE_STREAM_SPLIT encoding + rows_per_rowgroup = 5000 + fixed_data = pa.array( + [flba(i) for i in range(num_rows)], type=pa.binary(32) + ) + i32_data = pa.array(list(range(num_rows)), type=pa.int32()) + i64_data = pa.array(list(range(num_rows)), type=pa.int64()) + f32_data = pa.array([float(i) for i in range(num_rows)], type=pa.float32()) + f64_data = pa.array([float(i) for i in range(num_rows)], type=pa.float64()) + padf = pa.Table.from_arrays( + [fixed_data, i32_data, i64_data, f32_data, f64_data], + names=["flba", "i32", "i64", "f32", "f64"], + ) + padf_fname = tmp_path / "padf.parquet" + pq.write_table( + padf, + padf_fname, + column_encoding="BYTE_STREAM_SPLIT", + use_dictionary=False, + row_group_size=rows_per_rowgroup, + ) + + # round trip data with cudf + cdf = cudf.read_parquet(padf_fname) + cdf_fname = tmp_path / "cdf.parquet" + cdf.to_parquet( + cdf_fname, + column_type_length={"flba": 32}, + column_encoding={ + "flba": "BYTE_STREAM_SPLIT", + "i32": "BYTE_STREAM_SPLIT", + "i64": "BYTE_STREAM_SPLIT", + "f32": "BYTE_STREAM_SPLIT", + "f64": "BYTE_STREAM_SPLIT", + }, + row_group_size_rows=rows_per_rowgroup, + ) + + # now read back in with pyarrow to test it was written properly by cudf + padf2 = pq.read_table(padf_fname) + padf3 = pq.read_table(cdf_fname) + assert_eq(padf2, padf3) + assert_eq(padf2.schema[0].type, padf3.schema[0].type) + + +def test_parquet_reader_rle_boolean(datadir): + fname = datadir / "rle_boolean_encoding.parquet" + + expect = 
pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got) + + +# testing a specific bug-fix/edge case. +# specifically: in a parquet file containing a particular way of representing +# a list column in a schema, the cudf reader was confusing +# nesting information between a list column and a subsequent +# string column, ultimately causing a crash. +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Older versions of pandas do not have DataFrame.map()", +) +def test_parquet_reader_one_level_list2(datadir): + # we are reading in a file containing binary types, but cudf returns + # those as strings. so we have to massage the pandas data to get + # them to compare correctly. + def postprocess(val): + if isinstance(val, bytes): + return val.decode() + elif isinstance(val, np.ndarray): + return np.array([v.decode() for v in val]) + else: + return val + + fname = datadir / "one_level_list2.parquet" + + expect = pd.read_parquet(fname) + expect = expect.map(postprocess) + got = cudf.read_parquet(fname) + + assert_eq(expect, got, check_dtype=False) + + +# testing a specific bug-fix/edge case. +# specifically: in a parquet file containing a particular way of representing +# a list column in a schema, the cudf reader was confusing +# nesting information and building a list of list of int instead +# of a list of int +def test_parquet_reader_one_level_list3(datadir): + fname = datadir / "one_level_list3.parquet" + + expect = pd.read_parquet(fname) + got = cudf.read_parquet(fname) + + assert_eq(expect, got, check_dtype=True) + + +@pytest.mark.parametrize("size_bytes", [4_000_000, 1_000_000, 600_000]) +@pytest.mark.parametrize("size_rows", [1_000_000, 100_000, 10_000]) +def test_to_parquet_row_group_size( + tmp_path, large_int64_gdf, size_bytes, size_rows +): + fname = tmp_path / "row_group_size.parquet" + large_int64_gdf.to_parquet( + fname, row_group_size_bytes=size_bytes, row_group_size_rows=size_rows + ) + + num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata( + fname + ) + # 8 bytes per row, as the column is int64 + expected_num_row_groups = max( + math.ceil(num_rows / size_rows), math.ceil(8 * num_rows / size_bytes) + ) + assert expected_num_row_groups == row_groups + + +@pytest.mark.parametrize("size_rows", [500_000, 100_000, 10_000]) +def test_parquet_row_group_metadata(tmp_path, large_int64_gdf, size_rows): + fname = tmp_path / "row_group_size.parquet" + large_int64_gdf.to_parquet(fname, row_group_size_rows=size_rows) + + # read file metadata from parquet + ( + num_rows, + row_groups, + _, # col_names + _, # num_columns + row_group_metadata, + ) = cudf.io.read_parquet_metadata(fname) + + # length(RowGroupsMetaData) == number of row groups + assert len(row_group_metadata) == row_groups + # sum of rows in row groups == total rows + assert num_rows == sum( + [row_group["num_rows"] for row_group in row_group_metadata] + ) + + +def test_parquet_reader_decimal_columns(): + df = cudf.DataFrame( + { + "col1": cudf.Series([1, 2, 3], dtype=cudf.Decimal64Dtype(10, 2)), + "col2": [10, 11, 12], + "col3": [12, 13, 14], + "col4": ["a", "b", "c"], + } + ) + buffer = BytesIO() + df.to_parquet(buffer) + + actual = cudf.read_parquet(buffer, columns=["col3", "col2", "col1"]) + expected = pd.read_parquet(buffer, columns=["col3", "col2", "col1"]) + + assert_eq(actual, expected) + + +def test_parquet_reader_zstd_compression(datadir): + fname = datadir / "spark_zstd.parquet" + try: + df = cudf.read_parquet(fname) + pdf = pd.read_parquet(fname) +
assert_eq(df, pdf) + except RuntimeError: + pytest.skip(reason="zstd support is not enabled") + + +def test_read_parquet_multiple_files(tmp_path): + df_1_path = tmp_path / "df_1.parquet" + df_2_path = tmp_path / "df_2.parquet" + df_1 = cudf.DataFrame({"id": range(100), "a": [1] * 100}) + df_1.to_parquet(df_1_path) + + df_2 = cudf.DataFrame({"id": range(200, 2200), "a": [2] * 2000}) + df_2.to_parquet(df_2_path) + + expected = pd.read_parquet([df_1_path, df_2_path]) + actual = cudf.read_parquet([df_1_path, df_2_path]) + assert_eq(expected, actual) + + expected = pd.read_parquet([df_2_path, df_1_path]) + actual = cudf.read_parquet([df_2_path, df_1_path]) + assert_eq(expected, actual) + + +@pytest.mark.parametrize("index", [True, False, None]) +@pytest.mark.parametrize("columns", [None, [], ["b", "a"]]) +def test_parquet_columns_and_index_param(index, columns): + buffer = BytesIO() + df = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + df.to_parquet(buffer, index=index) + + expected = pd.read_parquet(buffer, columns=columns) + got = cudf.read_parquet(buffer, columns=columns) + if columns == [] and index in {False, None}: + # cuDF returns RangeIndex columns compared + # to pandas' Index[object] columns + got.columns = expected.columns + + assert_eq(expected, got, check_index_type=True) + + +@pytest.mark.parametrize("columns", [None, ["b", "a"]]) +def test_parquet_columns_and_range_index(columns): + buffer = BytesIO() + df = cudf.DataFrame( + {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=pd.RangeIndex(2, 5) + ) + df.to_parquet(buffer) + + expected = pd.read_parquet(buffer, columns=columns) + got = cudf.read_parquet(buffer, columns=columns) + + assert_eq(expected, got, check_index_type=True) + + +def test_parquet_nested_struct_list(): + buffer = BytesIO() + data = { + "payload": { + "Domain": { + "Name": "abc", + "Id": {"Name": "host", "Value": "127.0.0.8"}, + }, + "StreamId": "12345678", + "Duration": 10, + "Offset": 12, + "Resource": [{"Name": "ZoneName", "Value": "RAPIDS"}], + } + } + df = cudf.DataFrame({"a": cudf.Series(data)}) + + df.to_parquet(buffer) + expected = pd.read_parquet(buffer) + actual = cudf.read_parquet(buffer) + assert_eq(expected, actual) + assert_eq(actual.a.dtype, df.a.dtype) + + +def test_parquet_writer_zstd(): + size = 12345 + rng = np.random.default_rng(seed=0) + expected = cudf.DataFrame( + { + "a": np.arange(0, stop=size, dtype="float64"), + "b": rng.choice(list("abcd"), size=size), + "c": rng.choice(np.arange(4), size=size), + } + ) + + buff = BytesIO() + try: + expected.to_parquet(buff, compression="ZSTD") + except RuntimeError: + pytest.xfail(reason="Newer nvCOMP version is required") + else: + got = pd.read_parquet(buff) + assert_eq(expected, got) + + +@pytest.mark.parametrize("store_schema", [True, False]) +def test_parquet_writer_time_delta_physical_type(store_schema): + df = cudf.DataFrame( + { + "s": cudf.Series([1], dtype="timedelta64[s]"), + "ms": cudf.Series([2], dtype="timedelta64[ms]"), + "us": cudf.Series([3], dtype="timedelta64[us]"), + # 4K because Pandas/pyarrow don't support non-zero nanoseconds + # in Parquet files + "ns": cudf.Series([4000], dtype="timedelta64[ns]"), + } + ) + buffer = BytesIO() + df.to_parquet(buffer, store_schema=store_schema) + + got = pd.read_parquet(buffer) + + if store_schema: + expected = pd.DataFrame( + { + "s": ["0 days 00:00:01"], + "ms": ["0 days 00:00:00.002000"], + "us": ["0 days 00:00:00.000003"], + "ns": ["0 days 00:00:00.000004"], + }, + dtype="str", + ) + else: + expected = pd.DataFrame( + { + "s":
["00:00:01"], + "ms": ["00:00:00.002000"], + "us": ["00:00:00.000003"], + "ns": ["00:00:00.000004"], + }, + dtype="str", + ) + assert_eq(got.astype("str"), expected) + + +@pytest.mark.parametrize("store_schema", [True, False]) +def test_parquet_roundtrip_time_delta(store_schema): + num_rows = 12345 + df = cudf.DataFrame( + { + "s": cudf.Series( + random.sample(range(0, 200000), num_rows), + dtype="timedelta64[s]", + ), + "ms": cudf.Series( + random.sample(range(0, 200000), num_rows), + dtype="timedelta64[ms]", + ), + "us": cudf.Series( + random.sample(range(0, 200000), num_rows), + dtype="timedelta64[us]", + ), + "ns": cudf.Series( + random.sample(range(0, 200000), num_rows), + dtype="timedelta64[ns]", + ), + } + ) + buffer = BytesIO() + df.to_parquet(buffer, store_schema=store_schema) + # `check_dtype` cannot be removed here as timedelta64[s] will change to `timedelta[ms]` + assert_eq(df, cudf.read_parquet(buffer), check_dtype=False) + if store_schema: + assert_eq(df, pd.read_parquet(buffer)) + + +def test_parquet_reader_malformed_file(datadir): + fname = datadir / "nested-unsigned-malformed.parquet" + + # expect a failure when reading the whole file + with pytest.raises(RuntimeError): + cudf.read_parquet(fname) + + +def test_parquet_reader_unsupported_page_encoding(datadir): + fname = datadir / "delta_encoding.parquet" + + # expect a failure when reading the whole file + with pytest.raises(RuntimeError): + cudf.read_parquet(fname) + + +def test_parquet_reader_detect_bad_dictionary(datadir): + fname = datadir / "bad_dict.parquet" + + # expect a failure when reading the whole file + with pytest.raises(RuntimeError): + cudf.read_parquet(fname) + + +@pytest.mark.parametrize("data", [{"a": [1, 2, 3, 4]}, {"b": [1, None, 2, 3]}]) +@pytest.mark.parametrize("force_nullable_schema", [True, False]) +def test_parquet_writer_schema_nullability(data, force_nullable_schema): + df = cudf.DataFrame(data) + file_obj = BytesIO() + + df.to_parquet(file_obj, force_nullable_schema=force_nullable_schema) + + assert pa.parquet.read_schema(file_obj).field(0).nullable == ( + force_nullable_schema or df.isnull().any().any() + ) + + +def test_parquet_read_filter_and_project(): + # Filter on columns that are not included + # in the current column projection + + with BytesIO() as buffer: + # Write parquet data + df = cudf.DataFrame( + { + "a": [1, 2, 3, 4, 5] * 10, + "b": [0, 1, 2, 3, 4] * 10, + "c": range(50), + "d": [6, 7] * 25, + "e": [8, 9] * 25, + } + ) + df.to_parquet(buffer) + + # Read back with filter and projection + columns = ["b"] + filters = [[("a", "==", 5), ("c", ">", 20)]] + got = cudf.read_parquet(buffer, columns=columns, filters=filters) + + # Check result + expected = df[(df.a == 5) & (df.c > 20)][columns].reset_index(drop=True) + assert_eq(got, expected) + + +def test_parquet_reader_multiindex(): + expected = pd.DataFrame( + {"A": [1, 2, 3]}, + index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]), + ) + file_obj = BytesIO() + expected.to_parquet(file_obj, engine="pyarrow") + with pytest.warns(UserWarning): + actual = cudf.read_parquet(file_obj, engine="pyarrow") + assert_eq(actual, expected) + + +def test_parquet_reader_engine_error(): + with pytest.raises(ValueError): + cudf.read_parquet(BytesIO(), engine="abc") + + +def test_reader_lz4(): + pdf = pd.DataFrame({"ints": [1, 2] * 5001}) + + buffer = BytesIO() + pdf.to_parquet(buffer, compression="LZ4") + + got = cudf.read_parquet(buffer) + assert_eq(pdf, got) + + +def test_writer_lz4(): + gdf = cudf.DataFrame({"ints": [1, 2] * 5001}) + + 
buffer = BytesIO() + gdf.to_parquet(buffer, compression="LZ4") + + got = pd.read_parquet(buffer) + assert_eq(gdf, got) + + +def test_parquet_reader_zstd_huff_tables(datadir): + # Ensure that this zstd-compressed file does not overrun buffers. The + # problem was fixed in nvcomp 3.0.6. + # See https://github.com/rapidsai/cudf/issues/15096 + fname = datadir / "zstd_huff_tables_bug.parquet" + + expected = pa.parquet.read_table(fname).to_pandas() + actual = cudf.read_parquet(fname) + assert_eq(actual, expected) + + +def test_parquet_reader_roundtrip_with_arrow_schema(): + # Ensure that nested types are faithfully roundtripped across Parquet + # with the arrow schema, which is what allows duration types + # (timedelta64) to round trip through Parquet reads and writes. + pdf = pd.DataFrame( + { + "s": pd.Series([None, None, None], dtype="timedelta64[s]"), + "ms": pd.Series([1234, None, 32442], dtype="timedelta64[ms]"), + "us": pd.Series([None, 3456, None], dtype="timedelta64[us]"), + "ns": pd.Series([1234, 3456, 32442], dtype="timedelta64[ns]"), + "duration_list": list( + [ + [ + datetime.timedelta(minutes=7, seconds=4), + datetime.timedelta(minutes=7), + ], + [ + None, + None, + ], + [ + datetime.timedelta(minutes=7, seconds=4), + None, + ], + ] + ), + "int64": pd.Series([1234, 123, 4123], dtype="int64"), + "list": list([[1, 2], [1, 2], [1, 2]]), + "datetime": pd.Series([1234, 123, 4123], dtype="datetime64[ms]"), + "map": pd.Series(["cat", "dog", "lion"]).map( + {"cat": "kitten", "dog": "puppy", "lion": "cub"} + ), + } + ) + + # Write parquet with arrow for now (to write arrow:schema) + buffer = BytesIO() + pdf.to_parquet(buffer, engine="pyarrow") + + # Read parquet with arrow schema + got = cudf.read_parquet(buffer) + # Convert to cudf table for an apple to apple comparison + expected = cudf.from_pandas(pdf) + + # Check results for reader with schema + assert_eq(expected, got) + + # Reset buffer + buffer = BytesIO() + + # Write to buffer with cudf + expected.to_parquet(buffer, store_schema=True) + + # Read parquet with arrow schema + got = cudf.read_parquet(buffer) + # Convert to cudf table for an apple to apple comparison + expected = cudf.from_pandas(pdf) + + # Check results for the cudf-written roundtrip as well + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "data", + [ + # struct + [ + {"a": 1, "b": 2}, + {"a": 10, "b": 20}, + {"a": None, "b": 22}, + {"a": None, "b": None}, + {"a": 15, "b": None}, + ], + # struct-of-list + [ + {"a": 1, "b": 2, "c": [1, 2, 3]}, + {"a": 10, "b": 20, "c": [4, 5]}, + {"a": None, "b": 22, "c": [6]}, + {"a": None, "b": None, "c": None}, + {"a": 15, "b": None, "c": [-1, -2]}, + None, + {"a": 100, "b": 200, "c": [-10, None, -20]}, + ], + # list-of-struct + [ + [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}], + None, + [{"a": 10, "b": 20}], + [{"a": 100, "b": 200}, {"a": None, "b": 300}, None], + ], + # struct-of-struct + [ + {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2}, + {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4}, + {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6}, + {"a": 7, "b": None, "c": 8}, + {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None}, + None, + {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10}, + ], + # struct-with-mixed-types + [ + { + "struct": { + "payload": { + "Domain": { + "Name": "abc", + "Id": {"Name": "host", "Value": "127.0.0.8"}, + "Duration": datetime.timedelta(minutes=12), + }, + "StreamId": "12345678", + "Duration": datetime.timedelta(minutes=4), + "Offset": None, + "Resource": [ + { + "Name": "ZoneName", + "Value": "RAPIDS", + "Duration":
datetime.timedelta(seconds=1), + } + ], + } + } + } + ], + ], +) +def test_parquet_reader_roundtrip_structs_with_arrow_schema(tmp_path, data): + # Ensure that the structs with duration types are faithfully being + # roundtripped across Parquet with arrow schema + pdf = pd.DataFrame({"struct": pd.Series(data)}) + + buffer = BytesIO() + pdf.to_parquet(buffer, engine="pyarrow") + + # Read parquet with arrow schema + got = cudf.read_parquet(buffer) + # Convert to cudf table for an apple to apple comparison + expected = cudf.from_pandas(pdf) + + # Check results + assert_eq(expected, got) + + # Reset buffer + buffer = BytesIO() + + # Write to buffer with cudf + expected.to_parquet(buffer, store_schema=True) + + # Read parquet with arrow schema + got = cudf.read_parquet(buffer) + # Convert to cudf table for an apple to apple comparison + expected = cudf.from_pandas(pdf) + + # Check results + assert_eq(expected, got) + + +@pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) +def test_parquet_writer_roundtrip_with_arrow_schema(index): + # Ensure that the concrete and nested types are faithfully being roundtripped + # across Parquet with arrow schema + expected = cudf.DataFrame( + { + "s": cudf.Series([None, None, None], dtype="timedelta64[s]"), + "us": cudf.Series([None, 3456, None], dtype="timedelta64[us]"), + "duration_list": list( + [ + [ + datetime.timedelta(minutes=7, seconds=4), + datetime.timedelta(minutes=7), + ], + [ + None, + None, + ], + [ + datetime.timedelta(minutes=7, seconds=4), + None, + ], + ] + ), + "int64": cudf.Series([-1234, 123, 4123], dtype="int64"), + "uint32": cudf.Series([1234, 123, 4123], dtype="uint32"), + "list": list([[1, 2], [1, 2], [1, 2]]), + "bool": cudf.Series([True, None, False], dtype=bool), + "fixed32": cudf.Series([0.00, 1.0, None]).astype( + cudf.Decimal32Dtype(7, 2) + ), + "fixed64": cudf.Series([0.00, 1.0, None]).astype( + cudf.Decimal64Dtype(7, 2) + ), + "fixed128": cudf.Series([0.00, 1.0, None]).astype( + cudf.Decimal128Dtype(7, 2) + ), + "datetime": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), + "map": cudf.Series(["cat", "dog", "lion"]).map( + {"cat": "kitten", "dog": "puppy", "lion": "cub"} + ), + } + ) + + # Convert decimals32/64 to decimal128 if pyarrow version is < 19.0.0 + if version.parse(pa.__version__) < version.parse("19.0.0"): + expected = expected.astype({"fixed32": cudf.Decimal128Dtype(9, 2)}) + expected = expected.astype({"fixed64": cudf.Decimal128Dtype(18, 2)}) + + # Write to Parquet with arrow schema for faithful roundtrip + buffer = BytesIO() + expected.to_parquet(buffer, store_schema=True, index=index) + + # Read parquet with pyarrow, pandas and cudf readers + got = cudf.DataFrame.from_arrow(pq.read_table(buffer)) + got2 = cudf.DataFrame.from_pandas(pd.read_parquet(buffer)) + got3 = cudf.read_parquet(buffer) + + # drop the index column for comparison: __index_level_0__ + if index: + got.drop(columns="__index_level_0__", inplace=True) + got2.drop(columns="__index_level_0__", inplace=True) + + # Check results + assert_eq(expected, got) + assert_eq(expected, got2) + assert_eq(expected, got3) + + +def test_parquet_writer_int96_timestamps_and_arrow_schema(): + df = cudf.DataFrame( + { + "timestamp": cudf.Series( + [1234, 123, 4123], dtype="datetime64[ms]" + ), + } + ) + + # Output buffer + buffer = BytesIO() + + # Writing out parquet with both INT96 timestamps and arrow_schema + # enabled 
should throw an exception. + with pytest.raises(RuntimeError): + df.to_parquet(buffer, int96_timestamps=True, store_schema=True) + + +@pytest.mark.parametrize( + "data", + [ + # struct + [ + {"a": 1, "b": 2}, + {"a": 10, "b": 20}, + {"a": None, "b": 22}, + {"a": None, "b": None}, + {"a": 15, "b": None}, + ], + # struct-of-list + [ + {"a": 1, "b": 2, "c": [1, 2, 3]}, + {"a": 10, "b": 20, "c": [4, 5]}, + {"a": None, "b": 22, "c": [6]}, + {"a": None, "b": None, "c": None}, + {"a": 15, "b": None, "c": [-1, -2]}, + None, + {"a": 100, "b": 200, "c": [-10, None, -20]}, + ], + # list-of-struct + [ + [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}], + None, + [{"a": 10, "b": 20}], + [{"a": 100, "b": 200}, {"a": None, "b": 300}, None], + ], + # struct-of-struct + [ + {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2}, + {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4}, + {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6}, + {"a": 7, "b": None, "c": 8}, + {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None}, + None, + {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10}, + ], + # struct-with-mixed-types + [ + { + "struct": { + "payload": { + "Domain": { + "Name": "abc", + "Id": {"Name": "host", "Value": "127.0.0.8"}, + "Duration": datetime.timedelta(minutes=12), + }, + "StreamId": "12345678", + "Duration": datetime.timedelta(minutes=4), + "Offset": None, + "Resource": [ + { + "Name": "ZoneName", + "Value": "RAPIDS", + "Duration": datetime.timedelta(seconds=1), + } + ], + } + } + } + ], + ], +) +@pytest.mark.parametrize("index", [None, True, False]) +@pytest.mark.skipif( + version.parse(pa.__version__) < version.parse("15.0.0"), + reason="https://github.com/apache/arrow/pull/37792", +) +def test_parquet_writer_roundtrip_structs_with_arrow_schema(data, index): + # Ensure that the structs are faithfully being roundtripped across + # Parquet with arrow schema + pa_expected = pa.Table.from_pydict({"struct": data}) + + expected = cudf.DataFrame.from_arrow(pa_expected) + + # Write expected data frame to Parquet with arrow schema + buffer = BytesIO() + expected.to_parquet(buffer, store_schema=True, index=index) + + # Read Parquet with pyarrow + pa_got = pq.read_table(buffer) + + # drop the index column for comparison: __index_level_0__ + if index: + pa_got = pa_got.drop(columns="__index_level_0__") + + # Check results + assert_eq(pa_expected, pa_got) + + # Convert to cuDF table and also read Parquet with cuDF reader + got = cudf.DataFrame.from_arrow(pa_got) + got2 = cudf.read_parquet(buffer) + + # Check results + assert_eq(expected, got) + assert_eq(expected, got2) + + +@pytest.mark.parametrize("chunk_read_limit", [0, 240, 1024000000]) +@pytest.mark.parametrize("pass_read_limit", [0, 240, 1024000000]) +@pytest.mark.parametrize("use_pandas_metadata", [True, False]) +@pytest.mark.parametrize("row_groups", [[[0]], None, [[0, 1]]]) +def test_parquet_chunked_reader( + chunk_read_limit, pass_read_limit, use_pandas_metadata, row_groups +): + df = pd.DataFrame( + {"a": [1, 2, 3, None] * 1000, "b": ["av", "qw", None, "xyz"] * 1000} + ) + buffer = BytesIO() + df.to_parquet(buffer, row_group_size=1000) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + use_pandas_metadata=use_pandas_metadata, + row_groups=row_groups, + ) + expected = cudf.read_parquet( + buffer, use_pandas_metadata=use_pandas_metadata, row_groups=row_groups + ) + assert_eq(expected, actual) 
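+ +# A minimal usage sketch (for illustration; not an upstream test) of the +# low-memory reader exercised above. `_chunk_read_limit` and +# `_pass_read_limit` are internal knobs used only by these tests; roughly, +# they cap the size of each output chunk and the intermediate memory used +# per decoding pass. A typical caller (the file path here is hypothetical) +# would just enable the option: +# +# with cudf.option_context("io.parquet.low_memory", True): +# df = cudf.read_parquet("large_file.parquet") +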
+ + +@pytest.mark.parametrize("chunk_read_limit", [256, 2560]) +@pytest.mark.parametrize("pass_read_limit", [256, 2560]) +@pytest.mark.parametrize("num_rows", [49, 291]) +@pytest.mark.parametrize("skip_rows", [412, 601]) +@pytest.mark.parametrize("data_size", [100, 200]) +def test_parquet_chunked_reader_structs( + chunk_read_limit, pass_read_limit, num_rows, skip_rows, data_size +): + data = [ + { + "a": "g", + "b": { + "b_a": 10, + "b_b": {"b_b_b": None, "b_b_a": 2}, + }, + "c": None, + }, + {"a": None, "b": {"b_a": None, "b_b": None}, "c": [15, 16]}, + {"a": "j", "b": None, "c": [8, 10]}, + {"a": None, "b": {"b_a": None, "b_b": None}, "c": None}, + None, + { + "a": None, + "b": {"b_a": None, "b_b": {"b_b_b": 1}}, + "c": [18, 19], + }, + {"a": None, "b": None, "c": None}, + ] * data_size + + pa_struct = pa.Table.from_pydict({"struct": data}) + df = cudf.DataFrame.from_arrow(pa_struct) + buffer = BytesIO() + df.to_parquet(buffer, row_group_size_rows=7000, max_page_size_rows=100) + + # Number of rows to read + nrows = num_rows if skip_rows + num_rows < len(df) else len(df) - skip_rows + + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + skip_rows=skip_rows, + ).reset_index(drop=True) + expected = cudf.read_parquet( + buffer, nrows=nrows, skip_rows=skip_rows + ).reset_index(drop=True) + assert_eq(expected, actual) + + +@pytest.mark.parametrize("chunk_read_limit", [0, 24, 10240000]) +@pytest.mark.parametrize("pass_read_limit", [0, 24, 10240000]) +@pytest.mark.parametrize("num_rows", [47, 97, None]) +@pytest.mark.parametrize( + "str_encoding", + [ + "PLAIN", + "DELTA_BYTE_ARRAY", + "DELTA_LENGTH_BYTE_ARRAY", + ], +) +def test_parquet_chunked_reader_string_decoders( + chunk_read_limit, + pass_read_limit, + num_rows, + str_encoding, +): + df = pd.DataFrame( + { + "i64": [1, 2, 3, None] * 100, + "str": ["av", "qw", "asd", "xyz"] * 100, + "list": list( + [["ad", "cd"], ["asd", "fd"], None, ["asd", None]] * 100 + ), + } + ) + buffer = BytesIO() + # Write 4 Parquet row groups with string column encoded + df.to_parquet( + buffer, + row_group_size=100, + use_dictionary=False, + column_encoding={"str": str_encoding}, + ) + + # Number of rows to read + nrows = num_rows if num_rows is not None else len(df) + + # Check with num_rows specified + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + ) + expected = cudf.read_parquet( + buffer, + nrows=nrows, + ) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "nrows, skip_rows", + [ + (0, 0), + (99, 101), + (988, 61), + (99, 1011), + (101, 1601), + (99, 1901), + ], +) +@pytest.mark.parametrize( + "row_group_size_rows, page_size_rows", + [ + (1000, 1000), # 1 RG, 1 page per RG + (1000, 100), # 1 RG, multiple pages per RG + (100, 100), # multiple RGs, 1 page per RG + (100, 10), # multiple RGs, multiple pages per RG + ], +) +@pytest.mark.parametrize( + "chunk_read_limit, pass_read_limit", + [ + (256, 256), # small chunk and pass read limits + (0, 1024), # zero chunk and small pass read limit + (256, 0), # small chunk and zero pass read limit + (256000, 256000), # large chunk and pass read limits + ], +) +def test_chunked_parquet_reader_nrows_skiprows( + nrows, + skip_rows, + row_group_size_rows, + page_size_rows, + chunk_read_limit, + pass_read_limit, +): + df = 
cudf.DataFrame( + { + "a": list( + [ + ["cat", "lion", "deer"], + ["bear", "ibex", None], + ["tiger", None, "bull"], + [None, "wolf", "fox"], + ] + ) + * 500, + "b": ["av", "qw", None, "xyz"] * 500, + } + ) + expected = df[skip_rows : skip_rows + nrows] + buffer = BytesIO() + df.to_parquet( + buffer, + row_group_size_rows=row_group_size_rows, + max_page_size_rows=page_size_rows, + ) + got = cudf.read_parquet(buffer, nrows=nrows, skip_rows=skip_rows) + assert_eq(expected, got) + + # Check for chunked parquet reader + with cudf.option_context("io.parquet.low_memory", True): + got = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + skip_rows=skip_rows, + ).reset_index(drop=True) + # Reset index for comparison + expected = expected.reset_index(drop=True) + assert_eq(expected, got) + + +def test_parquet_reader_pandas_compatibility(): + df = pd.DataFrame( + {"a": [1, 2, 3, 4] * 10000, "b": ["av", "qw", "hi", "xyz"] * 10000} + ) + buffer = BytesIO() + df.to_parquet(buffer) + with cudf.option_context("io.parquet.low_memory", True): + expected = cudf.read_parquet(buffer) + assert_eq(expected, df) + + +@pytest.mark.parametrize("store_schema", [True, False]) +def test_parquet_reader_with_mismatched_tables(store_schema): + # cuDF tables with mixed types + df1 = cudf.DataFrame( + { + "i32": cudf.Series([None, None, None], dtype="int32"), + "i64": cudf.Series([1234, 467, 123], dtype="int64"), + "list": list([[1, 2], None, [None, 6]]), + "time": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), + "str": ["vfd", None, "ghu"], + "d_list": list( + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [None, pd.Timedelta(minutes=3)], + [pd.Timedelta(minutes=8), None], + ] + ), + } + ) + + df2 = cudf.DataFrame( + { + "str": ["abc", "def", "ghi"], + "i64": cudf.Series([None, 65, 98], dtype="int64"), + "times": cudf.Series([1234, None, 4123], dtype="datetime64[us]"), + "list": list([[7, 8], [9, 10], [11, 12]]), + "d_list": list( + [ + [pd.Timedelta(minutes=4), None], + None, + [pd.Timedelta(minutes=6), None], + ] + ), + } + ) + + # IO buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write Parquet with and without arrow schema + df1.to_parquet(buf1, store_schema=store_schema) + df2.to_parquet(buf2, store_schema=store_schema) + + # Read mismatched Parquet files + got = cudf.read_parquet( + [buf1, buf2], + columns=["list", "d_list", "str"], + filters=[("i64", ">", 20)], + allow_mismatched_pq_schemas=True, + ) + + # Construct the expected table + expected = cudf.concat( + [ + df1[df1["i64"] > 20][["list", "d_list", "str"]], + df2[df2["i64"] > 20][["list", "d_list", "str"]], + ] + ).reset_index(drop=True) + + # Read with chunked reader (filter columns not supported) + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["list", "d_list", "str"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) + + # Construct the expected table without filter columns + expected_chunked = cudf.concat( + [df1[["list", "d_list", "str"]], df2[["list", "d_list", "str"]]] + ).reset_index(drop=True) + + # Check results + assert_eq(expected, got) + assert_eq(expected_chunked, got_chunked) + + +def test_parquet_reader_with_mismatched_structs(): + data1 = [ + { + "a": 1, + "b": { + "a_a": 10, + "b_b": {"b_b_b": 1, "b_b_a": 2}, + }, + "c": 2, + }, + { + "a": 3, + "b": {"b_a": 30, "b_b": {"b_b_a": 210}}, + "c": 4, + }, + {"a": 5, "b": {"b_a": 50, "b_b": 
None}, "c": 6}, + {"a": 7, "b": None, "c": 8}, + {"a": 5, "b": {"b_a": None, "b_b": None}, "c": None}, + ] + + data2 = [ + {"a": 1, "b": {"b_b": {"b_b_a": None}}}, + {"a": 5, "b": {"b_b": None}}, + {"a": 7, "b": {"b_b": {"b_b_b": 1, "b_b_a": 0}}}, + {"a": None, "b": {"b_b": None}}, + None, + ] + + # cuDF tables from struct data + df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) + df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) + + # Buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write to parquet + df1.to_parquet(buf1) + df2.to_parquet(buf2) + + # Read the struct.b.inner_b.inner_inner_a column from parquet + got = cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.b_b.b_b_a"], + allow_mismatched_pq_schemas=True, + ) + got = ( + cudf.Series(got["struct"]) + .struct.field("b") + .struct.field("b_b") + .struct.field("b_b_a") + ) + + # Read with chunked reader + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.b_b.b_b_a"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) + got_chunked = ( + cudf.Series(got_chunked["struct"]) + .struct.field("b") + .struct.field("b_b") + .struct.field("b_b_a") + ) + + # Construct the expected series + expected = cudf.concat( + [ + cudf.Series(df1["struct"]) + .struct.field("b") + .struct.field("b_b") + .struct.field("b_b_a"), + cudf.Series(df2["struct"]) + .struct.field("b") + .struct.field("b_b") + .struct.field("b_b_a"), + ] + ).reset_index(drop=True) + + # Check results + assert_eq(expected, got) + assert_eq(expected, got_chunked) + + +def test_parquet_reader_with_mismatched_schemas_error(): + df1 = cudf.DataFrame( + { + "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), + "i64": cudf.Series([123, 3454, 123], dtype="int64"), + "i32": cudf.Series([123, 3454, 123], dtype="int32"), + } + ) + df2 = cudf.DataFrame( + { + "i64": cudf.Series([123, 3454, 123], dtype="int64"), + "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), + } + ) + + buf1 = BytesIO() + buf2 = BytesIO() + + df1.to_parquet(buf1, store_schema=True) + df2.to_parquet(buf2, store_schema=False) + + with pytest.raises( + ValueError, + match="Encountered mismatching SchemaElement properties for a column in the selected path", + ): + cudf.read_parquet( + [buf1, buf2], columns=["millis"], allow_mismatched_pq_schemas=True + ) + + data1 = [ + {"a": 1, "b": {"b_a": 1, "b_b": 6}}, + {"a": 3, "b": {"b_a": None, "b_b": 2}}, + ] + data2 = [ + {"b": {"b_a": 1}, "c": "str"}, + {"b": {"b_a": None}, "c": None}, + ] + + # cuDF tables from struct data + df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) + df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) + + # Buffers + buf1 = BytesIO() + buf2 = BytesIO() + + # Write to parquet + df1.to_parquet(buf1) + df2.to_parquet(buf2) + + with pytest.raises( + IndexError, + match="Encountered mismatching number of children for a column in the selected path", + ): + cudf.read_parquet( + [buf1, buf2], + columns=["struct.b"], + allow_mismatched_pq_schemas=True, + ) + + with pytest.raises( + IndexError, + match="Encountered mismatching schema tree depths across data sources", + ): + cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.b_b"], + allow_mismatched_pq_schemas=True, + ) + + +def test_parquet_roundtrip_zero_rows_no_column_mask(): + expected = cudf.DataFrame._from_data( + { + "int": cudf.core.column.column_empty(0, 
np.dtype(np.int64)), + "float": cudf.core.column.column_empty(0, np.dtype(np.float64)), + "datetime": cudf.core.column.column_empty( + 0, np.dtype("datetime64[ns]") + ), + "timedelta": cudf.core.column.column_empty( + 0, np.dtype("timedelta64[ns]") + ), + "bool": cudf.core.column.column_empty(0, np.dtype(np.bool_)), + "decimal": cudf.core.column.column_empty( + 0, cudf.Decimal64Dtype(1) + ), + "struct": cudf.core.column.column_empty( + 0, cudf.StructDtype({"a": "int64"}) + ), + "list": cudf.core.column.column_empty( + 0, cudf.ListDtype("float64") + ), + } + ) + with BytesIO() as bio: + expected.to_parquet(bio) + result = cudf.read_parquet(bio) + assert_eq(result, expected) + + +def test_parquet_reader_mismatched_nullability(): + # Ensure that we can faithfully read the tables with mismatched nullabilities + df1 = cudf.DataFrame( + { + "timedelta": cudf.Series([12, 54, 1231], dtype="timedelta64[ms]"), + "duration_list": list( + [ + [ + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + None, + [pd.Timedelta(minutes=8), None], + ], + None, + ], + None, + [ + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], + [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], + ] + ], + ] + ), + "int64": cudf.Series([1234, None, 4123], dtype="int64"), + "int32": cudf.Series([1234, 123, 4123], dtype="int32"), + "list": list([[1, 2], [1, 2], [1, 2]]), + "datetime": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), + "string": cudf.Series(["kitten", "puppy", "cub"]), + } + ) + + df2 = cudf.DataFrame( + { + "timedelta": cudf.Series( + [None, None, None], dtype="timedelta64[ms]" + ), + "duration_list": list( + [ + [ + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [pd.Timedelta(minutes=8), pd.Timedelta(minutes=1)], + ], + ], + [ + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], + [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], + ] + ], + [ + [ + [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], + [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], + [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], + ] + ], + ] + ), + "int64": cudf.Series([1234, 123, 4123], dtype="int64"), + "int32": cudf.Series([1234, None, 4123], dtype="int32"), + "list": list([[1, 2], None, [1, 2]]), + "datetime": cudf.Series( + [1234, None, 4123], dtype="datetime64[ms]" + ), + "string": cudf.Series(["kitten", None, "cub"]), + } + ) + + # Write tables to parquet with arrow schema for compatibility for duration column(s) + fname1 = BytesIO() + df1.to_parquet(fname1, store_schema=True) + fname2 = BytesIO() + df2.to_parquet(fname2, store_schema=True) + + # Read tables back with cudf and arrow in either order and compare + assert_eq( + cudf.read_parquet([fname1, fname2]), + cudf.concat([df1, df2]).reset_index(drop=True), + ) + assert_eq( + cudf.read_parquet([fname2, fname1]), + cudf.concat([df2, df1]).reset_index(drop=True), + ) + + +def test_parquet_reader_mismatched_nullability_structs(tmp_path): + data1 = [ + { + "a": "a", + "b": { + "b_a": 10, + "b_b": {"b_b_b": 1, "b_b_a": 12}, + }, + "c": [1, 2], + }, + { + "a": "b", + "b": { + "b_a": 30, + "b_b": {"b_b_b": 2, "b_b_a": 2}, + }, + "c": [3, 4], + }, + { + "a": "c", + "b": { + "b_a": 50, + "b_b": {"b_b_b": 4, "b_b_a": 5}, + }, + "c": [5, 6], + }, + { + "a": "d", + "b": { + "b_a": 135, + "b_b": {"b_b_b": 12, "b_b_a": 32}, + }, + "c": [7, 8], + }, + { + "a": "e", + "b": { + "b_a": 1, + "b_b": {"b_b_b": 1, "b_b_a": 5}, + }, + "c": [9, 10], + }, 
+ { + "a": "f", + "b": { + "b_a": 32, + "b_b": {"b_b_b": 1, "b_b_a": 6}, + }, + "c": [11, 12], + }, + ] + + data2 = [ + { + "a": "g", + "b": { + "b_a": 10, + "b_b": {"b_b_b": None, "b_b_a": 2}, + }, + "c": None, + }, + {"a": None, "b": {"b_a": None, "b_b": None}, "c": [15, 16]}, + {"a": "j", "b": None, "c": [8, 10]}, + {"a": None, "b": {"b_a": None, "b_b": None}, "c": None}, + None, + { + "a": None, + "b": {"b_a": None, "b_b": {"b_b_b": 1}}, + "c": [18, 19], + }, + {"a": None, "b": None, "c": None}, + ] + + pa_table1 = pa.Table.from_pydict({"struct": data1}) + df1 = cudf.DataFrame.from_arrow(pa_table1) + + pa_table2 = pa.Table.from_pydict({"struct": data2}) + df2 = cudf.DataFrame.from_arrow(pa_table2) + + # Write tables to parquet + buf1 = BytesIO() + df1.to_parquet(buf1) + buf2 = BytesIO() + df2.to_parquet(buf2) + + # Read tables back with cudf and compare with expected. + assert_eq( + cudf.read_parquet([buf1, buf2]), + cudf.concat([df1, df2]).reset_index(drop=True), + ) + assert_eq( + cudf.read_parquet([buf2, buf1]), + cudf.concat([df2, df1]).reset_index(drop=True), + ) + + +@pytest.mark.skipif( + pa.__version__ == "19.0.0", + reason="https://github.com/rapidsai/cudf/issues/17806", +) +@pytest.mark.parametrize( + "stats_fname,bloom_filter_fname", + [ + ( + "mixed_card_ndv_100_chunk_stats.snappy.parquet", + "mixed_card_ndv_100_bf_fpp0.1_nostats.snappy.parquet", + ), + ( + "mixed_card_ndv_500_chunk_stats.snappy.parquet", + "mixed_card_ndv_500_bf_fpp0.1_nostats.snappy.parquet", + ), + ], +) +@pytest.mark.parametrize( + "predicate,expected_len", + [ + ([[("str", "==", "FINDME")], [("fp64", "==", float(500))]], 2), + ([("fixed_pt", "==", decimal.Decimal(float(500)))], 2), + ([[("ui32", "==", np.uint32(500)), ("str", "==", "FINDME")]], 2), + ([[("str", "==", "FINDME")], [("ui32", ">=", np.uint32(0))]], 1000), + ( + [ + ("str", "!=", "FINDME"), + ("fixed_pt", "==", decimal.Decimal(float(500))), + ], + 0, + ), + ], +) +def test_parquet_bloom_filters( + datadir, stats_fname, bloom_filter_fname, predicate, expected_len +): + fname_stats = datadir / stats_fname + fname_bf = datadir / bloom_filter_fname + df_stats = cudf.read_parquet(fname_stats, filters=predicate).reset_index( + drop=True + ) + df_bf = cudf.read_parquet(fname_bf, filters=predicate).reset_index( + drop=True + ) + + # Check if tables equal + assert_eq( + df_stats, + df_bf, + ) + + # Check for table length + assert_eq( + len(df_stats), + expected_len, + ) + + +@pytest.fixture(params=["cuda", "pool", "cuda_async"]) +def memory_resource(request): + import rmm + + current_mr = rmm.mr.get_current_device_resource() + + kind = request.param + if kind == "cuda": + mr = rmm.mr.CudaMemoryResource() + elif kind == "pool": + base = rmm.mr.CudaMemoryResource() + free, _ = rmm.mr.available_device_memory() + size = int(round(free * 0.5 / 256) * 256) + mr = rmm.mr.PoolMemoryResource(base, size, size) + elif kind == "cuda_async": + mr = rmm.mr.CudaAsyncMemoryResource() + + rmm.mr.set_current_device_resource(mr) + + try: + yield mr + finally: + rmm.mr.set_current_device_resource(current_mr) + + +@pytest.mark.parametrize("columns", [["r_reason_desc"], None]) +def test_parquet_bloom_filters_alignment(datadir, columns, memory_resource): + fname = datadir / "bloom_filter_alignment.parquet" + filters = [("r_reason_desc", "==", "Did not like the color")] + + # Read expected table using pyarrow + expected = pq.read_table(fname, columns=columns, filters=filters) + + # Read with cudf using the memory resource from fixture + read = cudf.read_parquet( + fname, 
columns=columns, filters=filters + ).to_arrow() + + assert_eq(expected, read) + + +def test_parquet_reader_unsupported_compression(datadir): + fname = datadir / "hadoop_lz4_compressed.parquet" + + with pytest.raises( + RuntimeError, + match="Unsupported Parquet compression type: LZ4", + ): + cudf.read_parquet(fname) + + +def test_parquet_reader_empty_compressed_page(datadir): + fname = datadir / "empty_datapage_v2.parquet" + + df = cudf.DataFrame({"value": cudf.Series([None], dtype="float32")}) + assert_eq(cudf.read_parquet(fname), df) + + +@pytest.mark.parametrize("compression", ["brotli", "gzip", "snappy", "zstd"]) +def test_parquet_decompression( + set_decomp_env_vars, pdf_day_timestamps, compression +): + # PANDAS returns category objects whereas cuDF returns hashes + expect = pdf_day_timestamps.drop(columns=["col_category"]) + + # Write the DataFrame to a Parquet file + buffer = BytesIO() + expect.to_parquet(buffer, compression=compression) + + # Read the Parquet file back into a DataFrame + got = cudf.read_parquet(buffer) + + assert_eq(expect, got) def test_parquet_long_list(tmp_path): @@ -63,7 +4621,7 @@ def test_parquet_long_list(tmp_path): @pytest.mark.parametrize( "index", - [range(1, 11), list(range(1, 11)), range(1, 11)[::2]], + [range(1, 11), list(range(1, 11)), range(1, 11, 2)], ids=["RangeIndex", "IntIndex", "StridedRange"], ) @pytest.mark.parametrize("write_index", [False, True, None]) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/input_output/test_s3.py similarity index 100% rename from python/cudf/cudf/tests/test_s3.py rename to python/cudf/cudf/tests/input_output/test_s3.py diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py deleted file mode 100644 index b9b0161e6c0..00000000000 --- a/python/cudf/cudf/tests/test_gcs.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. - -import io -import os - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - -gcsfs = pytest.importorskip("gcsfs") - -TEST_BUCKET = "cudf-gcs-test-bucket" - - -@pytest.fixture -def pdf(scope="module"): - df = pd.DataFrame() - df["Integer"] = np.array([2345, 11987, 9027, 9027]) - df["Float"] = np.array([9.001, 8.343, 6, 2.781]) - df["Integer2"] = np.array([2345, 106, 2088, 789277]) - df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - df["Boolean"] = np.array([True, False, True, False]) - return df - - -def test_read_csv(pdf, monkeypatch): - # Write to buffer - fpath = TEST_BUCKET + "test_csv_reader.csv" - buffer = pdf.to_csv(index=False) - - def mock_open(*args, **kwargs): - return io.BytesIO(buffer.encode()) - - def mock_size(*args): - return len(buffer.encode()) - - monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open) - monkeypatch.setattr(gcsfs.core.GCSFileSystem, "size", mock_size) - - # Test read from explicit path. 
- with pytest.warns(FutureWarning): - got = cudf.read_csv(f"gcs://{fpath}") - assert_eq(pdf, got) - - # AbstractBufferedFile -> PythonFile conversion - # will work fine with the monkey-patched FS if we - # pass in an fsspec file object - fs = gcsfs.core.GCSFileSystem() - with fs.open(f"gcs://{fpath}") as f: - got = cudf.read_csv(f) - assert_eq(pdf, got) - - -def test_write_orc(pdf, monkeypatch, tmpdir): - gcs_fname = TEST_BUCKET + "test_orc_writer.orc" - local_filepath = os.path.join(tmpdir, "test_orc.orc") - gdf = cudf.from_pandas(pdf) - - def mock_open(*args, **kwargs): - return open(local_filepath, "wb") - - monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open) - gdf.to_orc(f"gcs://{gcs_fname}") - - got = pd.read_orc(local_filepath) - assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py deleted file mode 100644 index 4921b7b51fc..00000000000 --- a/python/cudf/cudf/tests/test_hdf.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. - -import os -from string import ascii_letters - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq -from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES - -pytest.importorskip("tables") - - -@pytest.fixture(params=[0, 1, 10, 100]) -def pdf(request): - types = set([*NUMERIC_TYPES, "datetime64[ns]", "bool"]) - set( - UNSIGNED_TYPES - ) - typer = {"col_" + val: val for val in types} - ncols = len(types) - nrows = request.param - - rng = np.random.default_rng(1) - # Create a pandas dataframe with random data of mixed types - test_pdf = pd.DataFrame( - rng.integers(0, 50, size=(nrows, ncols)), - columns=pd.Index([f"col_{typ}" for typ in types]), - index=pd.RangeIndex(nrows, name="test_index"), - ) - # Cast all the column dtypes to objects, rename them, and then cast to - # appropriate types - test_pdf = test_pdf.astype(typer).rename( - {"col_datetime64[ns]": "col_datetime64"}, axis=1 - ) - - # Create non-numeric categorical data otherwise may be typecasted - data = rng.choice(list(ascii_letters), size=nrows) - test_pdf["col_category"] = pd.Series(data, dtype="category") - - return (test_pdf, nrows) - - -@pytest.fixture -def gdf(pdf): - pdf, nrows = pdf - return (cudf.DataFrame.from_pandas(pdf), nrows) - - -@pytest.fixture(params=["fixed", "table"]) -def hdf_files(request, tmp_path_factory, pdf): - pdf, nrows = pdf - if request.param == "fixed": - pdf = pdf.drop("col_category", axis=1) - - fname_df = tmp_path_factory.mktemp("hdf") / "test_df.hdf" - pdf.to_hdf(fname_df, key="hdf_df_tests", format=request.param) - - fname_series = {} - for column in pdf.columns: - fname_series[column] = ( - tmp_path_factory.mktemp("hdf") / "test_series.hdf" - ) - pdf[column].to_hdf( - fname_series[column], key="hdf_series_tests", format=request.param - ) - return (fname_df, fname_series, request.param, nrows) - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.filterwarnings("ignore:Strings are not yet supported") -@pytest.mark.parametrize( - "columns", - [["col_int8"], ["col_category"], ["col_int32", "col_float32"], None], -) -def test_hdf_reader(hdf_files, columns): - hdf_df_file, hdf_series, format, nrows = hdf_files - if format == "fixed" and columns is not None: - pytest.skip("Can't use columns with format 'fixed'") - if format == "table" and nrows == 0: - pytest.skip("Can't read 0 row table with format 'table'") - expect_df = pd.read_hdf(hdf_df_file, columns=columns) - got_df = cudf.read_hdf(hdf_df_file, 
columns=columns) - - assert_eq( - expect_df, got_df, check_categorical=False, check_index_type=False - ) - - for column in hdf_series.keys(): - expect_series = pd.read_hdf(hdf_series[column]) - got_series = cudf.read_hdf(hdf_series[column]) - - assert_eq(expect_series, got_series, check_index_type=False) - - -@pytest.mark.parametrize("format", ["fixed", "table"]) -@pytest.mark.parametrize("complib", ["zlib", "bzip2", "lzo", "blosc"]) -@pytest.mark.filterwarnings("ignore:Using CPU") -def test_hdf_writer(tmpdir, pdf, gdf, complib, format): - pdf, nrows = pdf - if format == "table" and nrows == 0: - pytest.skip("Can't read 0 row table with format 'table'") - gdf, _ = gdf - - if format == "fixed": - pdf = pdf.drop("col_category", axis=1) - gdf = gdf.drop("col_category", axis=1) - - pdf_df_fname = tmpdir.join("pdf_df.hdf") - gdf_df_fname = tmpdir.join("gdf_df.hdf") - - pdf.to_hdf(pdf_df_fname, key="hdf_tests", format=format, complib=complib) - gdf.to_hdf(gdf_df_fname, key="hdf_tests", format=format, complib=complib) - - assert os.path.exists(pdf_df_fname) - assert os.path.exists(gdf_df_fname) - - expect = pd.read_hdf(pdf_df_fname) - got = pd.read_hdf(gdf_df_fname) - - assert_eq(expect, got, check_index_type=False) - - for column in pdf.columns: - pdf_series_fname = tmpdir.join(column + "_" + "pdf_series.hdf") - gdf_series_fname = tmpdir.join(column + "_" + "gdf_series.hdf") - - pdf[column].to_hdf( - pdf_series_fname, key="hdf_tests", format=format, complib=complib - ) - gdf[column].to_hdf( - gdf_series_fname, key="hdf_tests", format=format, complib=complib - ) - - assert os.path.exists(pdf_series_fname) - assert os.path.exists(gdf_series_fname) - - expect_series = pd.read_hdf(pdf_series_fname) - got_series = pd.read_hdf(gdf_series_fname) - - assert_eq(expect_series, got_series, check_index_type=False) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py deleted file mode 100644 index fd5dc4c914c..00000000000 --- a/python/cudf/cudf/tests/test_orc.py +++ /dev/null @@ -1,2054 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. - -import datetime -import decimal -import os -import random -from io import BytesIO -from string import ascii_letters, ascii_lowercase - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest -from pyarrow import orc - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.io.orc import ORCWriter -from cudf.testing import assert_eq, assert_frame_equal -from cudf.testing._utils import ( - expect_warning_if, - gen_rand_series, - supported_numpy_dtypes, -) - -# Removal of these deprecated features is no longer imminent. They will not be -# removed until a suitable alternative has been implemented. As a result, we -# also do not want to stop testing them yet. -# https://github.com/rapidsai/cudf/issues/11519 -pytestmark = pytest.mark.filterwarnings( - "ignore:(num_rows|skiprows) is deprecated and will be removed." 
-) - - -@pytest.fixture(scope="module") -def datadir(datadir): - return datadir / "orc" - - -@pytest.fixture -def path_or_buf(datadir): - fname = datadir / "TestOrcFile.test1.orc" - try: - with open(fname, "rb") as f: - buffer = BytesIO(f.read()) - except Exception as excpr: - if type(excpr).__name__ == "FileNotFoundError": - pytest.skip(".parquet file is not found") - raise excpr - - def _make_path_or_buf(src): - if src == "filepath": - return str(fname) - if src == "pathobj": - return fname - if src == "bytes_io": - return buffer - if src == "bytes": - return buffer.getvalue() - if src == "url": - return fname.as_uri() - - raise ValueError("Invalid source type") - - yield _make_path_or_buf - - -@pytest.fixture(scope="module") -def non_nested_pdf(): - rng = np.random.default_rng(seed=0) - types = [ - "bool", - "int8", - "int16", - "int32", - "int64", - "float32", - "float64", - "datetime64[ns]", - "str", - ] - nrows = 12345 - - # Create a pandas dataframe with random data of mixed types - test_pdf = pd.DataFrame( - { - f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) - for typ in types - }, - ) - - for t in [ - { - "name": "datetime64[ns]", - "nsDivisor": 1000, - "dayModulus": 86400000000, - }, - ]: - data = [ - rng.integers(0, (0x7FFFFFFFFFFFFFFF / t["nsDivisor"])) - for i in range(nrows) - ] - - test_pdf["col_" + t["name"]] = pd.Series( - np.asarray(data, dtype=t["name"]) - ) - - # Create non-numeric str data - data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)] - test_pdf["col_str"] = pd.Series(data, dtype="str") - - return test_pdf - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["pyarrow", "cudf"]) -@pytest.mark.parametrize("use_index", [False, True]) -@pytest.mark.parametrize( - "inputfile, columns", - [ - ("TestOrcFile.emptyFile.orc", ["boolean1"]), - ( - "TestOrcFile.test1.orc", - [ - "boolean1", - "byte1", - "short1", - "int1", - "long1", - "float1", - "double1", - ], - ), - ("TestOrcFile.RLEv2.orc", ["x", "y"]), - ("TestOrcFile.testSnappy.orc", None), - ("TestOrcFile.demo-12-zlib.orc", ["_col2", "_col3", "_col4", "_col5"]), - ], -) -def test_orc_reader_basic(datadir, inputfile, columns, use_index, engine): - path = datadir / inputfile - - expect = pd.read_orc(path, columns=columns) - got = cudf.read_orc( - path, engine=engine, columns=columns, use_index=use_index - ) - - assert_frame_equal(cudf.from_pandas(expect), got, check_categorical=False) - - -def test_orc_reader_filenotfound(tmpdir): - with pytest.raises(FileNotFoundError): - cudf.read_orc("TestMissingFile.orc") - - with pytest.raises(FileNotFoundError): - cudf.read_orc(tmpdir.mkdir("cudf_orc")) - - -def test_orc_reader_local_filepath(): - path = "~/TestLocalFile.orc" - if not os.path.isfile(path): - pytest.skip("Local .orc file is not found") - - cudf.read_orc(path) - - -@pytest.mark.parametrize( - "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"] -) -def test_orc_reader_filepath_or_buffer(path_or_buf, src): - cols = ["int1", "long1", "float1", "double1"] - - expect = pd.read_orc(path_or_buf("filepath"), columns=cols) - got = cudf.read_orc(path_or_buf(src), columns=cols) - - assert_eq(expect, got) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Bug in older version of pandas", -) -def test_orc_reader_trailing_nulls(datadir): - path = datadir / "TestOrcFile.nulls-at-end-snappy.orc" - expect = pd.read_orc(path) - got = cudf.read_orc(path) - - assert_eq(expect, got, check_categorical=True) - - 
-@pytest.mark.parametrize("use_index", [False, True]) -@pytest.mark.parametrize( - "inputfile", - ["TestOrcFile.testDate1900.orc", "TestOrcFile.testDate2038.orc"], -) -def test_orc_reader_datetimestamp(datadir, inputfile, use_index): - from pyarrow import orc - - path = datadir / inputfile - try: - orcfile = orc.ORCFile(path) - except pa.ArrowIOError as e: - pytest.skip(".orc file is not found: %s" % e) - - pdf = orcfile.read().to_pandas(date_as_object=False) - gdf = cudf.read_orc(path, use_index=use_index) - - assert_eq(pdf, gdf, check_categorical=False, check_exact=False) - - -def test_orc_reader_strings(datadir): - path = datadir / "TestOrcFile.testStringAndBinaryStatistics.orc" - - expect = pd.read_orc(path, columns=["string1"]) - got = cudf.read_orc(path, columns=["string1"]) - - assert_eq(expect, got, check_categorical=False) - - -def test_orc_read_statistics(datadir): - # Read in file containing 2 columns ("int1" and "string1") and 3 stripes - # (sizes 5000, 5000 and 1000 respectively). Each stripe has the same value - # in every one of its rows. The values the stripes have are 1, 2, and 3 in - # "int1" and "one", "two", and "three" in "string1". - path = datadir / "TestOrcFile.testStripeLevelStats.orc" - try: - ( - file_statistics, - stripes_statistics, - ) = cudf.io.orc.read_orc_statistics([path, path]) - except pa.ArrowIOError as e: - pytest.skip(".orc file is not found: %s" % e) - - # Check numberOfValues - assert_eq(file_statistics[0]["int1"].number_of_values, 11_000) - assert_eq( - file_statistics[0]["int1"].number_of_values, - sum( - [ - stripes_statistics[0]["int1"].number_of_values, - stripes_statistics[1]["int1"].number_of_values, - stripes_statistics[2]["int1"].number_of_values, - ] - ), - ) - assert_eq( - stripes_statistics[1]["int1"].number_of_values, - stripes_statistics[1]["string1"].number_of_values, - ) - assert_eq(stripes_statistics[2]["string1"].number_of_values, 1_000) - - # Check other statistics - assert_eq(stripes_statistics[2]["string1"].has_null, False) - assert_eq( - file_statistics[0]["int1"]["minimum"], - min( - stripes_statistics[0]["int1"]["minimum"], - stripes_statistics[1]["int1"]["minimum"], - stripes_statistics[2]["int1"]["minimum"], - ), - ) - assert_eq(file_statistics[0]["int1"]["minimum"], 1) - assert_eq(file_statistics[0]["string1"]["minimum"], "one") - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["cudf", "pyarrow"]) -@pytest.mark.parametrize( - "predicate,expected_len", - [ - ([[("int1", "==", 1)]], 5000), - ([[("int1", "<=", 2)]], 10000), - ([[("int1", "==", -1)]], 0), - ([[("int1", "in", range(3))]], 10000), - ([[("int1", "in", {1, 3})]], 6000), - ([[("int1", "not in", {1, 3})]], 5000), - ], -) -def test_orc_read_filtered(datadir, engine, predicate, expected_len): - path = datadir / "TestOrcFile.testStripeLevelStats.orc" - try: - df_filtered = cudf.read_orc(path, engine=engine, filters=predicate) - except pa.ArrowIOError as e: - pytest.skip(".orc file is not found: %s" % e) - - # Assert # of rows after filtering - assert len(df_filtered) == expected_len - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["cudf", "pyarrow"]) -def test_orc_read_stripes(datadir, engine): - path = datadir / "TestOrcFile.testDate1900.orc" - try: - pdf = cudf.read_orc(path, engine=engine) - except pa.ArrowIOError as e: - pytest.skip(".orc file is not found: %s" % e) - - num_rows, stripes, col_names = cudf.io.read_orc_metadata(path) - - # Read stripes one at a time - gdf = [ - 
cudf.read_orc(path, engine=engine, stripes=[[i]]) - for i in range(stripes) - ] - gdf = cudf.concat(gdf).reset_index(drop=True) - assert_eq(pdf, gdf, check_categorical=False, check_index_type=True) - - # Read stripes all at once - gdf = cudf.read_orc( - path, engine=engine, stripes=[[int(x) for x in range(stripes)]] - ) - assert_eq(pdf, gdf, check_categorical=False) - - # Read only some stripes - gdf = cudf.read_orc(path, engine=engine, stripes=[[0, 1]]) - assert_eq(gdf, pdf.head(25000)) - gdf = cudf.read_orc(path, engine=engine, stripes=[[0, stripes - 1]]) - assert_eq( - gdf, - cudf.concat([pdf.head(15000), pdf.tail(10000)], ignore_index=True), - check_index_type=True, - ) - - -@pytest.mark.parametrize("num_rows", [1, 100, 3000]) -@pytest.mark.parametrize("skiprows", [0, 1, 3000]) -def test_orc_read_rows(datadir, skiprows, num_rows): - path = datadir / "TestOrcFile.decimal.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path, skiprows=skiprows, num_rows=num_rows) - - # Slice rows out of the whole dataframe for comparison as PyArrow doesn't - # have an API to read a subsection of rows from the file - pdf = pdf[skiprows : skiprows + num_rows] - pdf = pdf.reset_index(drop=True) - - assert_eq(pdf, gdf) - - -def test_orc_read_skiprows(): - buff = BytesIO() - df = pd.DataFrame( - { - "a": [ - True, - False, - True, - False, - None, - True, - True, - True, - False, - None, - False, - False, - True, - True, - True, - True, - ] - } - ) - df.to_orc(buff) - # testing 10 skiprows due to a boolean specific bug fix that didn't - # repro for other sizes of data - skiprows = 10 - - expected = ( - pd.read_orc(buff)[skiprows:].reset_index(drop=True).astype("bool") - ) - got = cudf.read_orc(buff, skiprows=skiprows) - assert_eq(expected, got) - - -def test_orc_reader_uncompressed_block(datadir): - path = datadir / "uncompressed_snappy.orc" - - expect = pd.read_orc(path) - got = cudf.read_orc(path) - - assert_eq(expect, got, check_categorical=False) - - -def test_orc_reader_nodata_block(datadir): - path = datadir / "nodata.orc" - - expect = pd.read_orc(path) - got = cudf.read_orc(path, num_rows=1) - - assert_eq(expect, got, check_categorical=False) - - -@pytest.mark.parametrize("compression", [None, "snappy"]) -@pytest.mark.parametrize( - "reference_file, columns", - [ - ( - "TestOrcFile.test1.orc", - [ - "boolean1", - "byte1", - "short1", - "int1", - "long1", - "float1", - "double1", - ], - ), - ("TestOrcFile.demo-12-zlib.orc", ["_col1", "_col3", "_col5"]), - ], -) -def test_orc_writer(datadir, tmpdir, reference_file, columns, compression): - pdf_fname = datadir / reference_file - gdf_fname = tmpdir.join("gdf.orc") - - expect = cudf.from_pandas(pd.read_orc(pdf_fname, columns=columns)) - expect.to_orc(gdf_fname.strpath, compression=compression) - got = cudf.from_pandas(pd.read_orc(gdf_fname, columns=columns)) - - assert_frame_equal(expect, got) - - -@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) -def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): - reference_file = "TestOrcFile.demo-12-zlib.orc" - pdf_fname = datadir / reference_file - gdf_fname = tmpdir.join("gdf.orc") - - expect = cudf.from_pandas(pd.read_orc(pdf_fname)) - expect.to_orc(gdf_fname.strpath, statistics=stats_freq) - got = cudf.from_pandas(pd.read_orc(gdf_fname)) - - assert_frame_equal(expect, got) - - -@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) -def test_chunked_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): - reference_file = "TestOrcFile.test1.orc" 
- pdf_fname = datadir / reference_file - gdf_fname = tmpdir.join("chunked_gdf.orc") - - columns = [ - "boolean1", - "byte1", - "short1", - "int1", - "long1", - "float1", - "double1", - ] - pdf = pd.read_orc(pdf_fname, columns=columns) - gdf = cudf.from_pandas(pdf) - expect = pd.concat([pdf, pdf]).reset_index(drop=True) - - writer = ORCWriter(gdf_fname, statistics=stats_freq) - writer.write_table(gdf) - writer.write_table(gdf) - writer.close() - - got = pd.read_orc(gdf_fname) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("compression", [None, "snappy"]) -@pytest.mark.parametrize( - "reference_file, columns", - [ - ( - "TestOrcFile.test1.orc", - [ - "boolean1", - "byte1", - "short1", - "int1", - "long1", - "float1", - "double1", - ], - ), - ("TestOrcFile.demo-12-zlib.orc", ["_col1", "_col3", "_col5"]), - ], -) -def test_chunked_orc_writer( - datadir, tmpdir, reference_file, columns, compression -): - pdf_fname = datadir / reference_file - gdf_fname = tmpdir.join("chunked_gdf.orc") - - pdf = pd.read_orc(pdf_fname, columns=columns) - gdf = cudf.from_pandas(pdf) - expect = pd.concat([pdf, pdf]).reset_index(drop=True) - - writer = ORCWriter(gdf_fname, compression=compression) - writer.write_table(gdf) - writer.write_table(gdf) - writer.close() - - got = pd.read_orc(gdf_fname, columns=columns) - assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) - - -@pytest.mark.parametrize( - "dtypes", - [ - {"c": str, "a": int}, - {"c": int, "a": str}, - {"c": int, "a": str, "b": float}, - {"c": str, "a": object}, - ], -) -def test_orc_writer_strings(tmpdir, dtypes): - gdf_fname = tmpdir.join("gdf_strings.orc") - - expect = cudf.datasets.randomdata(nrows=10, dtypes=dtypes, seed=1) - expect.to_orc(gdf_fname) - got = pd.read_orc(gdf_fname) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "dtypes", - [ - {"c": str, "a": int}, - {"c": int, "a": str}, - {"c": int, "a": str, "b": float}, - {"c": str, "a": object}, - ], -) -def test_chunked_orc_writer_strings(tmpdir, dtypes): - gdf_fname = tmpdir.join("chunked_gdf_strings.orc") - - gdf = cudf.datasets.randomdata(nrows=10, dtypes=dtypes, seed=1) - pdf = gdf.to_pandas() - expect = pd.concat([pdf, pdf]).reset_index(drop=True) - writer = ORCWriter(gdf_fname) - writer.write_table(gdf) - writer.write_table(gdf) - writer.close() - - got = pd.read_orc(gdf_fname) - - assert_eq(expect, got) - - -def test_orc_writer_sliced(tmpdir): - cudf_path = tmpdir.join("cudf.orc") - - df = pd.DataFrame() - df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - df = cudf.from_pandas(df) - - df_select = df.iloc[1:3] - - df_select.to_orc(cudf_path) - assert_eq(cudf.read_orc(cudf_path), df_select) - - -@pytest.mark.parametrize( - "orc_file", - [ - "TestOrcFile.decimal.orc", - "TestOrcFile.decimal.same.values.orc", - "TestOrcFile.decimal.multiple.values.orc", - # For additional information take a look at PR 7034 - "TestOrcFile.decimal.runpos.issue.orc", - ], -) -def test_orc_reader_decimal_type(datadir, orc_file): - file_path = datadir / orc_file - - pdf = pd.read_orc(file_path) - df = cudf.read_orc(file_path) - - assert_eq(pdf, df) - - -def test_orc_decimal_precision_fail(datadir): - file_path = datadir / "TestOrcFile.int_decimal.precision_19.orc" - - # Shouldn't cause failure if decimal column is not chosen to be read.
- pdf = pd.read_orc(file_path, columns=["int"]) - gdf = cudf.read_orc(file_path, columns=["int"]) - - assert_eq(pdf, gdf) - - -# For additional information take a look at PRs 6636 and 6702 -@pytest.mark.parametrize( - "orc_file", - [ - "TestOrcFile.boolean_corruption_PR_6636.orc", - "TestOrcFile.boolean_corruption_PR_6702.orc", - ], -) -def test_orc_reader_boolean_type(datadir, orc_file): - file_path = datadir / orc_file - - pdf = pd.read_orc(file_path) - df = cudf.read_orc(file_path).to_pandas() - - assert_eq(pdf, df) - - -def test_orc_reader_tzif_timestamps(datadir): - # Contains timestamps in the range covered by the TZif file - # Other datetime tests only cover "future" times - path = datadir / "TestOrcFile.lima_timezone.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - - assert_eq(pdf, gdf) - - -def test_int_overflow(tmpdir): - file_path = tmpdir.join("gdf_overflow.orc") - - # The number of rows and the large element trigger delta encoding - num_rows = 513 - df = cudf.DataFrame({"a": [None] * num_rows}, dtype="int64") - df["a"][0] = 1024 * 1024 * 1024 - df["a"][num_rows - 1] = 1 - df.to_orc(file_path) - - assert_eq(cudf.read_orc(file_path), df) - - -def normalized_equals(value1, value2): - # need naive time object for numpy to convert to datetime64 - if isinstance(value1, datetime.datetime): - value1 = value1.replace(tzinfo=None) - if isinstance(value2, datetime.datetime): - value2 = value2.replace(tzinfo=None) - - if isinstance(value1, (datetime.datetime, np.datetime64)): - value1 = np.datetime64(value1, "ms") - if isinstance(value2, (datetime.datetime, np.datetime64)): - value2 = np.datetime64(value2, "ms") - - # Compare integers with floats now - if isinstance(value1, float) or isinstance(value2, float): - return np.isclose(value1, value2) - - return value1 == value2 - - -@pytest.mark.parametrize("stats_freq", ["STRIPE", "ROWGROUP"]) -@pytest.mark.parametrize("nrows", [1, 100, 100000]) -def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq): - from pyarrow import orc - - supported_stat_types = [*supported_numpy_dtypes, "str"] - # Writing bool columns to multiple row groups is disabled - # until #6763 is fixed - if nrows == 100000: - supported_stat_types.remove("bool") - - # Make a dataframe - gdf = cudf.DataFrame( - { - "col_" + str(dtype): gen_rand_series(dtype, nrows, has_nulls=True) - for dtype in supported_stat_types - } - ) - fname = tmpdir.join("gdf.orc") - - # Write said dataframe to ORC with cuDF - gdf.to_orc(fname.strpath, statistics=stats_freq, stripe_size_rows=30000) - - # Read back written ORC's statistics - orc_file = orc.ORCFile(fname) - ( - file_stats, - stripes_stats, - ) = cudf.io.orc.read_orc_statistics([fname]) - - # check file stats - for col in gdf: - if "minimum" in file_stats[0][col]: - stats_min = file_stats[0][col]["minimum"] - if stats_min is not None: - actual_min = gdf[col].min() - assert normalized_equals(actual_min, stats_min) - if "maximum" in file_stats[0][col]: - stats_max = file_stats[0][col]["maximum"] - if stats_max is not None: - actual_max = gdf[col].max() - assert normalized_equals(actual_max, stats_max) - if "number_of_values" in file_stats[0][col]: - stats_num_vals = file_stats[0][col]["number_of_values"] - if stats_num_vals is not None: - actual_num_vals = gdf[col].count() - assert stats_num_vals == actual_num_vals - - # compare stripe statistics with actual min/max - for stripe_idx in range(0, orc_file.nstripes): - stripe = orc_file.read_stripe(stripe_idx) - # pandas is unable to handle min/max of string col with nulls
- stripe_df = cudf.DataFrame(stripe.to_pandas()) - for col in stripe_df: - if "minimum" in stripes_stats[stripe_idx][col]: - stats_min = stripes_stats[stripe_idx][col]["minimum"] - if stats_min is not None: - actual_min = stripe_df[col].min() - assert normalized_equals(actual_min, stats_min) - - if "maximum" in stripes_stats[stripe_idx][col]: - stats_max = stripes_stats[stripe_idx][col]["maximum"] - if stats_max is not None: - actual_max = stripe_df[col].max() - assert normalized_equals(actual_max, stats_max) - - if "number_of_values" in stripes_stats[stripe_idx][col]: - stats_num_vals = stripes_stats[stripe_idx][col][ - "number_of_values" - ] - if stats_num_vals is not None: - actual_num_vals = stripe_df[col].count() - assert stats_num_vals == actual_num_vals - - -@pytest.mark.parametrize("stats_freq", ["STRIPE", "ROWGROUP"]) -@pytest.mark.parametrize("nrows", [2, 100, 1024]) -def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq): - from pyarrow import orc - - supported_stat_types = [*supported_numpy_dtypes, "str"] - # Writing bool columns to multiple row groups is disabled - # until #6763 is fixed - if nrows == 1024: - supported_stat_types.remove("bool") - - gdf_fname = tmpdir.join("chunked_stats.orc") - writer = ORCWriter(gdf_fname, statistics=stats_freq, stripe_size_rows=512) - - max_char_length = 100 if nrows < 1000 else 10 - - # Make a dataframe - gdf = cudf.DataFrame( - { - "col_" + str(dtype): gen_rand_series( - dtype, - nrows // 2, - has_nulls=True, - low=0, - high=max_char_length, - seed=0, - ) - for dtype in supported_stat_types - } - ) - - pdf1 = gdf.to_pandas() - writer.write_table(gdf) - # gdf is deliberately reassigned here so its data is destroyed before - # the next write_table call; this verifies that the writer persists the - # data internally and saves no pointers into the original table - gdf = cudf.DataFrame( - { - "col_" + str(dtype): gen_rand_series( - dtype, - nrows // 2, - has_nulls=True, - low=0, - high=max_char_length, - ) - for dtype in supported_stat_types - } - ) - pdf2 = gdf.to_pandas() - writer.write_table(gdf) - writer.close() - - # pandas is unable to handle min/max of string col with nulls - expect = cudf.DataFrame(pd.concat([pdf1, pdf2]).reset_index(drop=True)) - - # Read back written ORC's statistics - orc_file = orc.ORCFile(gdf_fname) - ( - file_stats, - stripes_stats, - ) = cudf.io.orc.read_orc_statistics([gdf_fname]) - - # check file stats - for col in expect: - if "minimum" in file_stats[0][col]: - stats_min = file_stats[0][col]["minimum"] - if stats_min is not None: - actual_min = expect[col].min() - assert normalized_equals(actual_min, stats_min) - if "maximum" in file_stats[0][col]: - stats_max = file_stats[0][col]["maximum"] - if stats_max is not None: - actual_max = expect[col].max() - assert normalized_equals(actual_max, stats_max) - if "number_of_values" in file_stats[0][col]: - stats_num_vals = file_stats[0][col]["number_of_values"] - if stats_num_vals is not None: - actual_num_vals = expect[col].count() - assert stats_num_vals == actual_num_vals - - # compare stripe statistics with actual min/max - for stripe_idx in range(0, orc_file.nstripes): - stripe = orc_file.read_stripe(stripe_idx) - # pandas is unable to handle min/max of string col with nulls - stripe_df = cudf.DataFrame(stripe.to_pandas()) - for col in stripe_df: - if "minimum" in stripes_stats[stripe_idx][col]: - stats_min = stripes_stats[stripe_idx][col]["minimum"] - if stats_min is not None: - actual_min = stripe_df[col].min() - assert
normalized_equals(actual_min, stats_min) - - if "maximum" in stripes_stats[stripe_idx][col]: - stats_max = stripes_stats[stripe_idx][col]["maximum"] - if stats_max is not None: - actual_max = stripe_df[col].max() - assert normalized_equals(actual_max, stats_max) - - if "number_of_values" in stripes_stats[stripe_idx][col]: - stats_num_vals = stripes_stats[stripe_idx][col][ - "number_of_values" - ] - if stats_num_vals is not None: - actual_num_vals = stripe_df[col].count() - assert stats_num_vals == actual_num_vals - - -@pytest.mark.parametrize("nrows", [1, 100, 100000]) -def test_orc_write_bool_statistics(tmpdir, datadir, nrows): - from pyarrow import orc - - # Make a dataframe - gdf = cudf.DataFrame({"col_bool": gen_rand_series("bool", nrows)}) - fname = tmpdir.join("gdf.orc") - - # Write said dataframe to ORC with cuDF - gdf.to_orc(fname.strpath, stripe_size_rows=30000) - - # Read back written ORC's statistics - orc_file = orc.ORCFile(fname) - ( - file_stats, - stripes_stats, - ) = cudf.io.orc.read_orc_statistics([fname]) - - # check file stats - col = "col_bool" - if "true_count" in file_stats[0][col]: - stats_true_count = file_stats[0][col]["true_count"] - actual_true_count = gdf[col].sum() - assert normalized_equals(actual_true_count, stats_true_count) - - if "number_of_values" in file_stats[0][col]: - stats_valid_count = file_stats[0][col]["number_of_values"] - actual_valid_count = len(gdf[col]) - gdf[col].null_count - assert normalized_equals(actual_valid_count, stats_valid_count) - - # compare stripe statistics with actual min/max - for stripe_idx in range(0, orc_file.nstripes): - stripe = orc_file.read_stripe(stripe_idx) - # pandas is unable to handle min/max of string col with nulls - stripe_df = cudf.DataFrame(stripe.to_pandas()) - - if "true_count" in stripes_stats[stripe_idx][col]: - actual_true_count = stripe_df[col].sum() - stats_true_count = stripes_stats[stripe_idx][col]["true_count"] - assert normalized_equals(actual_true_count, stats_true_count) - - if "number_of_values" in stripes_stats[stripe_idx][col]: - actual_valid_count = ( - len(stripe_df[col]) - stripe_df[col].null_count - ) - stats_valid_count = stripes_stats[stripe_idx][col][ - "number_of_values" - ] - assert normalized_equals(actual_valid_count, stats_valid_count) - - -def test_orc_reader_gmt_timestamps(datadir): - path = datadir / "TestOrcFile.gmt.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - assert_eq(pdf, gdf) - - -def test_orc_bool_encode_fail(): - buffer = BytesIO() - - # Generate a boolean column longer than a single row group - fail_df = cudf.DataFrame({"col": gen_rand_series("bool", 20000)}) - # Invalidate a row in the first row group - fail_df["col"][5000] = None - - # Should throw instead of generating a file that is incompatible - # with other readers (see issue #6763) - with pytest.raises(RuntimeError): - fail_df.to_orc(buffer) - - # Generate a boolean column longer than a single row group - okay_df = cudf.DataFrame({"col": gen_rand_series("bool", 20000)}) - okay_df["col"][15000] = None - # Invalid row is in the last row group; encoding is assumed to be correct - okay_df.to_orc(buffer) - - # Also validate data - pdf = pd.read_orc(buffer) - - assert_eq(okay_df.to_pandas(nullable=True), pdf) - - -def test_nanoseconds_overflow(): - buffer = BytesIO() - # Use nanosecond values that take more than 32 bits to encode - s = cudf.Series([710424008, -1338482640], dtype="datetime64[ns]") - expected = cudf.DataFrame({"s": s}) - expected.to_orc(buffer) - - cudf_got = cudf.read_orc(buffer) - 
assert_eq(expected, cudf_got) - - pandas_got = pd.read_orc(buffer) - assert_eq(expected, pandas_got) - - -def test_empty_dataframe(): - buffer = BytesIO() - expected = cudf.DataFrame() - expected.to_orc(buffer) - - # Raise error if column name is mentioned, but it doesn't exist. - with pytest.raises(RuntimeError): - cudf.read_orc(buffer, columns=["a"]) - - got_df = cudf.read_orc(buffer) - expected_pdf = pd.read_orc(buffer) - - assert_eq(expected, got_df) - assert_eq(expected_pdf, got_df) - - -@pytest.mark.parametrize( - "data", [[None, ""], ["", None], [None, None], ["", ""]] -) -def test_empty_string_columns(data): - buffer = BytesIO() - - expected = cudf.DataFrame({"string": data}, dtype="str") - expected.to_orc(buffer) - - expected_pdf = pd.read_orc(buffer) - got_df = cudf.read_orc(buffer) - - assert_eq(expected, got_df) - assert_eq( - expected_pdf, - got_df.to_pandas(nullable=True) - if expected_pdf["string"].dtype == pd.StringDtype() - else got_df, - ) - - -@pytest.mark.parametrize("scale", [-3, 0, 3]) -@pytest.mark.parametrize( - "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], -) -def test_orc_writer_decimal(tmpdir, scale, decimal_type): - fname = tmpdir / "decimal.orc" - - expected = cudf.DataFrame({"dec_val": gen_rand_series("i", 100)}) - expected["dec_val"] = expected["dec_val"].astype(decimal_type(7, scale)) - - expected.to_orc(fname) - - got = pd.read_orc(fname) - assert_eq(expected.to_pandas()["dec_val"], got["dec_val"]) - - -@pytest.mark.parametrize("num_rows", [1, 100, 3000]) -def test_orc_reader_multiple_files(datadir, num_rows): - path = datadir / "TestOrcFile.testSnappy.orc" - - df_1 = pd.read_orc(path) - df_2 = pd.read_orc(path) - df = pd.concat([df_1, df_2], ignore_index=True) - - gdf = cudf.read_orc([path, path], num_rows=num_rows).to_pandas() - - # Slice rows out of the whole dataframe for comparison as PyArrow doesn't - # have an API to read a subsection of rows from the file - df = df[:num_rows] - df = df.reset_index(drop=True) - - assert_eq(df, gdf) - - -def test_orc_reader_multi_file_single_stripe(datadir): - path = datadir / "TestOrcFile.testSnappy.orc" - - # should raise an exception - with pytest.raises(ValueError): - cudf.read_orc([path, path], stripes=[0]) - - -def test_orc_reader_multi_file_multi_stripe(datadir): - path = datadir / "TestOrcFile.testStripeLevelStats.orc" - gdf = cudf.read_orc([path, path], stripes=[[0, 1], [2]]) - pdf = pd.read_orc(path) - assert_eq(pdf, gdf) - - -def test_orc_string_stream_offset_issue(): - size = 30000 - vals = { - str(x): [decimal.Decimal(1)] * size if x != 0 else ["XYZ"] * size - for x in range(0, 5) - } - df = cudf.DataFrame(vals) - - buffer = BytesIO() - df.to_orc(buffer) - - assert_eq(df, cudf.read_orc(buffer)) - - -def generate_list_struct_buff(size=10_000): - rd = random.Random(1) - rng = np.random.default_rng(seed=1) - - buff = BytesIO() - - lvl3_list = [ - rd.choice( - [ - None, - [ - [ - [ - rd.choice([None, rng.integers(1, 3)]) - for _ in range(rng.integers(1, 3)) - ] - for _ in range(rng.integers(0, 3)) - ] - for _ in range(rng.integers(0, 3)) - ], - ] - ) - for _ in range(size) - ] - lvl1_list = [ - [ - rd.choice([None, rng.integers(0, 3)]) - for _ in range(rng.integers(1, 4)) - ] - for _ in range(size) - ] - lvl1_struct = [ - rd.choice( - [ - None, - {"a": rng.integers(0, 3), "b": rng.integers(0, 3)}, - ] - ) - for _ in range(size) - ] - lvl2_struct = [ - rd.choice( - [ - None, - {"a": rd.choice([None, rng.integers(0, 3)])}, - { - "lvl1_struct": { - "c": rd.choice([None, 
rng.integers(0, 3)]), - "d": rng.integers(0, 3), - }, - }, - ] - ) - for _ in range(size) - ] - list_nests_struct = [ - [ - {"a": rd.choice(lvl1_struct), "b": rd.choice(lvl1_struct)} - for _ in range(rng.integers(1, 4)) - ] - for _ in range(size) - ] - struct_nests_list = [ - {"struct": lvl1_struct[x], "list": lvl1_list[x]} for x in range(size) - ] - - pa_table = pa.table( - { - "lvl3_list": lvl3_list, - "lvl1_list": lvl1_list, - "lvl1_struct": lvl1_struct, - "lvl2_struct": lvl2_struct, - "list_nests_struct": list_nests_struct, - "struct_nests_list": struct_nests_list, - } - ) - with orc.ORCWriter(buff, stripe_size=1024) as writer: - writer.write(pa_table) - return buff - - -@pytest.fixture(scope="module") -def list_struct_buff(): - return generate_list_struct_buff() - - -@pytest.mark.parametrize( - "columns", - [ - None, - ["lvl3_list", "list_nests_struct", "lvl2_struct", "struct_nests_list"], - ["lvl2_struct", "lvl1_struct"], - ], -) -@pytest.mark.parametrize("num_rows", [0, 15, 1005, 10561, 10_000]) -@pytest.mark.parametrize("use_index", [True, False]) -def test_lists_struct_nests(columns, num_rows, use_index, list_struct_buff): - from pyarrow import orc - - gdf = cudf.read_orc( - list_struct_buff, - columns=columns, - num_rows=num_rows, - use_index=use_index, - ) - - pyarrow_tbl = orc.ORCFile(list_struct_buff).read() - - pyarrow_tbl = ( - pyarrow_tbl[:num_rows] - if columns is None - else pyarrow_tbl.select(columns)[:num_rows] - ) - - if num_rows > 0: - assert pyarrow_tbl.equals(gdf.to_arrow()) - else: - assert_eq(pyarrow_tbl.to_pandas(), gdf) - - -@pytest.mark.parametrize("columns", [None, ["lvl1_struct"], ["lvl1_list"]]) -def test_skip_rows_for_nested_types(columns, list_struct_buff): - with pytest.raises( - RuntimeError, match="skip_rows is not supported by nested column" - ): - cudf.read_orc( - list_struct_buff, - columns=columns, - use_index=True, - skiprows=5, - ) - - -def test_pyspark_struct(datadir): - path = datadir / "TestOrcFile.testPySparkStruct.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - - assert_eq(pdf, gdf) - - -def gen_map_buff(size): - from string import ascii_letters as al - - from pyarrow import orc - - rd = random.Random(1) - rng = np.random.default_rng(seed=1) - - buff = BytesIO() - - lvl1_map = pa.array( - [ - rd.choice( - [ - None, - { - rd.choice(al): rd.choice( - [None, rng.integers(1, 1500)] - ), - }, - ] - ) - for _ in range(size) - ], - type=pa.map_(pa.string(), pa.int64()), - ) - lvl2_map = pa.array( - [ - rd.choice( - [ - None, - *( - { - rd.choice(al): rd.choice( - [ - None, - [ - rd.choice( - [None, rng.integers(1, 1500)] - ) - for _ in range(5) - ], - ] - ) - } - for _ in range(2) - ), - ] - ) - for _ in range(size) - ], - type=pa.map_(pa.string(), pa.list_(pa.int64())), - ) - lvl2_struct_map = pa.array( - [ - rd.choice( - [ - None, - *( - { - rd.choice(al): rd.choice( - [ - None, - { - "a": rd.choice( - [None, rng.integers(1, 1500)] - ), - "b": rd.choice( - [None, rng.integers(1, 1500)] - ), - }, - ] - ) - } - for _ in range(2) - ), - ] - ) - for _ in range(size) - ], - type=pa.map_( - pa.string(), pa.struct({"a": pa.int64(), "b": pa.int64()}) - ), - ) - - pa_table = pa.Table.from_arrays( - [lvl1_map, lvl2_map, lvl2_struct_map], - ["lvl1_map", "lvl2_map", "lvl2_struct_map"], - ) - - orc.write_table( - pa_table, buff, stripe_size=1024, compression="UNCOMPRESSED" - ) - - return buff - - -map_buff = gen_map_buff(size=100000) - - -@pytest.mark.parametrize( - "columns", - [None, ["lvl1_map", "lvl2_struct_map"], ["lvl2_struct_map", 
"lvl2_map"]], -) -@pytest.mark.parametrize("num_rows", [0, 15, 1005, 10561, 100000]) -@pytest.mark.parametrize("use_index", [True, False]) -def test_map_type_read(columns, num_rows, use_index): - from pyarrow import orc - - tbl = orc.read_table(map_buff) - - lvl1_map = ( - tbl["lvl1_map"] - .combine_chunks() - .view(pa.list_(pa.struct({"key": pa.string(), "value": pa.int64()}))) - ) - lvl2_map = ( - tbl["lvl2_map"] - .combine_chunks() - .view( - pa.list_( - pa.struct({"key": pa.string(), "value": pa.list_(pa.int64())}) - ) - ) - ) - lvl2_struct_map = ( - tbl["lvl2_struct_map"] - .combine_chunks() - .view( - pa.list_( - pa.struct( - { - "key": pa.string(), - "value": pa.struct({"a": pa.int64(), "b": pa.int64()}), - } - ) - ) - ) - ) - - expected_tbl = pa.table( - { - "lvl1_map": lvl1_map, - "lvl2_map": lvl2_map, - "lvl2_struct_map": lvl2_struct_map, - } - ) - gdf = cudf.read_orc( - map_buff, columns=columns, num_rows=num_rows, use_index=use_index - ) - - expected_tbl = ( - expected_tbl[:num_rows] - if columns is None - else expected_tbl.select(columns)[:num_rows] - ) - - if num_rows > 0: - assert expected_tbl.equals(gdf.to_arrow()) - else: - assert_eq(expected_tbl.to_pandas(), gdf) - - -def test_orc_reader_decimal(datadir): - path = datadir / "TestOrcFile.decimal.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - - assert_eq(pdf, gdf) - - -# This test case validates the issue raised in #8665, -# please check the issue for more details. -def test_orc_timestamp_read(datadir): - path = datadir / "TestOrcFile.timestamp.issue.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - - assert_eq(pdf, gdf) - - -def dec(num): - return decimal.Decimal(str(num)) - - -@pytest.mark.parametrize( - "data", - [ - # basic + nested strings - { - "lls": [[["a"], ["bb"]] * 5 for i in range(12345)], - "lls2": [[["ccc", "dddd"]] * 6 for i in range(12345)], - "ls_dict": [["X"] * 7 for i in range(12345)], - "ls_direct": [[str(i)] * 9 for i in range(12345)], - "li": [[i] * 11 for i in range(12345)], - "lf": [[i * 0.5] * 13 for i in range(12345)], - "ld": [[dec(i / 2)] * 15 for i in range(12345)], - }, - # with nulls - { - "ls": [ - [str(i) if i % 5 else None, str(2 * i)] if i % 2 else None - for i in range(12345) - ], - "li": [[i, i * i, i % 2] if i % 3 else None for i in range(12345)], - "ld": [ - [dec(i), dec(i / 2) if i % 7 else None] if i % 5 else None - for i in range(12345) - ], - }, - # with empty elements - { - "ls": [ - [str(i), str(2 * i)] if i % 2 else [] for i in range(12345) - ], - "lls": [ - [[str(i), str(2 * i)]] if i % 2 else [[], []] - for i in range(12345) - ], - "li": [[i, i * i, i % 2] if i % 3 else [] for i in range(12345)], - "lli": [ - [[i], [i * i], [i % 2]] if i % 3 else [[]] - for i in range(12345) - ], - "ld": [ - [dec(i), dec(i / 2)] if i % 5 else [] for i in range(12345) - ], - }, - # variable list lengths - { - "ls": [[str(i)] * i for i in range(123)], - "li": [[i, i * i] * i for i in range(123)], - "ld": [[dec(i), dec(i / 2)] * i for i in range(123)], - }, - # many child elements (more that max_stripe_rows) - {"li": [[i] * 1100 for i in range(11000)]}, - ], -) -def test_orc_writer_lists(data): - buffer = BytesIO() - cudf.DataFrame(data).to_orc( - buffer, stripe_size_rows=2048, row_index_stride=512 - ) - # Read in as pandas but compare with pyarrow - # since pandas doesn't have a list type - pa_out = pa.Table.from_pandas(pd.read_orc(buffer)) - pa_in = pa.table(data) - assert pa_out.equals(pa_in) - - -def test_chunked_orc_writer_lists(): - num_rows = 12345 - pdf_in = 
pd.DataFrame( - { - "ls": [[str(i), str(2 * i)] for i in range(num_rows)], - "ld": [[dec(i / 2)] * 5 for i in range(num_rows)], - } - ) - - gdf = cudf.from_pandas(pdf_in) - expect = pd.concat([pdf_in, pdf_in]).reset_index(drop=True) - - buffer = BytesIO() - writer = ORCWriter(buffer) - writer.write_table(gdf) - writer.write_table(gdf) - writer.close() - - got = pd.read_orc(buffer) - assert_eq(expect, got) - - -def test_writer_timestamp_stream_size(datadir, tmpdir): - pdf_fname = datadir / "TestOrcFile.largeTimestamps.orc" - gdf_fname = tmpdir.join("gdf.orc") - - expect = pd.read_orc(pdf_fname) - cudf.from_pandas(expect).to_orc(gdf_fname.strpath) - got = pd.read_orc(gdf_fname) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "fname", - [ - "TestOrcFile.NoIndStrm.StructWithNoNulls.orc", - "TestOrcFile.NoIndStrm.StructAndIntWithNulls.orc", - "TestOrcFile.NoIndStrm.StructAndIntWithNulls.TwoStripes.orc", - "TestOrcFile.NoIndStrm.IntWithNulls.orc", - ], -) -def test_no_row_group_index_orc_read(datadir, fname): - from pyarrow import orc - - fpath = datadir / fname - - expect = orc.ORCFile(fpath).read() - got = cudf.read_orc(fpath) - - assert expect.equals(got.to_arrow()) - - -def test_names_in_struct_dtype_nesting(datadir): - from pyarrow import orc - - fname = datadir / "TestOrcFile.NestedStructDataFrame.orc" - - expect = orc.ORCFile(fname).read() - got = cudf.read_orc(fname) - - # test dataframes - assert expect.equals(got.to_arrow()) - - edf = cudf.DataFrame(expect.to_pandas()) - # test schema - assert edf.dtypes.equals(got.dtypes) - - -def test_writer_lists_structs(list_struct_buff): - from pyarrow import orc - - df_in = cudf.read_orc(list_struct_buff) - - buff = BytesIO() - df_in.to_orc(buff) - - pyarrow_tbl = orc.ORCFile(buff).read() - - assert pyarrow_tbl.equals(df_in.to_arrow()) - - -@pytest.mark.parametrize( - "data", - [ - { - "with_pd": [ - [i if i % 3 else None] if i < 9999 or i > 20001 else None - for i in range(21000) - ], - "no_pd": [ - [i if i % 3 else None] if i < 9999 or i > 20001 else [] - for i in range(21000) - ], - }, - ], -) -def test_orc_writer_lists_empty_rg(data): - pdf_in = pd.DataFrame(data) - buffer = BytesIO() - cudf_in = cudf.from_pandas(pdf_in) - - cudf_in.to_orc(buffer) - - df = cudf.read_orc(buffer) - assert_eq(df, cudf_in) - - pdf_out = pd.read_orc(buffer) - assert_eq(pdf_in, pdf_out) - - -def test_statistics_sum_overflow(): - maxint64 = np.iinfo(np.int64).max - minint64 = np.iinfo(np.int64).min - - buff = BytesIO() - df = pd.DataFrame( - {"a": [maxint64, 1], "b": [minint64, -1], "c": [minint64, 1]} - ) - df.to_orc(buff) - - file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff]) - assert file_stats[0]["a"].get("sum") is None - assert file_stats[0]["b"].get("sum") is None - assert file_stats[0]["c"].get("sum") == minint64 + 1 - - assert stripe_stats[0]["a"].get("sum") is None - assert stripe_stats[0]["b"].get("sum") is None - assert stripe_stats[0]["c"].get("sum") == minint64 + 1 - - -def test_empty_statistics(): - from pyarrow import orc - - buff = BytesIO() - pa_table = pa.Table.from_arrays( - [ - pa.array([None], type=pa.int64()), - pa.array([None], type=pa.float64()), - pa.array([None], type=pa.string()), - pa.array([None], type=pa.decimal128(11, 2)), - pa.array([None], type=pa.timestamp("ns")), - pa.array([None], type=pa.date64()), - pa.array([None], type=pa.bool_()), - pa.array([None], type=pa.binary()), - pa.array([1], type=pa.int64()), - ], - ["a", "b", "c", "d", "e", "f", "g", "h", "i"], - ) - orc.write_table(pa_table, buff) - - 
got = cudf.io.orc.read_orc_statistics([buff]) - - # Check for both file and stripe stats - for stats in got: - # Similar expected stats for the first 6 columns in this case - for col_name in ascii_lowercase[:6]: - assert stats[0][col_name].number_of_values == 0 - assert stats[0][col_name].has_null is True - assert stats[0][col_name].get("minimum") is None - assert stats[0][col_name].get("maximum") is None - for col_name in ascii_lowercase[:3]: - assert stats[0][col_name].get("sum") == 0 - # Sum for decimal column is a string - assert stats[0]["d"].get("sum") == "0" - - assert stats[0]["g"].number_of_values == 0 - assert stats[0]["g"].has_null is True - assert stats[0]["g"].get("true_count") == 0 - assert stats[0]["g"].get("false_count") == 0 - - assert stats[0]["h"].number_of_values == 0 - assert stats[0]["h"].has_null is True - assert stats[0]["h"].get("sum") == 0 - - assert stats[0]["i"].number_of_values == 1 - assert stats[0]["i"].has_null is False - assert stats[0]["i"].get("minimum") == 1 - assert stats[0]["i"].get("maximum") == 1 - assert stats[0]["i"].get("sum") == 1 - - -@pytest.mark.parametrize( - "equivalent_columns", - [ - (["lvl1_struct.a", "lvl1_struct.b"], ["lvl1_struct"]), - (["lvl1_struct", "lvl1_struct.a"], ["lvl1_struct"]), - (["lvl1_struct.a", "lvl1_struct"], ["lvl1_struct"]), - (["lvl1_struct.b", "lvl1_struct.a"], ["lvl1_struct.b", "lvl1_struct"]), - (["lvl2_struct.lvl1_struct", "lvl2_struct"], ["lvl2_struct"]), - ( - ["lvl2_struct.a", "lvl2_struct.lvl1_struct.c", "lvl2_struct"], - ["lvl2_struct"], - ), - ], -) -def test_select_nested(list_struct_buff, equivalent_columns): - # The two column selections should be equivalent - df_cols1 = cudf.read_orc(list_struct_buff, columns=equivalent_columns[0]) - df_cols2 = cudf.read_orc(list_struct_buff, columns=equivalent_columns[1]) - assert_eq(df_cols1, df_cols2) - - -def test_orc_writer_rle_stream_size(datadir, tmpdir): - from pyarrow import orc - - original = datadir / "TestOrcFile.int16.rle.size.orc" - reencoded = tmpdir.join("int16_map.orc") - - df = cudf.read_orc(original) - df.to_orc(reencoded) - - # Segfaults when RLE stream sizes don't account for varint length - pa_out = orc.ORCFile(reencoded).read() - assert df.to_arrow().equals(pa_out) - - -def test_empty_columns(): - buffer = BytesIO() - # string and decimal columns have additional steps that need to be skipped - expected = cudf.DataFrame( - { - "string": cudf.Series([], dtype="str"), - "decimal": cudf.Series([], dtype=cudf.Decimal64Dtype(10, 1)), - } - ) - expected.to_orc(buffer, compression="snappy") - - got_df = cudf.read_orc(buffer) - assert_eq(expected, got_df) - - -def test_orc_reader_zstd_compression(list_struct_buff): - from pyarrow import orc - - expected = cudf.read_orc(list_struct_buff) - # save with ZSTD compression - buffer = BytesIO() - pyarrow_tbl = orc.ORCFile(list_struct_buff).read() - with orc.ORCWriter(buffer, compression="zstd") as writer: - writer.write(pyarrow_tbl) - got = cudf.read_orc(buffer) - # compare with pyarrow since pandas doesn't - # have a list or struct - assert expected.to_arrow().equals(got.to_arrow()) - - -def test_writer_protobuf_large_rowindexentry(): - s = [ - "Length of the two strings needs to add up to at least ~120", - "So that the encoded statistics are larger than 128 bytes", - ] * 5001 # generate more than 10K rows to have two row groups - df = cudf.DataFrame({"s1": s}) - - buff = BytesIO() - df.to_orc(buff) - - got = cudf.read_orc(buff) - assert_frame_equal(df, got) - - -@pytest.mark.parametrize("compression", ["ZLIB", 
"ZSTD"]) -def test_orc_writer_nvcomp(compression): - expected = cudf.datasets.randomdata( - nrows=12345, dtypes={"a": int, "b": str, "c": float}, seed=1 - ) - - buff = BytesIO() - try: - expected.to_orc(buff, compression=compression) - except RuntimeError: - pytest.mark.xfail(reason="Newer nvCOMP version is required") - else: - got = pd.read_orc(buff) - assert_eq(expected, got) - - -def run_orc_columns_and_index_param(index_obj, index, columns): - buffer = BytesIO() - df = cudf.DataFrame( - {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=index_obj - ) - df.to_orc(buffer, index=index) - - expected = pd.read_orc(buffer, columns=columns) - got = cudf.read_orc(buffer, columns=columns) - - assert_eq(expected, got, check_index_type=True) - - -@pytest.mark.parametrize("index_obj", [None, [10, 11, 12], ["x", "y", "z"]]) -@pytest.mark.parametrize("index", [True, False, None]) -@pytest.mark.parametrize( - "columns", - [ - None, - pytest.param( - [], - marks=pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Bug in older version of pandas", - ), - ), - ], -) -def test_orc_columns_and_index_param(index_obj, index, columns): - run_orc_columns_and_index_param(index_obj, index, columns) - - -@pytest.mark.parametrize( - "columns,index,index_obj", - [ - ( - ["a", "b"], - True, - None, - ), - ( - ["a", "b"], - True, - [10, 11, 12], - ), - ( - ["a", "b"], - True, - ["x", "y", "z"], - ), - ( - ["a", "b"], - None, - [10, 11, 12], - ), - ( - ["a", "b"], - None, - ["x", "y", "z"], - ), - ], -) -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12026") -def test_orc_columns_and_index_param_read_index(index_obj, index, columns): - run_orc_columns_and_index_param(index_obj, index, columns) - - -@pytest.mark.parametrize( - "columns,index,index_obj", - [ - (["a", "b"], False, None), - (["a", "b"], False, [10, 11, 12]), - (["a", "b"], False, ["x", "y", "z"]), - (["a", "b"], None, None), - ], -) -def test_orc_columns_and_index_param_no_read_index(index_obj, index, columns): - run_orc_columns_and_index_param(index_obj, index, columns) - - -@pytest.mark.parametrize( - "df_data,cols_as_map_type,expected_data", - [ - ( - {"a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]]}, - ["a"], - {"a": [[(10, 20)], [(1, 21)]]}, - ), - ( - { - "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - "b": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - }, - ["b"], - { - "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - "b": [[(10, 20)], [(1, 21)]], - }, - ), - ( - { - "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - "b": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - "c": [ - [{"a": {"a": 10}, "b": 20}], - [{"a": {"a": 12}, "b": 21}], - ], - }, - ["b", "c"], - { - "a": [[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]], - "b": [[(10, 20)], [(1, 21)]], - "c": [[({"a": 10}, 20)], [({"a": 12}, 21)]], - }, - ), - ], -) -def test_orc_writer_cols_as_map_type(df_data, cols_as_map_type, expected_data): - df = cudf.DataFrame(df_data) - buffer = BytesIO() - df.to_orc(buffer, cols_as_map_type=cols_as_map_type) - - got = pd.read_orc(buffer) - expected = pd.DataFrame(expected_data) - - assert_eq(got, expected) - - -def test_orc_writer_cols_as_map_type_error(): - df = cudf.DataFrame( - {"a": cudf.Series([[{"a": 10, "b": 20}], [{"a": 1, "b": 21}]])} - ) - buffer = BytesIO() - with pytest.raises( - TypeError, match="cols_as_map_type must be a list of column names." 
- ): - df.to_orc(buffer, cols_as_map_type=1) - - -@pytest.fixture -def negative_timestamp_df(): - return cudf.DataFrame( - { - "a": [ - pd.Timestamp("1969-12-31 23:59:59.000123"), - pd.Timestamp("1969-12-31 23:59:58.000999"), - pd.Timestamp("1969-12-31 23:59:58.001001"), - pd.Timestamp("1839-12-24 03:58:56.000826"), - ] - } - ) - - -@pytest.mark.parametrize("engine", ["cudf", "pyarrow"]) -def test_orc_reader_negative_timestamp(negative_timestamp_df, engine): - buffer = BytesIO() - negative_timestamp_df.to_orc(buffer) - - # We warn the user that this function will fall back to the CPU for reading - # when the engine is pyarrow. - with expect_warning_if(engine == "pyarrow", UserWarning): - got = cudf.read_orc(buffer, engine=engine) - - assert_eq(negative_timestamp_df, got, check_dtype=False) - - -def test_orc_writer_negative_timestamp(negative_timestamp_df): - from pyarrow import orc - - buffer = BytesIO() - negative_timestamp_df.to_orc(buffer) - - assert_eq(negative_timestamp_df, pd.read_orc(buffer), check_dtype=False) - assert_eq( - negative_timestamp_df, orc.ORCFile(buffer).read(), check_dtype=False - ) - - -@pytest.mark.skip( - reason="Bug specific to rockylinux8: https://github.com/rapidsai/cudf/issues/15802", -) -def test_orc_reader_apache_negative_timestamp(datadir): - path = datadir / "TestOrcFile.apache_timestamp.orc" - - pdf = pd.read_orc(path) - gdf = cudf.read_orc(path) - - assert_eq(pdf, gdf) - - -def test_statistics_string_sum(): - strings = ["a string", "another string!"] - buff = BytesIO() - df = cudf.DataFrame({"str": strings}) - df.to_orc(buff) - - file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff]) - assert_eq(file_stats[0]["str"].get("sum"), sum(len(s) for s in strings)) - - -@pytest.mark.parametrize( - "fname", - [ - "TestOrcFile.Hive.OneEmptyMap.orc", - "TestOrcFile.Hive.OneEmptyList.orc", - "TestOrcFile.Hive.OneNullStruct.orc", - "TestOrcFile.Hive.EmptyListStripe.orc", - "TestOrcFile.Hive.NullStructStripe.orc", - "TestOrcFile.Hive.AllNulls.orc", - ], -) -def test_reader_empty_stripe(datadir, fname): - path = datadir / fname - - expected = pd.read_orc(path) - got = cudf.read_orc(path) - assert_eq(expected, got) - - -# needs enough data for multiple row groups -@pytest.mark.parametrize("data", [["*"] * 10001, ["**", None] * 5001]) -def test_reader_row_index_order(data): - expected = cudf.DataFrame({"str": data}, dtype="string") - - buffer = BytesIO() - expected.to_pandas().to_orc(buffer) - got = cudf.read_orc(buffer) - assert_eq(expected, got) - - -# Test the corner case where empty blocks are compressed -# Decompressed data size is zero, even though compressed data size is non-zero -# For more information see https://github.com/rapidsai/cudf/issues/13608 -def test_orc_reader_empty_decomp_data(datadir): - path = datadir / "TestOrcFile.Spark.EmptyDecompData.orc" - - expect = pd.read_orc(path) - got = cudf.read_orc(path) - - assert_eq(expect, got) - - -def test_orc_reader_empty_deeply_nested_level(datadir): - # Test the case where top level struct has nulls, but the nested struct is - # not nullable. In this case there is no data in the second level, but we - # still need to pass the parent null mask to the third level. 
- path = datadir / "TestOrcFile.Spark.NestedNotNullableStruct.orc" - - expect = pd.read_orc(path) - got = cudf.read_orc(path) - - assert_eq(expect, got) - - -def test_orc_chunked_writer_stripe_size(datadir): - from pyarrow import orc - - df = cudf.DataFrame({"col": gen_rand_series("int", 100000)}) - - buffer = BytesIO() - writer = ORCWriter(buffer, stripe_size_bytes=64 * 1024) - writer.write_table(df) - writer.close() - - orc_file = orc.ORCFile(buffer) - assert_eq(orc_file.nstripes, 10) - - buffer = BytesIO() - writer = ORCWriter(buffer, stripe_size_rows=20000) - writer.write_table(df) - writer.close() - - orc_file = orc.ORCFile(buffer) - assert_eq(orc_file.nstripes, 5) - - -def test_reader_lz4(): - from pyarrow import orc - - pdf = pd.DataFrame({"ints": [1, 2] * 5001}) - pa_table = pa.Table.from_pandas(pdf) - - buffer = BytesIO() - writer = orc.ORCWriter(buffer, compression="LZ4") - writer.write(pa_table) - writer.close() - - got = cudf.read_orc(buffer) - assert_eq(pdf, got) - - -def test_writer_lz4(): - gdf = cudf.DataFrame({"ints": [1, 2] * 5001}) - - buffer = BytesIO() - gdf.to_orc(buffer, compression="LZ4") - - got = pd.read_orc(buffer) - assert_eq(gdf, got) - - -def test_row_group_alignment(datadir): - path = datadir / "TestOrcFile.MapManyNulls.parquet" - - expected = cudf.read_parquet(path) - - buffer = BytesIO() - expected.to_orc(buffer) - - got = cudf.read_orc(buffer) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "inputfile", - [ - # These sample data have a single column my_timestamp of the TIMESTAMP type, - # 2660 rows, and 1536 rows per row group. - "TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc", - "TestOrcFile.timestamp.desynced.snappy.RLEv2.orc", - # These two data are the same with the above, except that every 100 rows start - # with a null value. - "TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc", - "TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc", - ], -) -def test_orc_reader_desynced_timestamp(datadir, inputfile): - # Test a special case where the DATA stream (second) in a TIMESTAMP column - # is progressed faster than the SECONDARY stream (nanosecond) at the start of a row - # group. In this case, the "run cache manager" in the decoder kernel is used to - # orchestrate the dual-stream processing. - # For more information, see https://github.com/rapidsai/cudf/issues/17155. - - path = datadir / inputfile - - expect = pd.read_orc(path) - got = cudf.read_orc(path) - - assert_frame_equal(cudf.from_pandas(expect), got) - - -@pytest.mark.parametrize("compression", ["LZ4", "SNAPPY", "ZLIB", "ZSTD"]) -def test_orc_decompression(set_decomp_env_vars, compression, non_nested_pdf): - # Write the DataFrame to a Parquet file - buffer = BytesIO() - non_nested_pdf.to_orc(buffer, engine_kwargs={"compression": compression}) - - # Read the Parquet file back into a DataFrame - got = cudf.read_orc(buffer) - - assert_eq(non_nested_pdf, got) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py deleted file mode 100644 index 2ab48f7ecce..00000000000 --- a/python/cudf/cudf/tests/test_parquet.py +++ /dev/null @@ -1,4601 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
- -import datetime -import decimal -import glob -import hashlib -import math -import os -import pathlib -import random -import string -from contextlib import contextmanager -from io import BytesIO -from string import ascii_letters - -import cupy -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest -from fsspec.core import get_fs_token_paths -from packaging import version -from pyarrow import parquet as pq - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.io.parquet import ( - ParquetDatasetWriter, - ParquetWriter, - merge_parquet_filemetadata, -) -from cudf.testing import assert_eq, dataset_generator as dg -from cudf.testing._utils import TIMEDELTA_TYPES, set_random_null_mask_inplace - - -@contextmanager -def _hide_pyarrow_parquet_cpu_warnings(engine): - if engine == "pyarrow": - with pytest.warns( - UserWarning, - match="Using CPU via PyArrow to read Parquet dataset. This option " - "is both inefficient and unstable!", - ): - yield - else: - yield - - -@pytest.fixture(scope="module") -def datadir(datadir): - return datadir / "parquet" - - -@pytest.fixture(params=[1, 5, 10, 100]) -def simple_pdf(request): - rng = np.random.default_rng(seed=0) - types = [ - "bool", - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - # "uint32", pandas promotes uint32 to int64 - # https://issues.apache.org/jira/browse/ARROW-9215 - "uint64", - "float32", - "float64", - ] - nrows = request.param - - # Create a pandas dataframe with random data of mixed types - test_pdf = pd.DataFrame( - { - f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) - for typ in types - }, - # Need to ensure that this index is not a RangeIndex to get the - # expected round-tripping behavior from Parquet reader/writer. - index=pd.Index(list(range(nrows))), - ) - # Delete the name of the column index, and rename the row index - test_pdf.columns.name = None - test_pdf.index.name = "test_index" - - return test_pdf - - -@pytest.fixture -def simple_gdf(simple_pdf): - return cudf.DataFrame.from_pandas(simple_pdf) - - -def build_pdf(num_columns, day_resolution_timestamps): - rng = np.random.default_rng(seed=0) - types = [ - "bool", - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - # "uint32", pandas promotes uint32 to int64 - # https://issues.apache.org/jira/browse/ARROW-9215 - "uint64", - "float32", - "float64", - "datetime64[ms]", - "datetime64[us]", - "str", - ] - nrows = num_columns.param - - # Create a pandas dataframe with random data of mixed types - test_pdf = pd.DataFrame( - { - f"col_{typ}": rng.integers(0, nrows, nrows).astype(typ) - for typ in types - }, - # Need to ensure that this index is not a RangeIndex to get the - # expected round-tripping behavior from Parquet reader/writer. - index=pd.Index(list(range(nrows))), - ) - # Delete the name of the column index, and rename the row index - test_pdf.columns.name = None - test_pdf.index.name = "test_index" - - # make datetime64s a little more interesting by increasing the range of - # dates. Note that pandas will convert these to ns timestamps, so care is - # taken to avoid overflowing a ns timestamp. There is also the ability to - # request timestamps be whole days only via `day_resolution_timestamps`.
- for t in [ - { - "name": "datetime64[ms]", - "nsDivisor": 1000000, - "dayModulus": 86400000, - }, - { - "name": "datetime64[us]", - "nsDivisor": 1000, - "dayModulus": 86400000000, - }, - ]: - data = [ - rng.integers(0, (0x7FFFFFFFFFFFFFFF / t["nsDivisor"])) - for i in range(nrows) - ] - if day_resolution_timestamps: - data = [int(d / t["dayModulus"]) * t["dayModulus"] for d in data] - test_pdf["col_" + t["name"]] = pd.Series( - np.asarray(data, dtype=t["name"]) - ) - - # Create non-numeric categorical data otherwise parquet may typecast it - data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)] - test_pdf["col_category"] = pd.Series(data, dtype="category") - - # Create non-numeric str data - data = [ascii_letters[rng.integers(0, 52)] for i in range(nrows)] - test_pdf["col_str"] = pd.Series(data, dtype="str") - - return test_pdf - - -@pytest.fixture(params=[0, 1, 10, 100]) -def pdf(request): - return build_pdf(request, False) - - -@pytest.fixture(params=[0, 1, 10, 100]) -def pdf_day_timestamps(request): - return build_pdf(request, True) - - -@pytest.fixture -def gdf(pdf): - return cudf.DataFrame.from_pandas(pdf) - - -@pytest.fixture -def gdf_day_timestamps(pdf_day_timestamps): - return cudf.DataFrame.from_pandas(pdf_day_timestamps) - - -@pytest.fixture(params=["snappy", "gzip", "brotli", None, np.str_("snappy")]) -def parquet_file(request, tmp_path_factory, pdf): - fname = tmp_path_factory.mktemp("parquet") / ( - str(request.param) + "_test.parquet" - ) - pdf.to_parquet(fname, engine="pyarrow", compression=request.param) - return fname - - -def make_pdf(nrows, ncolumns=1, nvalids=0, dtype=np.int64): - test_pdf = pd.DataFrame( - [list(range(ncolumns * i, ncolumns * (i + 1))) for i in range(nrows)], - columns=pd.Index(["foo"], name="bar"), - # Need to ensure that this index is not a RangeIndex to get the - # expected round-tripping behavior from Parquet reader/writer. 
- index=pd.Index(list(range(nrows))), - ) - test_pdf.columns.name = None - - if nvalids: - # Randomly but reproducibly mark subset of rows as invalid - random.seed(1337) - mask = random.sample(range(nrows), nvalids) - test_pdf[test_pdf.index.isin(mask)] = np.nan - if dtype: - test_pdf = test_pdf.astype(dtype) - - return test_pdf - - -@pytest.fixture -def parquet_path_or_buf(datadir): - fname = datadir / "spark_timestamp.snappy.parquet" - try: - with open(fname, "rb") as f: - buffer = BytesIO(f.read()) - except Exception as excpr: - if type(excpr).__name__ == "FileNotFoundError": - pytest.skip(".parquet file is not found") - raise excpr - - def _make_parquet_path_or_buf(src): - if src == "filepath": - return str(fname) - if src == "pathobj": - return fname - if src == "bytes_io": - return buffer - if src == "bytes": - return buffer.getvalue() - if src == "url": - return fname.as_uri() - - raise ValueError("Invalid source type") - - yield _make_parquet_path_or_buf - - -@pytest.fixture(scope="module") -def large_int64_gdf(): - return cudf.DataFrame.from_pandas(pd.DataFrame({"col": range(0, 1 << 20)})) - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["pyarrow", "cudf"]) -@pytest.mark.parametrize( - "columns", - [ - ["col_int8"], - ["col_category"], - ["col_int32", "col_float32"], - ["col_int16", "col_float64", "col_int8"], - None, - ], -) -def test_parquet_reader_basic(parquet_file, columns, engine): - expect = pd.read_parquet(parquet_file, columns=columns) - got = cudf.read_parquet(parquet_file, engine=engine, columns=columns) - - # PANDAS returns category objects whereas cuDF returns hashes - if engine == "cudf": - if "col_category" in expect.columns: - expect = expect.drop(columns=["col_category"]) - if "col_category" in got.columns: - got = got.drop(columns=["col_category"]) - - assert_eq(expect, got) - - -@pytest.mark.filterwarnings("ignore:Using CPU") -@pytest.mark.parametrize("engine", ["cudf"]) -def test_parquet_reader_empty_pandas_dataframe(tmpdir, engine): - df = pd.DataFrame() - fname = tmpdir.join("test_pq_reader_empty_pandas_dataframe.parquet") - df.to_parquet(fname) - assert os.path.exists(fname) - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname, engine=engine) - expect = expect.reset_index(drop=True) - got = got.reset_index(drop=True) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("has_null", [False, True]) -def test_parquet_reader_strings(tmpdir, has_null): - df = pd.DataFrame( - [(1, "aaa", 9.0), (2, "bbb", 8.0), (3, "ccc", 7.0)], - columns=pd.Index(list("abc")), - ) - if has_null: - df.at[1, "b"] = None - fname = tmpdir.join("test_pq_reader_strings.parquet") - df.to_parquet(fname) - assert os.path.exists(fname) - - gdf = cudf.read_parquet(fname, engine="cudf") - - assert gdf["b"].dtype == np.dtype("object") - assert_eq(gdf["b"], df["b"]) - - -@pytest.mark.parametrize("columns", [None, ["b"]]) -@pytest.mark.parametrize("index_col", ["b", "Nameless", None]) -def test_parquet_reader_index_col(tmpdir, index_col, columns): - df = pd.DataFrame({"a": range(3), "b": range(3, 6), "c": range(6, 9)}) - - if index_col is None: - # No index column - df.reset_index(drop=True, inplace=True) - elif index_col == "Nameless": - # Index column but no name - df.set_index("a", inplace=True) - df.index.name = None - else: - # Index column as normal - df.set_index(index_col, inplace=True) - - fname = tmpdir.join("test_pq_reader_index_col.parquet") - - # PANDAS' PyArrow backend always writes the index unless disabled - 
df.to_parquet(fname, index=(index_col is not None)) - assert os.path.exists(fname) - - pdf = pd.read_parquet(fname, columns=columns) - gdf = cudf.read_parquet(fname, engine="cudf", columns=columns) - - assert_eq(pdf, gdf, check_categorical=False) - - -@pytest.mark.parametrize("pandas_compat", [True, False]) -@pytest.mark.parametrize( - "columns", [["a"], ["d"], ["a", "b"], ["a", "d"], None] -) -def test_parquet_reader_pandas_metadata(tmpdir, columns, pandas_compat): - df = pd.DataFrame( - { - "a": range(6, 9), - "b": range(3, 6), - "c": range(6, 9), - "d": ["abc", "def", "xyz"], - } - ) - df.set_index("b", inplace=True) - - fname = tmpdir.join("test_pq_reader_pandas_metadata.parquet") - df.to_parquet(fname) - assert os.path.exists(fname) - - # PANDAS `read_parquet()` and PyArrow `read_pandas()` always includes index - # Instead, directly use PyArrow to optionally omit the index - expect = pa.parquet.read_table( - fname, columns=columns, use_pandas_metadata=pandas_compat - ).to_pandas() - got = cudf.read_parquet( - fname, columns=columns, use_pandas_metadata=pandas_compat - ) - - if pandas_compat or columns is None or "b" in columns: - assert got.index.name == "b" - else: - assert got.index.name is None - assert_eq(expect, got, check_categorical=False) - - -@pytest.mark.parametrize("pandas_compat", [True, False]) -@pytest.mark.parametrize("as_bytes", [True, False]) -def test_parquet_range_index_pandas_metadata(tmpdir, pandas_compat, as_bytes): - df = pd.DataFrame( - {"a": range(6, 9), "b": ["abc", "def", "xyz"]}, - index=pd.RangeIndex(3, 6, 1, name="c"), - ) - - fname = tmpdir.join("test_parquet_range_index_pandas_metadata") - df.to_parquet(fname) - assert os.path.exists(fname) - - # PANDAS `read_parquet()` and PyArrow `read_pandas()` always includes index - # Instead, directly use PyArrow to optionally omit the index - expect = pa.parquet.read_table( - fname, use_pandas_metadata=pandas_compat - ).to_pandas() - if as_bytes: - # Make sure we can handle RangeIndex parsing - # in pandas when the input is `bytes` - with open(fname, "rb") as f: - got = cudf.read_parquet( - f.read(), use_pandas_metadata=pandas_compat - ) - else: - got = cudf.read_parquet(fname, use_pandas_metadata=pandas_compat) - - assert_eq(expect, got) - - -def test_parquet_read_metadata(tmp_path, pdf): - if len(pdf) > 100: - pytest.skip("Skipping long setup test") - - def num_row_groups(rows, group_size): - return max(1, (rows + (group_size - 1)) // group_size) - - fname = tmp_path / "metadata.parquet" - row_group_size = 5 - pdf.to_parquet(fname, compression="snappy", row_group_size=row_group_size) - - ( - num_rows, - row_groups, - col_names, - num_columns, - _, # rowgroup_metadata - ) = cudf.io.read_parquet_metadata(fname) - - assert num_columns == len(pdf.columns) - assert num_rows == len(pdf.index) - assert row_groups == num_row_groups(num_rows, row_group_size) - for a, b in zip(col_names, pdf.columns, strict=True): - assert a == b - - -def test_parquet_read_filtered(set_decomp_env_vars, tmpdir): - # Generate data - fname = tmpdir.join("filtered.parquet") - dg.generate( - fname, - dg.Parameters( - num_rows=100, - column_parameters=[ - dg.ColumnParameters( - cardinality=40, - null_frequency=0.05, - generator=lambda: [ - "".join( - random.sample( - string.ascii_letters, random.randint(4, 8) - ) - ) - for _ in range(10) - ], - is_sorted=False, - ), - dg.ColumnParameters( - 40, - 0.2, - lambda: np.random.default_rng(seed=0).integers( - 0, 100, size=10 - ), - True, - ), - ], - seed=42, - ), - format={"name": "parquet", 
"row_group_size": 10}, - use_threads=False, - ) - - # Get dataframes to compare - df = cudf.read_parquet(fname) - df_filtered = cudf.read_parquet(fname, filters=[("1", ">", 60)]) - # PyArrow's read_table function does row-group-level filtering in addition - # to applying given filters once the table has been read into memory. - # Because of this, we aren't using PyArrow as a reference for testing our - # row-group selection method since the only way to only select row groups - # with PyArrow is with the method we use and intend to test. - tbl_filtered = pq.read_table(fname, filters=[("1", ">", 60)]) - - assert_eq(cudf.io.read_parquet_metadata(fname)[1], 10) - assert len(df_filtered) < len(df) - assert len(tbl_filtered) <= len(df_filtered) - - -def test_parquet_read_filtered_everything(tmpdir): - # Generate data - fname = tmpdir.join("filtered_everything.parquet") - df = pd.DataFrame({"x": range(10), "y": list("aabbccddee")}) - df.to_parquet(fname, row_group_size=2) - - # Check filter - df_filtered = cudf.read_parquet(fname, filters=[("x", "==", 12)]) - assert_eq(len(df_filtered), 0) - assert_eq(df_filtered["x"].dtype, "int64") - assert_eq(df_filtered["y"].dtype, "object") - - -def test_parquet_read_filtered_multiple_files(tmpdir): - # Generate data - fname_0 = tmpdir.join("filtered_multiple_files_0.parquet") - df = pd.DataFrame({"x": range(10), "y": list("aabbccddee")}) - df.to_parquet(fname_0, row_group_size=2) - fname_1 = tmpdir.join("filtered_multiple_files_1.parquet") - df = pd.DataFrame({"x": range(10), "y": list("aaccccddee")}) - df.to_parquet(fname_1, row_group_size=2) - fname_2 = tmpdir.join("filtered_multiple_files_2.parquet") - df = pd.DataFrame( - {"x": [0, 1, 9, 9, 4, 5, 6, 7, 8, 9], "y": list("aabbzzddee")} - ) - df.to_parquet(fname_2, row_group_size=2) - - # Check filter - filtered_df = cudf.read_parquet( - [fname_0, fname_1, fname_2], filters=[("x", "==", 2)] - ) - assert_eq( - filtered_df, - cudf.DataFrame({"x": [2, 2], "y": list("bc")}, index=[2, 2]), - ) - - -@pytest.mark.parametrize( - "predicate,expected_len", - [ - ([[("x", "==", 0)], [("z", "==", 0)]], 2), - ([("x", "==", 0), ("z", "==", 0)], 0), - ([("x", "==", 0), ("z", "!=", 0)], 1), - ([("y", "==", "c"), ("x", ">", 8)], 0), - ([("y", "==", "c"), ("x", ">=", 5)], 1), - ([[("y", "==", "c")], [("x", "<", 3)]], 5), - ([[("x", "not in", (0, 9)), ("z", "not in", (4, 5))]], 6), - ([[("y", "==", "c")], [("x", "in", (0, 9)), ("z", "in", (0, 9))]], 4), - ([[("x", "==", 0)], [("x", "==", 1)], [("x", "==", 2)]], 3), - ([[("x", "==", 0), ("z", "==", 9), ("y", "==", "a")]], 1), - ], -) -def test_parquet_read_filtered_complex_predicate( - tmpdir, predicate, expected_len -): - # Generate data - fname = tmpdir.join("filtered_complex_predicate.parquet") - df = pd.DataFrame( - { - "x": range(10), - "y": list("aabbccddee"), - "z": reversed(range(10)), - } - ) - df.to_parquet(fname, row_group_size=2) - - # Check filters - df_filtered = cudf.read_parquet(fname, filters=predicate) - assert_eq(cudf.io.read_parquet_metadata(fname)[1], 10 / 2) - assert_eq(len(df_filtered), expected_len) - - -@pytest.mark.parametrize("row_group_size", [1, 5, 100]) -def test_parquet_read_row_groups(tmpdir, pdf, row_group_size): - if len(pdf) > 100: - pytest.skip("Skipping long setup test") - - if "col_category" in pdf.columns: - pdf = pdf.drop(columns=["col_category"]) - fname = tmpdir.join("row_group.parquet") - pdf.to_parquet(fname, compression="gzip", row_group_size=row_group_size) - - num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata( 
-
-
-@pytest.mark.parametrize("row_group_size", [1, 5, 100])
-def test_parquet_read_row_groups(tmpdir, pdf, row_group_size):
-    if len(pdf) > 100:
-        pytest.skip("Skipping long setup test")
-
-    if "col_category" in pdf.columns:
-        pdf = pdf.drop(columns=["col_category"])
-    fname = tmpdir.join("row_group.parquet")
-    pdf.to_parquet(fname, compression="gzip", row_group_size=row_group_size)
-
-    num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata(
-        fname
-    )
-
-    gdf = [cudf.read_parquet(fname, row_groups=[i]) for i in range(row_groups)]
-    gdf = cudf.concat(gdf)
-    assert_eq(pdf.reset_index(drop=True), gdf.reset_index(drop=True))
-
-    # the first half of the rows come from the first source, the rest from the second
-    gdf = cudf.read_parquet(
-        [fname, fname],
-        row_groups=[
-            list(range(row_groups // 2)),
-            list(range(row_groups // 2, row_groups)),
-        ],
-    )
-    assert_eq(pdf.reset_index(drop=True), gdf.reset_index(drop=True))
-
-
-@pytest.mark.parametrize("row_group_size", [1, 5, 100])
-def test_parquet_read_row_groups_non_contiguous(tmpdir, pdf, row_group_size):
-    if len(pdf) > 100:
-        pytest.skip("Skipping long setup test")
-
-    fname = tmpdir.join("row_group.parquet")
-    pdf.to_parquet(fname, compression="gzip", row_group_size=row_group_size)
-
-    num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata(
-        fname
-    )
-
-    # alternate rows between the two sources
-    gdf = cudf.read_parquet(
-        [fname, fname],
-        row_groups=[
-            list(range(0, row_groups, 2)),
-            list(range(1, row_groups, 2)),
-        ],
-    )
-
-    ref_df = [
-        cudf.read_parquet(fname, row_groups=i)
-        for i in list(range(0, row_groups, 2)) + list(range(1, row_groups, 2))
-    ]
-    ref_df = cudf.concat(ref_df)
-
-    assert_eq(ref_df, gdf)
-
-
-def test_parquet_reader_spark_timestamps(datadir):
-    fname = datadir / "spark_timestamp.snappy.parquet"
-
-    expect = pd.read_parquet(fname)
-    got = cudf.read_parquet(fname)
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_spark_decimals(datadir):
-    fname = datadir / "spark_decimal.parquet"
-
-    expect = pd.read_parquet(fname)
-    got = cudf.read_parquet(fname)
-
-    assert_eq(expect, got)
-
-
-@pytest.mark.parametrize("columns", [["a"], ["b", "a"], None])
-def test_parquet_reader_decimal128(datadir, columns):
-    fname = datadir / "nested_decimal128_file.parquet"
-    got = cudf.read_parquet(fname, columns=columns)
-    expect = cudf.read_parquet(fname, columns=columns)
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_microsecond_timestamps(datadir):
-    fname = datadir / "usec_timestamp.parquet"
-
-    expect = pd.read_parquet(fname)
-    got = cudf.read_parquet(fname)
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_mixedcompression(datadir):
-    fname = datadir / "mixed_compression.parquet"
-
-    expect = pd.read_parquet(fname)
-    got = cudf.read_parquet(fname)
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_select_columns(datadir):
-    fname = datadir / "nested_column_map.parquet"
-
-    expect = cudf.read_parquet(fname).to_pandas()[["value"]]
-    got = cudf.read_parquet(fname, columns=["value"])
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_invalids(tmpdir):
-    test_pdf = make_pdf(nrows=1000, nvalids=1000 // 4, dtype="Int64")
-
-    fname = tmpdir.join("invalids.parquet")
-    test_pdf.to_parquet(fname, engine="pyarrow")
-
-    expect = pd.read_parquet(fname)
-    got = cudf.read_parquet(fname)
-
-    assert_eq(expect, got.to_pandas(nullable=True))
-
-
-def test_parquet_reader_filenotfound(tmpdir):
-    with pytest.raises(FileNotFoundError):
-        cudf.read_parquet("TestMissingFile.parquet")
-
-    with pytest.raises(FileNotFoundError):
-        cudf.read_parquet(tmpdir.mkdir("cudf_parquet"))
-
-
-def test_parquet_reader_local_filepath():
-    fname = "~/TestLocalFile.parquet"
-    if not os.path.isfile(fname):
-        pytest.skip("Local .parquet file is not found")
-
-    cudf.read_parquet(fname)
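
As the row-group tests above rely on, `cudf.read_parquet` accepts one list of row-group ids per input source, and `cudf.io.read_parquet_metadata` returns the row-group count as its second element. A short self-contained sketch (the file name is illustrative):

    import cudf
    import pandas as pd

    pd.DataFrame({"a": range(100)}).to_parquet("rg.parquet", row_group_size=10)
    num_rows, num_rgs = cudf.io.read_parquet_metadata("rg.parquet")[:2]

    # One list of row-group ids per source: even groups from the first
    # "source", odd groups from the second.
    gdf = cudf.read_parquet(
        ["rg.parquet", "rg.parquet"],
        row_groups=[list(range(0, num_rgs, 2)), list(range(1, num_rgs, 2))],
    )
    assert len(gdf) == num_rows
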
-
-
-@pytest.mark.parametrize(
-    "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"]
-)
-def test_parquet_reader_filepath_or_buffer(parquet_path_or_buf, src):
-    expect = pd.read_parquet(parquet_path_or_buf("filepath"))
-    got = cudf.read_parquet(parquet_path_or_buf(src))
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_file_types(parquet_path_or_buf):
-    expect = cudf.read_parquet(parquet_path_or_buf("filepath"))
-    fs, _, paths = get_fs_token_paths(parquet_path_or_buf("filepath"))
-
-    # Pass open fsspec file
-    with fs.open(paths[0], mode="rb") as fil:
-        got1 = cudf.read_parquet(fil)
-    assert_eq(expect, got1)
-
-    # Pass path only
-    got2 = cudf.read_parquet(paths[0])
-    assert_eq(expect, got2)
-
-
-def create_parquet_source(df, src_type, fname):
-    if src_type == "filepath":
-        df.to_parquet(fname, engine="pyarrow")
-        return str(fname)
-    if src_type == "pathobj":
-        df.to_parquet(fname, engine="pyarrow")
-        return fname
-    if src_type == "bytes_io":
-        buffer = BytesIO()
-        df.to_parquet(buffer, engine="pyarrow")
-        return buffer
-    if src_type == "bytes":
-        buffer = BytesIO()
-        df.to_parquet(buffer, engine="pyarrow")
-        return buffer.getvalue()
-    if src_type == "url":
-        df.to_parquet(fname, engine="pyarrow")
-        return pathlib.Path(fname).as_uri()
-
-
-@pytest.mark.parametrize(
-    "src", ["filepath", "pathobj", "bytes_io", "bytes", "url"]
-)
-def test_parquet_reader_multiple_files(tmpdir, src):
-    test_pdf1 = make_pdf(nrows=1000, nvalids=1000 // 2, dtype="float64")
-    test_pdf2 = make_pdf(nrows=500, dtype="float64")
-    expect = pd.concat([test_pdf1, test_pdf2])
-
-    src1 = create_parquet_source(test_pdf1, src, tmpdir.join("multi1.parquet"))
-    src2 = create_parquet_source(test_pdf2, src, tmpdir.join("multi2.parquet"))
-    got = cudf.read_parquet([src1, src2])
-
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_reordered_columns(tmpdir):
-    src = pd.DataFrame(
-        {"name": ["cow", None, "duck", "fish", None], "id": [0, 1, 2, 3, 4]}
-    )
-    fname = tmpdir.join("test_parquet_reader_reordered_columns.parquet")
-    src.to_parquet(fname)
-    assert os.path.exists(fname)
-    expect = pd.DataFrame(
-        {"id": [0, 1, 2, 3, 4], "name": ["cow", None, "duck", "fish", None]}
-    )
-    got = cudf.read_parquet(fname, columns=["id", "name"])
-    assert_eq(expect, got, check_dtype=False)
-
-
-def test_parquet_reader_reordered_columns_mixed(tmpdir):
-    src = pd.DataFrame(
-        {
-            "name": ["cow", None, "duck", "fish", None],
-            "list0": [
-                [[1, 2], [3, 4]],
-                None,
-                [[5, 6], None],
-                [[1]],
-                [[5], [6, None, 8]],
-            ],
-            "id": [0, 1, 2, 3, 4],
-            "list1": [
-                [[1, 2], [3, 4]],
-                [[0, 0]],
-                [[5, 6], [10, 12]],
-                [[1]],
-                [[5], [6, 8]],
-            ],
-        }
-    )
-    fname = tmpdir.join("test_parquet_reader_reordered_columns.parquet")
-    src.to_parquet(fname)
-    assert os.path.exists(fname)
-    expect = pd.DataFrame(
-        {
-            "list1": [
-                [[1, 2], [3, 4]],
-                [[0, 0]],
-                [[5, 6], [10, 12]],
-                [[1]],
-                [[5], [6, 8]],
-            ],
-            "id": [0, 1, 2, 3, 4],
-            "list0": [
-                [[1, 2], [3, 4]],
-                None,
-                [[5, 6], None],
-                [[1]],
-                [[5], [6, None, 8]],
-            ],
-            "name": ["cow", None, "duck", "fish", None],
-        }
-    )
-    got = cudf.read_parquet(fname, columns=["list1", "id", "list0", "name"])
-    assert_eq(expect, got, check_dtype=False)
-
-
-def test_parquet_reader_list_basic(tmpdir):
-    expect = pd.DataFrame({"a": [[[1, 2], [3, 4]], None, [[5, 6], None]]})
-    fname = tmpdir.join("test_parquet_reader_list_basic.parquet")
-    expect.to_parquet(fname)
-    assert os.path.exists(fname)
-    got = cudf.read_parquet(fname)
-    assert_eq(expect, got)
-
-
-def test_parquet_reader_list_table(tmpdir):
-    expect = pd.DataFrame(
-        {
-            "a": [[[1, 2], [3, 4]], None, [[5, 6], None]],
-            "b": [[None, None], None, [None, None]],
-            "c": [[[1, 2, 3]], [[None]], [[], None]],
-            "d": [[[]], [[None]], [[1, 2, 3], None]],
-            "e": [[["cows"]], [["dogs"]], [["cats", "birds", "owls"], None]],
-        }
-    )
-    fname = tmpdir.join("test_parquet_reader_list_table.parquet")
-    expect.to_parquet(fname)
-    assert os.path.exists(fname)
-    got = cudf.read_parquet(fname)
-    assert pa.Table.from_pandas(expect).equals(got.to_arrow())
-
-
-def int_gen(first_val, i):
-    """
-    Returns an integer based on an absolute index and a starting value. Used
-    as input to `list_gen`.
-    """
-    return int(i + first_val)
-
-
-strings = [
-    "cats",
-    "dogs",
-    "cows",
-    "birds",
-    "fish",
-    "sheep",
-    "owls",
-    "bears",
-    "ants",
-]
-
-
-def string_gen(first_val, i):
-    """
-    Returns a string based on an absolute index and a starting value. Used as
-    input to `list_gen`.
-    """
-    return strings[int_gen(first_val, i) % len(strings)]
-
-
-def list_row_gen(
-    gen, first_val, list_size, lists_per_row, include_validity=False
-):
-    """
-    Generate a single row for a List<List<>> column based on input parameters.
-
-    Parameters
-    ----------
-    gen : A callable which generates an individual leaf element based on an
-          absolute index.
-    first_val : Generate the column as if it had started at 'first_val'
-                instead of 0.
-    list_size : Size of each generated list.
-    lists_per_row : Number of lists to generate per row.
-    include_validity : Whether or not to include nulls as part of the
-                       column. If true, it will add a selection of nulls at
-                       both the topmost row level and at the leaf level.
-
-    Returns
-    -------
-    The generated list column.
-    """
-
-    def L(list_size, first_val):
-        return [
-            (gen(first_val, i) if i % 2 == 0 else None)
-            if include_validity
-            else (gen(first_val, i))
-            for i in range(list_size)
-        ]
-
-    return [
-        (L(list_size, first_val + (list_size * i)) if i % 2 == 0 else None)
-        if include_validity
-        else L(list_size, first_val + (list_size * i))
-        for i in range(lists_per_row)
-    ]
-
-
-def list_gen(gen, num_rows, lists_per_row, list_size, include_validity=False):
-    """
-    Generate a list column based on input parameters.
-
-    Parameters
-    ----------
-    gen : A callable which generates an individual leaf element based on an
-          absolute index.
-    num_rows : Number of rows to generate.
-    lists_per_row : Number of lists to generate per row.
-    list_size : Size of each generated list.
-    include_validity : Whether or not to include nulls as part of the
-                       column. If true, it will add a selection of nulls at
-                       both the topmost row level and at the leaf level.
-
-    Returns
-    -------
-    The generated list column.
-    """
-
-    def L(list_size, first_val):
-        return [
-            (gen(first_val, i) if i % 2 == 0 else None)
-            if include_validity
-            else (gen(first_val, i))
-            for i in range(list_size)
-        ]
-
-    def R(first_val, lists_per_row, list_size):
-        return [
-            L(list_size, first_val + (list_size * i))
-            for i in range(lists_per_row)
-        ]
-
-    return [
-        (
-            R(
-                lists_per_row * list_size * i,
-                lists_per_row,
-                list_size,
-            )
-            if i % 2 == 0
-            else None
-        )
-        if include_validity
-        else R(
-            lists_per_row * list_size * i,
-            lists_per_row,
-            list_size,
-        )
-        for i in range(num_rows)
-    ]
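
For concreteness, the nesting these helpers produce, worked out by hand from the definitions above:

    # Leaf values are consecutive integers laid out row-major across
    # rows, lists, and leaf slots:
    assert list_gen(int_gen, 2, 2, 2) == [
        [[0, 1], [2, 3]],  # row 0 starts at 0
        [[4, 5], [6, 7]],  # row 1 starts at 2 * 2 * 1 = 4
    ]

    # With include_validity=True, odd leaf slots and odd rows are nulled:
    assert list_gen(int_gen, 2, 2, 2, include_validity=True) == [
        [[0, None], [2, None]],
        None,
    ]
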
- """ - - def L(list_size, first_val): - return [ - (gen(first_val, i) if i % 2 == 0 else None) - if include_validity - else (gen(first_val, i)) - for i in range(list_size) - ] - - def R(first_val, lists_per_row, list_size): - return [ - L(list_size, first_val + (list_size * i)) - for i in range(lists_per_row) - ] - - return [ - ( - R( - lists_per_row * list_size * i, - lists_per_row, - list_size, - ) - if i % 2 == 0 - else None - ) - if include_validity - else R( - lists_per_row * list_size * i, - lists_per_row, - list_size, - ) - for i in range(num_rows) - ] - - -def test_parquet_reader_list_large(tmpdir): - expect = pd.DataFrame({"a": list_gen(int_gen, 64, 40, 25)}) - fname = tmpdir.join("test_parquet_reader_list_large.parquet") - expect.to_parquet(fname) - assert os.path.exists(fname) - got = cudf.read_parquet(fname) - assert_eq(expect, got, check_dtype=False) - - -def test_parquet_reader_list_validity(tmpdir): - expect = pd.DataFrame( - {"a": list_gen(int_gen, 64, 40, 25, include_validity=True)} - ) - fname = tmpdir.join("test_parquet_reader_list_validity.parquet") - expect.to_parquet(fname) - assert os.path.exists(fname) - got = cudf.read_parquet(fname) - assert_eq(expect, got, check_dtype=False) - - -def test_parquet_reader_list_large_mixed(tmpdir): - expect = pd.DataFrame( - { - "a": list_gen(string_gen, 64, 40, 25), - "b": list_gen(int_gen, 64, 40, 25), - "c": list_gen(int_gen, 64, 40, 25, include_validity=True), - "d": list_gen(string_gen, 64, 40, 25, include_validity=True), - } - ) - fname = tmpdir.join("test_parquet_reader_list_large_mixed.parquet") - expect.to_parquet(fname) - assert os.path.exists(fname) - got = cudf.read_parquet(fname) - assert pa.Table.from_pandas(expect).equals(got.to_arrow()) - - -def test_parquet_reader_list_large_multi_rowgroup(tmpdir): - # > 40 row groups - num_rows = 10000 - num_docs = num_rows / 2 - num_categories = 100 - row_group_size = 100 - - cupy.random.seed(0) - - # generate a random pairing of doc: category - documents = cudf.DataFrame( - { - "document_id": cupy.random.randint(num_docs, size=num_rows), - "category_id": cupy.random.randint(num_categories, size=num_rows), - } - ) - - # group categories by document_id to create a list column - expect = documents.groupby("document_id").agg({"category_id": ["collect"]}) - expect.columns = expect.columns.get_level_values(0) - expect.reset_index(inplace=True) - - # round trip the dataframe to/from parquet - fname = tmpdir.join( - "test_parquet_reader_list_large_multi_rowgroup.parquet" - ) - expect.to_pandas().to_parquet(fname, row_group_size=row_group_size) - got = cudf.read_parquet(fname) - - assert_eq(expect, got) - - -def test_parquet_reader_list_large_multi_rowgroup_nulls(tmpdir): - # 25 row groups - num_rows = 2500 - row_group_size = 100 - - expect = cudf.DataFrame( - {"a": list_gen(int_gen, num_rows, 3, 2, include_validity=True)} - ) - - # round trip the dataframe to/from parquet - fname = tmpdir.join( - "test_parquet_reader_list_large_multi_rowgroup_nulls.parquet" - ) - expect.to_pandas().to_parquet(fname, row_group_size=row_group_size) - assert os.path.exists(fname) - got = cudf.read_parquet(fname) - assert_eq(expect, got) - - -def struct_gen(gen, skip_rows, num_rows, include_validity=False): - """ - Generate a struct column based on input parameters. - - Parameters - ---------- - gen : A array of callables which generate an individual row based on an - absolute index. - skip_rows : Generate the column as if it had started at 'skip_rows' - instead of 0. 
-
-
-def struct_gen(gen, skip_rows, num_rows, include_validity=False):
-    """
-    Generate a struct column based on input parameters.
-
-    Parameters
-    ----------
-    gen : An array of callables which generate an individual row based on an
-          absolute index.
-    skip_rows : Generate the column as if it had started at 'skip_rows'
-                instead of 0. The intent here is to emulate the skip_rows
-                parameter of the parquet reader.
-    num_rows : Number of rows to generate.
-    include_validity : Whether or not to include nulls as part of the
-                       column. If true, it will add a selection of nulls at
-                       both the field level and at the value level.
-
-    Returns
-    -------
-    The generated struct column.
-    """
-
-    def R(first_val, num_fields):
-        return {
-            "col" + str(f): (
-                gen[f](first_val, first_val) if f % 4 != 0 else None
-            )
-            if include_validity
-            else (gen[f](first_val, first_val))
-            for f in range(len(gen))
-        }
-
-    return [
-        (R((i + skip_rows), len(gen)) if (i + skip_rows) % 4 != 0 else None)
-        if include_validity
-        else R((i + skip_rows), len(gen))
-        for i in range(num_rows)
-    ]
-
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        # struct
-        [
-            {"a": 1, "b": 2},
-            {"a": 10, "b": 20},
-            {"a": None, "b": 22},
-            {"a": None, "b": None},
-            {"a": 15, "b": None},
-        ],
-        # struct-of-list
-        [
-            {"a": 1, "b": 2, "c": [1, 2, 3]},
-            {"a": 10, "b": 20, "c": [4, 5]},
-            {"a": None, "b": 22, "c": [6]},
-            {"a": None, "b": None, "c": None},
-            {"a": 15, "b": None, "c": [-1, -2]},
-            None,
-            {"a": 100, "b": 200, "c": [-10, None, -20]},
-        ],
-        # list-of-struct
-        [
-            [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}],
-            None,
-            [{"a": 10, "b": 20}],
-            [{"a": 100, "b": 200}, {"a": None, "b": 300}, None],
-        ],
-        # struct-of-struct
-        [
-            {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2},
-            {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4},
-            {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6},
-            {"a": 7, "b": None, "c": 8},
-            {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None},
-            None,
-            {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10},
-        ],
-    ],
-)
-def test_parquet_reader_struct_basic(tmpdir, data):
-    expect = pa.Table.from_pydict({"struct": data})
-    fname = tmpdir.join("test_parquet_reader_struct_basic.parquet")
-    pa.parquet.write_table(expect, fname)
-    assert os.path.exists(fname)
-    got = cudf.read_parquet(fname)
-    assert expect.equals(got.to_arrow())
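
Again for concreteness, what `struct_gen` produces, computed from the definition above (field `f` of row `i` is `gen[f](i + skip_rows, i + skip_rows)`):

    assert struct_gen([int_gen, string_gen], 0, 3) == [
        {"col0": 0, "col1": "cats"},  # int_gen(0, 0) == 0, strings[0]
        {"col0": 2, "col1": "cows"},  # int_gen(1, 1) == 2, strings[2]
        {"col0": 4, "col1": "fish"},  # int_gen(2, 2) == 4, strings[4]
    ]

    # With include_validity=True, every fourth row and every fourth field
    # (here "col0", since 0 % 4 == 0) are nulled:
    assert struct_gen([int_gen, string_gen], 0, 2, include_validity=True) == [
        None,
        {"col0": None, "col1": "cows"},
    ]
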
-
-
-def select_columns_params():
-    dfs = [
-        # struct
-        (
-            [
-                {"a": 1, "b": 2},
-                {"a": 10, "b": 20},
-                {"a": None, "b": 22},
-                {"a": None, "b": None},
-                {"a": 15, "b": None},
-            ],
-            [["struct"], ["struct.a"], ["struct.b"], ["c"]],
-        ),
-        # struct-of-list
-        (
-            [
-                {"a": 1, "b": 2, "c": [1, 2, 3]},
-                {"a": 10, "b": 20, "c": [4, 5]},
-                {"a": None, "b": 22, "c": [6]},
-                {"a": None, "b": None, "c": None},
-                {"a": 15, "b": None, "c": [-1, -2]},
-                None,
-                {"a": 100, "b": 200, "c": [-10, None, -20]},
-            ],
-            [
-                ["struct"],
-                ["struct.c"],
-                ["struct.c.list"],
-                ["struct.c.list.item"],
-                ["struct.b", "struct.c"],
-                ["struct.b", "struct.d", "struct.c"],
-            ],
-        ),
-        # list-of-struct
-        (
-            [
-                [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}],
-                None,
-                [{"a": 10, "b": 20}],
-                [{"a": 100, "b": 200}, {"a": None, "b": 300}, None],
-            ],
-            [
-                ["struct"],
-                ["struct.list"],
-                ["struct.list.item"],
-                ["struct.list.item.a", "struct.list.item.b"],
-                ["struct.list.item.c"],
-            ],
-        ),
-        # struct with "." in field names
-        (
-            [
-                {"a.b": 1, "b.a": 2},
-                {"a.b": 10, "b.a": 20},
-                {"a.b": None, "b.a": 22},
-                {"a.b": None, "b.a": None},
-                {"a.b": 15, "b.a": None},
-            ],
-            [["struct"], ["struct.a"], ["struct.b.a"]],
-        ),
-    ]
-    for df_col_pair in dfs:
-        for cols in df_col_pair[1]:
-            yield df_col_pair[0], cols
-
-
-@pytest.mark.parametrize("data, columns", select_columns_params())
-def test_parquet_reader_struct_select_columns(tmpdir, data, columns):
-    table = pa.Table.from_pydict({"struct": data})
-    buff = BytesIO()
-
-    pa.parquet.write_table(table, buff)
-
-    expect = pq.ParquetFile(buff).read(columns=columns)
-    got = cudf.read_parquet(buff, columns=columns)
-    assert expect.equals(got.to_arrow())
-
-
-def test_parquet_reader_struct_los_large(tmpdir):
-    num_rows = 256
-    list_size = 64
-    data = [
-        struct_gen([string_gen, int_gen, string_gen], 0, list_size, False)
-        if i % 2 == 0
-        else None
-        for i in range(num_rows)
-    ]
-    expect = pa.Table.from_pydict({"los": data})
-    fname = tmpdir.join("test_parquet_reader_struct_los_large.parquet")
-    pa.parquet.write_table(expect, fname)
-    assert os.path.exists(fname)
-    got = cudf.read_parquet(fname)
-    assert expect.equals(got.to_arrow())
-
-
-@pytest.mark.parametrize(
-    "params", [[3, 4, 32, False], [3, 4, 32, True], [50, 10, 64, True]]
-)
-def test_parquet_reader_struct_sol_table(tmpdir, params):
-    # Struct<List<List>>
-    lists_per_row = params[0]
-    list_size = params[1]
-    num_rows = params[2]
-    include_validity = params[3]
-
-    def list_gen_wrapped(x, y):
-        return list_row_gen(
-            int_gen, x * list_size * lists_per_row, list_size, lists_per_row
-        )
-
-    def string_list_gen_wrapped(x, y):
-        return list_row_gen(
-            string_gen,
-            x * list_size * lists_per_row,
-            list_size,
-            lists_per_row,
-            include_validity,
-        )
-
-    data = struct_gen(
-        [int_gen, string_gen, list_gen_wrapped, string_list_gen_wrapped],
-        0,
-        num_rows,
-        include_validity,
-    )
-    expect = pa.Table.from_pydict({"sol": data})
-    fname = tmpdir.join("test_parquet_reader_struct_sol_table.parquet")
-    pa.parquet.write_table(expect, fname)
-    assert os.path.exists(fname)
-    got = cudf.read_parquet(fname)
-    assert expect.equals(got.to_arrow())
-
-
-def test_parquet_reader_v2(tmpdir, simple_pdf):
-    pdf_fname = tmpdir.join("pdfv2.parquet")
-    simple_pdf.to_parquet(pdf_fname, data_page_version="2.0")
-    assert_eq(cudf.read_parquet(pdf_fname), simple_pdf)
-
-    cudf.from_pandas(simple_pdf).to_parquet(pdf_fname, header_version="2.0")
-    assert_eq(cudf.read_parquet(pdf_fname), simple_pdf)
-
-
-def test_parquet_delta_byte_array(datadir):
-    fname = datadir / "delta_byte_arr.parquet"
-    assert_eq(cudf.read_parquet(fname), pd.read_parquet(fname))
-
-
-# values chosen to exercise:
-#       1 - header only, no bitpacked values
-#       2 - one bitpacked value
-#      23 - one partially filled miniblock
-#      32 - almost full miniblock
-#      33 - one full miniblock
-#      34 - one full miniblock plus one value in new miniblock
-#     128 - almost full block
-#     129 - one full block
-#     130 - one full block plus one value in new block
-# 129 * 3 - multiple blocks
-def delta_num_rows():
-    return [1, 2, 23, 32, 33, 34, 128, 129, 130, 129 * 3]
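
The boundaries listed above follow from the DELTA_BINARY_PACKED layout: the first value is stored in the page header, and the remaining n - 1 deltas are bit-packed in miniblocks of 32 values, four miniblocks (128 values) to a block. Those sizes are the parquet-format defaults; the helper below is purely illustrative:

    def delta_layout(nrows, miniblock=32, block=128):
        # (full blocks, full trailing miniblocks, leftover values)
        deltas = max(nrows - 1, 0)
        return (deltas // block, (deltas % block) // miniblock, deltas % miniblock)

    assert delta_layout(1) == (0, 0, 0)    # header only
    assert delta_layout(33) == (0, 1, 0)   # one full miniblock
    assert delta_layout(34) == (0, 1, 1)   # plus one value in a new miniblock
    assert delta_layout(129) == (1, 0, 0)  # one full block
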
- "null_frequency": null_frequency, - "cardinality": nrows, - }, - ], - rows=nrows, - seed=0, - use_threads=False, - ) - # Roundabout conversion to pandas to preserve nulls/data types - cudf_table = cudf.DataFrame.from_arrow(arrow_table) - test_pdf = cudf_table.to_pandas(nullable=True) - pdf_fname = tmpdir.join("pdfv2.parquet") - test_pdf.to_parquet( - pdf_fname, - version="2.6", - column_encoding="DELTA_BINARY_PACKED", - data_page_version="2.0", - data_page_size=64 * 1024, - engine="pyarrow", - use_dictionary=False, - ) - cdf = cudf.read_parquet(pdf_fname) - pcdf = cudf.from_pandas(test_pdf) - assert_eq(cdf, pcdf) - - # Write back out with cudf and make sure pyarrow can read it - cudf_fname = tmpdir.join("cudfv2.parquet") - pcdf.to_parquet( - cudf_fname, - compression=None, - header_version="2.0", - use_dictionary=False, - ) - - cdf2 = cudf.from_pandas(pd.read_parquet(cudf_fname)) - assert_eq(cdf2, cdf) - - -@pytest.mark.parametrize("nrows", delta_num_rows()) -@pytest.mark.parametrize("add_nulls", [True, False]) -@pytest.mark.parametrize("max_string_length", [12, 48, 96, 128]) -@pytest.mark.parametrize( - "str_encoding", ["DELTA_BYTE_ARRAY", "DELTA_LENGTH_BYTE_ARRAY"] -) -def test_delta_byte_array_roundtrip( - nrows, add_nulls, max_string_length, str_encoding, tmpdir -): - null_frequency = 0.25 if add_nulls else 0 - - # Create a pandas dataframe with random data of mixed lengths - test_pdf = dg.rand_dataframe( - dtypes_meta=[ - { - "dtype": "str", - "null_frequency": null_frequency, - "cardinality": nrows, - "max_string_length": max_string_length, - }, - ], - rows=nrows, - seed=0, - use_threads=False, - ).to_pandas() - - pdf_fname = tmpdir.join("pdfdeltaba.parquet") - test_pdf.to_parquet( - pdf_fname, - version="2.6", - column_encoding=str_encoding, - data_page_version="2.0", - data_page_size=64 * 1024, - engine="pyarrow", - use_dictionary=False, - ) - cdf = cudf.read_parquet(pdf_fname) - pcdf = cudf.from_pandas(test_pdf) - assert_eq(cdf, pcdf) - - # Write back out with cudf and make sure pyarrow can read it - cudf_fname = tmpdir.join("cdfdeltaba.parquet") - pcdf.to_parquet( - cudf_fname, - compression="snappy", - header_version="2.0", - use_dictionary=False, - ) - cdf2 = cudf.from_pandas(pd.read_parquet(cudf_fname)) - assert_eq(cdf2, cdf) - - -@pytest.mark.parametrize("nrows", delta_num_rows()) -@pytest.mark.parametrize("add_nulls", [True, False]) -@pytest.mark.parametrize( - "str_encoding", ["DELTA_BYTE_ARRAY", "DELTA_LENGTH_BYTE_ARRAY"] -) -def test_delta_struct_list(tmpdir, nrows, add_nulls, str_encoding): - # Struct> - lists_per_row = 3 - list_size = 4 - num_rows = nrows - include_validity = add_nulls - - def list_gen_wrapped(x, y): - return list_row_gen( - int_gen, x * list_size * lists_per_row, list_size, lists_per_row - ) - - def string_list_gen_wrapped(x, y): - return list_row_gen( - string_gen, - x * list_size * lists_per_row, - list_size, - lists_per_row, - include_validity, - ) - - data = struct_gen( - [int_gen, string_gen, list_gen_wrapped, string_list_gen_wrapped], - 0, - num_rows, - include_validity, - ) - test_pdf = pa.Table.from_pydict({"sol": data}).to_pandas() - pdf_fname = tmpdir.join("pdfdeltaba.parquet") - test_pdf.to_parquet( - pdf_fname, - version="2.6", - column_encoding={ - "sol.col0": "DELTA_BINARY_PACKED", - "sol.col1": str_encoding, - "sol.col2.list.element.list.element": "DELTA_BINARY_PACKED", - "sol.col3.list.element.list.element": str_encoding, - }, - data_page_version="2.0", - data_page_size=64 * 1024, - engine="pyarrow", - use_dictionary=False, - ) - # 
-    # sanity check to verify file is written properly
-    assert_eq(test_pdf, pd.read_parquet(pdf_fname))
-    cdf = cudf.read_parquet(pdf_fname)
-    pcdf = cudf.from_pandas(test_pdf)
-    assert_eq(cdf, pcdf)
-
-    # Write back out with cudf and make sure pyarrow can read it
-    cudf_fname = tmpdir.join("cdfdeltaba.parquet")
-    pcdf.to_parquet(
-        cudf_fname,
-        compression="snappy",
-        header_version="2.0",
-        use_dictionary=False,
-    )
-    cdf2 = cudf.from_pandas(pd.read_parquet(cudf_fname))
-    assert_eq(cdf2, cdf)
-
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        # Structs
-        {
-            "being": [
-                None,
-                {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}},
-                {"human?": None, "Deets": {"Name": "Angua", "Age": 25}},
-                {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}},
-                {"human?": False, "Deets": None},
-                {"human?": None, "Deets": {"Name": "Mr", "Age": None}},
-            ]
-        },
-        # List of Structs
-        {
-            "family": [
-                [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}],
-                [
-                    {"human?": None, "deets": {"weight": 5.3, "age": 25}},
-                    {"human?": False, "deets": {"weight": 8.0, "age": 31}},
-                    {"human?": False, "deets": None},
-                ],
-                [],
-                [{"human?": None, "deets": {"weight": 6.9, "age": None}}],
-            ]
-        },
-        # Struct of Lists
-        {
-            "Real estate records": [
-                None,
-                {
-                    "Status": "NRI",
-                    "Ownerships": {
-                        "land_unit": [None, 2, None],
-                        "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]],
-                    },
-                },
-                {
-                    "Status": None,
-                    "Ownerships": {
-                        "land_unit": [4, 5],
-                        "flats": [[7, 8], []],
-                    },
-                },
-                {
-                    "Status": "RI",
-                    "Ownerships": {"land_unit": None, "flats": [[]]},
-                },
-                {"Status": "RI", "Ownerships": None},
-                {
-                    "Status": None,
-                    "Ownerships": {
-                        "land_unit": [7, 8, 9],
-                        "flats": [[], [], []],
-                    },
-                },
-            ]
-        },
-    ],
-)
-def test_parquet_reader_nested_v2(tmpdir, data):
-    expect = pd.DataFrame(data)
-    pdf_fname = tmpdir.join("pdfv2.parquet")
-    expect.to_parquet(pdf_fname, data_page_version="2.0")
-    assert_eq(cudf.read_parquet(pdf_fname), expect)
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-def test_parquet_writer_cpu_pyarrow(
-    tmpdir, pdf_day_timestamps, gdf_day_timestamps
-):
-    pdf_fname = tmpdir.join("pdf.parquet")
-    gdf_fname = tmpdir.join("gdf.parquet")
-
-    if len(pdf_day_timestamps) == 0:
-        pdf_day_timestamps = pdf_day_timestamps.reset_index(drop=True)
-        gdf_day_timestamps = pdf_day_timestamps.reset_index(drop=True)
-
-    pdf_day_timestamps.to_parquet(pdf_fname.strpath)
-    gdf_day_timestamps.to_parquet(gdf_fname.strpath, engine="pyarrow")
-
-    assert os.path.exists(pdf_fname)
-    assert os.path.exists(gdf_fname)
-
-    expect = pa.parquet.read_pandas(pdf_fname)
-    got = pa.parquet.read_pandas(gdf_fname)
-
-    assert_eq(expect, got)
-
-    def clone_field(table, name, datatype):
-        f = table.schema.field(name)
-        return pa.field(f.name, datatype, f.nullable, f.metadata)
-
-    # Pandas uses a datetime64[ns] while we use a datetime64[ms]
-    for t in [expect, got]:
-        for t_col in ["col_datetime64[ms]", "col_datetime64[us]"]:
-            idx = t.schema.get_field_index(t_col)
-            field = clone_field(t, t_col, pa.timestamp("ms"))
-            t = t.set_column(idx, field, t.column(idx).cast(field.type))
-            t = t.replace_schema_metadata()
-
-    assert_eq(expect, got)
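
INT96, used via `int96_timestamps=True` in the test below, is the legacy 12-byte timestamp encoding that Spark and Impala historically wrote: 8 bytes of nanoseconds-within-day plus a 4-byte Julian day. A sketch (the path is illustrative) of confirming the physical type with pyarrow:

    import cudf
    import pyarrow.parquet as pq

    gdf = cudf.DataFrame({"t": cudf.Series([0, 1], dtype="datetime64[ms]")})
    gdf.to_parquet("int96.parquet", int96_timestamps=True)

    meta = pq.ParquetFile("int96.parquet").metadata
    assert meta.row_group(0).column(0).physical_type == "INT96"
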
-
-
-@pytest.mark.filterwarnings("ignore:Using CPU")
-def test_parquet_writer_int96_timestamps(tmpdir, pdf, gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-
-    if len(pdf) == 0:
-        pdf = pdf.reset_index(drop=True)
-        gdf = gdf.reset_index(drop=True)
-
-    if "col_category" in pdf.columns:
-        pdf = pdf.drop(columns=["col_category"])
-    if "col_category" in gdf.columns:
-        gdf = gdf.drop(columns=["col_category"])
-
-    assert_eq(pdf, gdf)
-
-    # Write out the gdf using the GPU accelerated writer with INT96 timestamps
-    gdf.to_parquet(
-        gdf_fname.strpath,
-        index=None,
-        int96_timestamps=True,
-    )
-
-    assert os.path.exists(gdf_fname)
-
-    expect = pdf
-    got = pd.read_parquet(gdf_fname)
-
-    # verify INT96 timestamps were converted back to the same data.
-    assert_eq(expect, got, check_categorical=False, check_dtype=False)
-
-
-def test_multifile_parquet_folder(tmpdir):
-    test_pdf1 = make_pdf(nrows=10, nvalids=10 // 2, dtype="float64")
-    test_pdf2 = make_pdf(nrows=20, dtype="float64")
-    expect = pd.concat([test_pdf1, test_pdf2])
-
-    tmpdir.mkdir("multi_part")
-
-    create_parquet_source(
-        test_pdf1, "filepath", tmpdir.join("multi_part/multi1.parquet")
-    )
-    create_parquet_source(
-        test_pdf2, "filepath", tmpdir.join("multi_part/multi2.parquet")
-    )
-
-    got1 = cudf.read_parquet(tmpdir.join("multi_part/*.parquet"))
-    assert_eq(expect, got1)
-
-    got2 = cudf.read_parquet(tmpdir.join("multi_part"))
-    assert_eq(expect, got2)
-
-
-# Validates the metadata return path of the parquet writer
-def test_parquet_writer_return_metadata(tmpdir, simple_gdf):
-    gdf_fname = tmpdir.join("data1.parquet")
-
-    # Write out the gdf using the GPU accelerated writer
-    df_metadata = simple_gdf.to_parquet(
-        gdf_fname.strpath, index=None, metadata_file_path="test/data1.parquet"
-    )
-    # Verify that we got a valid parquet signature in the initial metadata blob
-    assert df_metadata.tobytes()[0:4] == b"PAR1"
-
-    df_metadata_list1 = [df_metadata]
-    df_metadata_list2 = [df_metadata, df_metadata]
-    merged_metadata1 = merge_parquet_filemetadata(df_metadata_list1)
-    merged_metadata2 = merge_parquet_filemetadata(df_metadata_list2)
-
-    # Verify that we got a valid parquet signature in the final metadata blob
-    assert merged_metadata1.tobytes()[0:4] == b"PAR1"
-    assert merged_metadata2.tobytes()[0:4] == b"PAR1"
-
-    # Make sure aggregation is combining metadata correctly
-    fmd1 = pa.parquet.ParquetFile(BytesIO(merged_metadata1.tobytes())).metadata
-    fmd2 = pa.parquet.ParquetFile(BytesIO(merged_metadata2.tobytes())).metadata
-    assert fmd2.num_columns == fmd1.num_columns
-    assert fmd2.num_rows == 2 * fmd1.num_rows
-    assert fmd2.num_row_groups == 2 * fmd1.num_row_groups
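
The metadata return path exercised above is what makes a Dask-style `_metadata` sidecar possible: each part file's `to_parquet` call returns its footer blob, and the merged blob describes the whole dataset. A sketch under those assumptions (the import path, part file names, and `_metadata` naming convention are assumptions of this sketch, not something this module prescribes):

    import cudf
    from cudf.io.parquet import merge_parquet_filemetadata

    parts = [cudf.DataFrame({"a": [1, 2]}), cudf.DataFrame({"a": [3, 4]})]
    md = [
        part.to_parquet(f"part.{i}.parquet", metadata_file_path=f"part.{i}.parquet")
        for i, part in enumerate(parts)
    ]
    with open("_metadata", "wb") as f:
        f.write(merge_parquet_filemetadata(md).tobytes())
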
-
-
-# Validates the integrity of the GPU accelerated parquet writer.
-def test_parquet_writer_gpu_none_index(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-    pdf_fname = tmpdir.join("pdf.parquet")
-
-    assert_eq(simple_pdf, simple_gdf)
-
-    # Write out the gdf using the GPU accelerated writer
-    simple_gdf.to_parquet(gdf_fname.strpath, index=None)
-    simple_pdf.to_parquet(pdf_fname.strpath, index=None)
-
-    assert os.path.exists(gdf_fname)
-    assert os.path.exists(pdf_fname)
-
-    expect = pd.read_parquet(pdf_fname)
-    got = pd.read_parquet(gdf_fname)
-
-    assert_eq(expect, got, check_categorical=False)
-
-
-def test_parquet_writer_gpu_true_index(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-    pdf_fname = tmpdir.join("pdf.parquet")
-
-    assert_eq(simple_pdf, simple_gdf)
-
-    # Write out the gdf using the GPU accelerated writer
-    simple_gdf.to_parquet(gdf_fname.strpath, index=True)
-    simple_pdf.to_parquet(pdf_fname.strpath, index=True)
-
-    assert os.path.exists(gdf_fname)
-    assert os.path.exists(pdf_fname)
-
-    expect = pd.read_parquet(pdf_fname)
-    got = pd.read_parquet(gdf_fname)
-
-    assert_eq(expect, got, check_categorical=False)
-
-
-def test_parquet_writer_gpu_false_index(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-    pdf_fname = tmpdir.join("pdf.parquet")
-
-    assert_eq(simple_pdf, simple_gdf)
-
-    # Write out the gdf using the GPU accelerated writer
-    simple_gdf.to_parquet(gdf_fname.strpath, index=False)
-    simple_pdf.to_parquet(pdf_fname.strpath, index=False)
-
-    assert os.path.exists(gdf_fname)
-    assert os.path.exists(pdf_fname)
-
-    expect = pd.read_parquet(pdf_fname)
-    got = pd.read_parquet(gdf_fname)
-
-    assert_eq(expect, got, check_categorical=False)
-
-
-def test_parquet_writer_gpu_multi_index(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-    pdf_fname = tmpdir.join("pdf.parquet")
-
-    simple_pdf = simple_pdf.set_index(["col_bool", "col_int8"])
-    simple_gdf = simple_gdf.set_index(["col_bool", "col_int8"])
-
-    assert_eq(simple_pdf, simple_gdf)
-
-    # Write out the gdf using the GPU accelerated writer
-    simple_gdf.to_parquet(gdf_fname.strpath, index=None)
-    simple_pdf.to_parquet(pdf_fname.strpath, index=None)
-
-    assert os.path.exists(gdf_fname)
-    assert os.path.exists(pdf_fname)
-
-    expect = pd.read_parquet(pdf_fname)
-    got = pd.read_parquet(gdf_fname)
-
-    assert_eq(expect, got, check_categorical=False)
-
-
-def test_parquet_writer_gpu_chunked(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-
-    writer = ParquetWriter(gdf_fname)
-    writer.write_table(simple_gdf)
-    writer.write_table(simple_gdf)
-    writer.close()
-
-    assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
-
-
-def test_parquet_writer_gpu_chunked_context(tmpdir, simple_pdf, simple_gdf):
-    gdf_fname = tmpdir.join("gdf.parquet")
-
-    with ParquetWriter(gdf_fname) as writer:
-        writer.write_table(simple_gdf)
-        writer.write_table(simple_gdf)
-
-    got = pd.read_parquet(gdf_fname)
-    expect = pd.concat([simple_pdf, simple_pdf])
-    assert_eq(got, expect)
-
-
-def test_parquet_write_bytes_io(simple_gdf):
-    output = BytesIO()
-    simple_gdf.to_parquet(output)
-    assert_eq(cudf.read_parquet(output), simple_gdf)
-
-
-@pytest.mark.parametrize("store_schema", [True, False])
-def test_parquet_writer_bytes_io(simple_gdf, store_schema):
-    output = BytesIO()
-
-    writer = ParquetWriter(output, store_schema=store_schema)
-    writer.write_table(simple_gdf)
-    writer.write_table(simple_gdf)
-    writer.close()
-
-    assert_eq(cudf.read_parquet(output), cudf.concat([simple_gdf, simple_gdf]))
-
-
-@pytest.mark.parametrize(
-    "row_group_size_kwargs",
-    [
-        {"row_group_size_bytes": 4 * 1024},
-        {"row_group_size_rows": 5000},
-    ],
-)
-def test_parquet_writer_row_group_size(tmpdir, row_group_size_kwargs):
-    # Check that row_group_size options are exposed in Python
-    # See https://github.com/rapidsai/cudf/issues/10978
-
-    size = 20000
-    gdf = cudf.DataFrame({"a": range(size), "b": [1] * size})
-
-    fname = tmpdir.join("gdf.parquet")
-    with ParquetWriter(fname, **row_group_size_kwargs) as writer:
-        writer.write_table(gdf)
-
-    # Simple check for multiple row-groups
-    nrows, nrow_groups, columns, _, _ = cudf.io.parquet.read_parquet_metadata(
-        fname
-    )
-    assert nrows == size
-    assert nrow_groups > 1
-    assert columns == ["a", "b"]
-
-    # Know the specific row-group count for row_group_size_rows
-    if "row_group_size_rows" in row_group_size_kwargs:
-        assert (
-            nrow_groups == size // row_group_size_kwargs["row_group_size_rows"]
-        )
-
-    assert_eq(cudf.read_parquet(fname), gdf)
-
-
-def test_parquet_writer_column_index(tmpdir):
-    # Simple test for presence of indices. Validity is checked
-    # in libcudf tests.
-    # Write 2 files, one with the column index set, one without.
-    # Make sure the former is larger in size.
-
-    size = 20000
-    gdf = cudf.DataFrame({"a": range(size), "b": [1] * size})
-
-    fname = tmpdir.join("gdf.parquet")
-    with ParquetWriter(fname, statistics="ROWGROUP") as writer:
-        writer.write_table(gdf)
-    s1 = os.path.getsize(fname)
-
-    fname = tmpdir.join("gdfi.parquet")
-    with ParquetWriter(fname, statistics="COLUMN") as writer:
-        writer.write_table(gdf)
-    s2 = os.path.getsize(fname)
-    assert s2 > s1
-
-
-@pytest.mark.parametrize(
-    "max_page_size_kwargs",
-    [
-        {"max_page_size_bytes": 4 * 1024},
-        {"max_page_size_rows": 5000},
-    ],
-)
-def test_parquet_writer_max_page_size(tmpdir, max_page_size_kwargs):
-    # Check that max_page_size options are exposed in Python.
-    # Since we don't have access to page metadata, instead check that
-    # a file written with more pages will be slightly larger.
-
-    size = 20000
-    gdf = cudf.DataFrame({"a": range(size), "b": [1] * size})
-
-    fname = tmpdir.join("gdf.parquet")
-    with ParquetWriter(fname, **max_page_size_kwargs) as writer:
-        writer.write_table(gdf)
-    s1 = os.path.getsize(fname)
-
-    assert_eq(cudf.read_parquet(fname), gdf)
-
-    fname = tmpdir.join("gdf0.parquet")
-    with ParquetWriter(fname) as writer:
-        writer.write_table(gdf)
-    s2 = os.path.getsize(fname)
-
-    assert_eq(cudf.read_parquet(fname), gdf)
-    assert s1 > s2
-
-
-@pytest.mark.parametrize("use_dict", [False, True])
-@pytest.mark.parametrize("max_dict_size", [0, 1048576])
-def test_parquet_writer_dictionary_setting(use_dict, max_dict_size):
-    # Simple test for checking the validity of the dictionary encoding setting
-    # and behavior of ParquetWriter in cudf.
-    # Write a table with repetitive data with varying dictionary settings.
-    # Make sure the written columns are dictionary-encoded accordingly.
-
-    # Table with repetitive data
-    table = cudf.DataFrame(
-        {
-            "int32": cudf.Series([1024] * 1024, dtype="int64"),
-        }
-    )
-
-    # Write to Parquet using ParquetWriter
-    buffer = BytesIO()
-    writer = ParquetWriter(
-        buffer,
-        use_dictionary=use_dict,
-        max_dictionary_size=max_dict_size,
-    )
-    writer.write_table(table)
-    writer.close()
-
-    # Read encodings from parquet file
-    got = pq.ParquetFile(buffer)
-    encodings = got.metadata.row_group(0).column(0).encodings
-
-    # Check for `PLAIN_DICTIONARY` encoding if dictionary encoding is enabled
-    # and the dictionary page limit > 0
-    if use_dict is True and max_dict_size > 0:
-        assert "PLAIN_DICTIONARY" in encodings
-    else:
-        assert "PLAIN_DICTIONARY" not in encodings
-
-
-@pytest.mark.parametrize("filename", ["myfile.parquet", None])
-@pytest.mark.parametrize("cols", [["b"], ["c", "b"]])
-def test_parquet_partitioned(tmpdir_factory, cols, filename):
-    rng = np.random.default_rng(seed=0)
-    # Checks that write_to_dataset is wrapping to_parquet
-    # as expected
-    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
-    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
-    size = 100
-    pdf = pd.DataFrame(
-        {
-            "a": np.arange(0, stop=size, dtype="int64"),
-            "b": rng.choice(list("abcd"), size=size),
-            "c": rng.choice(np.arange(4), size=size),
-        }
-    )
-    pdf.to_parquet(pdf_dir, index=False, partition_cols=cols)
-    gdf = cudf.from_pandas(pdf)
-    gdf.to_parquet(
-        gdf_dir, index=False, partition_cols=cols, partition_file_name=filename
-    )
-
-    # Read back with pandas to compare
-    expect_pd = pd.read_parquet(pdf_dir)
-    got_pd = pd.read_parquet(gdf_dir)
-    assert_eq(expect_pd, got_pd)
-
-    # Check that cudf and pd return the same read
-    got_cudf = cudf.read_parquet(gdf_dir)
-    if isinstance(got_pd["c"].dtype, pd.CategoricalDtype):
-        # Work-around for pandas bug:
-        # https://github.com/pandas-dev/pandas/issues/53345
-        got_pd["c"] = got_pd["c"].astype(
-            pd.CategoricalDtype(
-                categories=got_pd["c"].dtype.categories.astype("int64"),
-                ordered=got_pd["c"].dtype.ordered,
-            )
-        )
-    assert_eq(got_pd, got_cudf)
-
-    # If filename is specified, check that it is correct
-    if filename:
-        for _, _, files in os.walk(gdf_dir):
-            for fn in files:
-                assert fn == filename
-
-
-@pytest.mark.parametrize("kwargs", [{"nrows": 1}, {"skip_rows": 1}])
-def test_parquet_partitioned_notimplemented(tmpdir_factory, kwargs):
-    rng = np.random.default_rng(seed=0)
-    # Checks that write_to_dataset is wrapping to_parquet
-    # as expected
-    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
-    size = 100
-    pdf = pd.DataFrame(
-        {
-            "a": np.arange(0, stop=size, dtype="int64"),
-            "b": rng.choice(list("abcd"), size=size),
-            "c": rng.choice(np.arange(4), size=size),
-        }
-    )
-    pdf.to_parquet(pdf_dir, index=False, partition_cols=["b"])
-
-    with pytest.raises(NotImplementedError):
-        cudf.read_parquet(pdf_dir, **kwargs)
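 
The partitioned tests above and below read back hive-style datasets: `partition_cols` encodes each partition column as a `name=value` directory level rather than as data in the files. A purely illustrative sketch of the resulting layout (directory and values invented for the example):

    import os

    import cudf

    gdf = cudf.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [0, 1]})
    os.makedirs("dataset", exist_ok=True)
    gdf.to_parquet("dataset", partition_cols=["c", "b"])

    # Resulting layout (file names are generated):
    #   dataset/c=0/b=x/<part>.parquet
    #   dataset/c=1/b=y/<part>.parquet
    # Readers recover "b" and "c" from the directory names.
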
-
-
-@pytest.mark.parametrize("return_meta", [True, False])
-def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
-    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
-    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
-
-    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
-    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
-
-    cw = ParquetDatasetWriter(gdf_dir, partition_cols=["a"], index=False)
-    cw.write_table(df1)
-    cw.write_table(df2)
-    meta_byte_array = cw.close(return_metadata=return_meta)
-    pdf = cudf.concat([df1, df2]).to_pandas()
-    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
-
-    if return_meta:
-        fmd = pq.ParquetFile(BytesIO(meta_byte_array)).metadata
-        assert fmd.num_rows == len(pdf)
-        assert fmd.num_row_groups == 4
-        files = {
-            os.path.join(directory, files[0])
-            for directory, _, files in os.walk(gdf_dir)
-            if files
-        }
-        meta_files = {
-            os.path.join(gdf_dir, fmd.row_group(i).column(c).file_path)
-            for i in range(fmd.num_row_groups)
-            for c in range(fmd.row_group(i).num_columns)
-        }
-        assert files == meta_files
-
-    # Read back with pandas to compare
-    expect_pd = pd.read_parquet(pdf_dir)
-    got_pd = pd.read_parquet(gdf_dir)
-    assert_eq(expect_pd, got_pd)
-
-    # Check that cudf and pd return the same read
-    got_cudf = cudf.read_parquet(gdf_dir)
-
-    # Work-around for pandas bug:
-    # https://github.com/pandas-dev/pandas/issues/53345
-    got_pd["a"] = got_pd["a"].astype(
-        pd.CategoricalDtype(
-            categories=got_pd["a"].dtype.categories.astype("int64"),
-            ordered=got_pd["a"].dtype.ordered,
-        )
-    )
-    assert_eq(got_pd, got_cudf)
-
-
-@pytest.mark.parametrize(
-    "max_file_size,max_file_size_in_bytes",
-    [("500KB", 500000), ("MB", 1000000)],
-)
-def test_parquet_writer_chunked_max_file_size(
-    tmpdir_factory, max_file_size, max_file_size_in_bytes
-):
-    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
-    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
-
-    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1] * 1000, "b": range(0, 5000)})
-    df2 = cudf.DataFrame(
-        {"a": [1, 3, 3, 1, 3] * 1000, "b": range(5000, 10000)}
-    )
-
-    cw = ParquetDatasetWriter(
-        gdf_dir,
-        partition_cols=["a"],
-        max_file_size=max_file_size,
-        file_name_prefix="sample",
-    )
-    cw.write_table(df1)
-    cw.write_table(df2)
-    cw.close()
-    pdf = cudf.concat([df1, df2]).to_pandas()
-    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
-
-    expect_pd = pd.read_parquet(pdf_dir)
-    got_pd = pd.read_parquet(gdf_dir)
-
-    assert_eq(
-        expect_pd.sort_values(["b"]).reset_index(drop=True),
-        got_pd.sort_values(["b"]).reset_index(drop=True),
-    )
-
-    # Check that cudf and pd return the same read
-    got_cudf = cudf.read_parquet(gdf_dir)
-
-    # Work-around for pandas bug:
-    # https://github.com/pandas-dev/pandas/issues/53345
-    got_pd["a"] = got_pd["a"].astype(
-        pd.CategoricalDtype(
-            categories=got_pd["a"].dtype.categories.astype("int64"),
-            ordered=got_pd["a"].dtype.ordered,
-        )
-    )
-    assert_eq(
-        got_pd.sort_values(["b"]).reset_index(drop=True),
-        got_cudf.sort_values(["b"]).reset_index(drop=True),
-    )
-
-    all_files = glob.glob(gdf_dir + "/**/*.parquet", recursive=True)
-    for each_file in all_files:
-        # Validate that no file exceeds the requested maximum size
-        assert os.path.getsize(each_file) <= (max_file_size_in_bytes), (
-            "File exceeded max_file_size"
-        )
-
-
-def test_parquet_writer_chunked_max_file_size_error():
-    with pytest.raises(
-        ValueError,
-        match="file_name_prefix cannot be None if max_file_size is passed",
-    ):
-        ParquetDatasetWriter("sample", partition_cols=["a"], max_file_size=100)
-
-
-def test_parquet_writer_chunked_partitioned_context(tmpdir_factory):
-    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
-    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
-
-    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
-    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
-
-    with ParquetDatasetWriter(
-        gdf_dir, partition_cols=["a"], index=False
-    ) as cw:
-        cw.write_table(df1)
-        cw.write_table(df2)
-
-    pdf = cudf.concat([df1, df2]).to_pandas()
-    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
-
-    # Read back with pandas to compare
-    expect_pd = pd.read_parquet(pdf_dir)
-    got_pd = pd.read_parquet(gdf_dir)
-    assert_eq(expect_pd, got_pd)
-
-    # Check that cudf and pd return the same read
-    got_cudf = cudf.read_parquet(gdf_dir)
-
-    # Work-around for pandas bug:
-    # https://github.com/pandas-dev/pandas/issues/53345
-    got_pd["a"] = got_pd["a"].astype(
-        pd.CategoricalDtype(
-            categories=got_pd["a"].dtype.categories.astype("int64"),
-            ordered=got_pd["a"].dtype.ordered,
-        )
-    )
-    assert_eq(got_pd, got_cudf)
-
-
-@pytest.mark.parametrize("cols", [None, ["b"]])
-@pytest.mark.parametrize("store_schema", [True, False])
-def test_parquet_write_to_dataset(tmpdir_factory, cols, store_schema):
-    rng = np.random.default_rng(seed=0)
-    dir1 = tmpdir_factory.mktemp("dir1")
-    dir2 = tmpdir_factory.mktemp("dir2")
-    if cols is None:
-        dir1 = dir1.join("file.pq")
-        dir2 = dir2.join("file.pq")
-    dir1 = str(dir1)
-    dir2 = str(dir2)
-
-    size = 100
-    gdf = cudf.DataFrame(
-        {
-            "a": np.arange(0, stop=size),
-            "b": rng.choice(np.arange(4), size=size),
-        }
-    )
-    gdf.to_parquet(dir1, partition_cols=cols, store_schema=store_schema)
-    cudf.io.write_to_dataset(gdf, dir2, partition_cols=cols)
-
-    # Read back with cudf
-    expect = cudf.read_parquet(dir1)
-    got = cudf.read_parquet(dir2)
-    assert_eq(expect, got)
-
-    gdf = cudf.DataFrame(
-        {
-            "a": cudf.Series([1, 2, 3]),
-            "b": cudf.Series([1, 2, 3]),
-            "c": cudf.Series(["a", "b", "c"], dtype="category"),
-        }
-    )
-    with pytest.raises(ValueError):
-        gdf.to_parquet(dir1, partition_cols=cols, store_schema=store_schema)
-
-
-@pytest.mark.parametrize(
-    "pfilters",
-    [[("b", "==", "b")], [("b", "==", "a"), ("c", "==", 1)]],
-)
-@pytest.mark.parametrize("selection", ["directory", "files", "row-groups"])
-@pytest.mark.parametrize("use_cat", [True, False])
-def test_read_parquet_partitioned_filtered(
-    tmpdir, pfilters, selection, use_cat
-):
-    rng = np.random.default_rng(2)
-    path = str(tmpdir)
-    size = 100
-    df = cudf.DataFrame(
-        {
-            "a": np.arange(0, stop=size, dtype="int64"),
-            "b": rng.choice(list("abcd"), size=size),
-            "c": rng.choice(np.arange(4), size=size),
-        }
-    )
-    df.to_parquet(path, partition_cols=["c", "b"])
-
-    if selection == "files":
-        # Pass in a list of paths
-        fs = get_fs_token_paths(path)[0]
-        read_path = fs.find(path)
-        row_groups = None
-    elif selection == "row-groups":
-        # Pass in a list of paths AND row-group ids
-        fs = get_fs_token_paths(path)[0]
-        read_path = fs.find(path)
-        row_groups = [[0] for p in read_path]
-    else:
-        # Pass in a directory path
-        # (row-group selection not allowed in this case)
-        read_path = path
-        row_groups = None
-
-    # Filter on partitioned columns
-    expect = pd.read_parquet(read_path, filters=pfilters)
-    got = cudf.read_parquet(
-        read_path,
-        filters=pfilters,
-        row_groups=row_groups,
-        categorical_partitions=use_cat,
-    )
-    expect["b"] = expect["b"].astype(str)
-    expect["c"] = expect["c"].astype(int)
-    if use_cat:
-        assert got.dtypes["b"] == "category"
-        assert got.dtypes["c"] == "category"
-        got["b"] = got["b"].astype(str)
-        got["c"] = got["c"].astype(int)
-    else:
-        # Check that we didn't get categorical
-        # columns, but convert back to categorical
-        # for comparison with pandas
-        assert got.dtypes["b"] == "object"
-        assert got.dtypes["c"] == "int"
-    assert_eq(expect, got)
-
-
-@pytest.mark.parametrize(
-    "filters", [[("a", "==", 10)], [[("a", "==", 10)], [("c", "==", 1)]]]
-)
-def test_read_parquet_partitioned_filtered_other(tmpdir, filters):
-    rng = np.random.default_rng(2)
-    path = str(tmpdir)
-    size = 10
-    df = cudf.DataFrame(
-        {
-            "a": np.arange(0, stop=size, dtype="int64"),
-            "b": rng.choice(list("abcd"), size=size),
rng.choice(list("abcd"), size=size), - "c": rng.choice(np.arange(4), size=size), - } - ) - df.to_parquet(path, partition_cols=["c", "b"]) - got = cudf.read_parquet(path, filters=filters) - expect = pd.read_parquet(path, filters=filters) - - # Work-around for pandas bug: - # https://github.com/pandas-dev/pandas/issues/53345 - expect["c"] = expect["c"].astype( - pd.CategoricalDtype( - categories=expect["c"].dtype.categories.astype("int64"), - ordered=expect["c"].dtype.ordered, - ) - ) - assert_eq(expect, got) - - -def test_parquet_writer_chunked_metadata(tmpdir, simple_pdf, simple_gdf): - gdf_fname = tmpdir.join("gdf.parquet") - test_path = "test/path" - - writer = ParquetWriter(gdf_fname) - writer.write_table(simple_gdf) - writer.write_table(simple_gdf) - meta_byte_array = writer.close(metadata_file_path=test_path) - fmd = pq.ParquetFile(BytesIO(meta_byte_array)).metadata - - assert fmd.num_rows == 2 * len(simple_gdf) - assert fmd.num_row_groups == 2 - - for r in range(fmd.num_row_groups): - for c in range(fmd.num_columns): - assert fmd.row_group(r).column(c).file_path == test_path - - -def test_write_read_cudf(tmpdir, pdf): - file_path = tmpdir.join("cudf.parquet") - if "col_category" in pdf.columns: - pdf = pdf.drop(columns=["col_category"]) - - gdf = cudf.from_pandas(pdf) - gdf.to_parquet(file_path) - gdf = cudf.read_parquet(file_path) - - assert_eq(gdf, pdf, check_index_type=not pdf.empty) - - -def test_write_cudf_read_pandas_pyarrow(tmpdir, pdf): - cudf_path = tmpdir.join("cudf.parquet") - pandas_path = tmpdir.join("pandas.parquet") - - if "col_category" in pdf.columns: - pdf = pdf.drop(columns=["col_category"]) - - df = cudf.from_pandas(pdf) - - df.to_parquet(cudf_path) - pdf.to_parquet(pandas_path) - - cudf_res = pd.read_parquet(cudf_path) - pd_res = pd.read_parquet(pandas_path) - - assert_eq(pd_res, cudf_res, check_index_type=not pdf.empty) - - cudf_res = pa.parquet.read_table( - cudf_path, use_pandas_metadata=True - ).to_pandas() - pd_res = pa.parquet.read_table( - pandas_path, use_pandas_metadata=True - ).to_pandas() - - assert_eq(cudf_res, pd_res, check_index_type=not pdf.empty) - - -def test_parquet_writer_criteo(tmpdir): - # To run this test, download the day 0 of criteo dataset from - # http://labs.criteo.com/2013/12/download-terabyte-click-logs/ - # and place the uncompressed dataset in the home directory - fname = os.path.expanduser("~/day_0") - if not os.path.isfile(fname): - pytest.skip("Local criteo day 0 tsv file is not found") - - cudf_path = tmpdir.join("cudf.parquet") - - cont_names = ["I" + str(x) for x in range(1, 14)] - cat_names = ["C" + str(x) for x in range(1, 27)] - cols = ["label", *cont_names, *cat_names] - - df = cudf.read_csv(fname, sep="\t", names=cols, byte_range=(0, 1000000000)) - df = df.drop(columns=cont_names) - - df.to_parquet(cudf_path) - - -def test_trailing_nans(datadir, tmpdir): - fname = "trailing_nans.parquet" - file_path = datadir / fname - cu_df = cudf.read_parquet(file_path) - - tmp_file_path = tmpdir.join(fname) - cu_df.to_parquet(tmp_file_path) - - pd.read_parquet(tmp_file_path) - - -def test_parquet_writer_sliced(tmpdir): - cudf_path = tmpdir.join("cudf.parquet") - - df = pd.DataFrame() - df["String"] = np.array(["Alpha", "Beta", "Gamma", "Delta"]) - df = cudf.from_pandas(df) - - df_select = df.iloc[1:3] - - df_select.to_parquet(cudf_path) - assert_eq(cudf.read_parquet(cudf_path), df_select) - - -def test_parquet_writer_list_basic(tmpdir): - expect = pd.DataFrame({"a": [[[1, 2], [3, 4]], None, [[5, 6], None]]}) - fname = 
tmpdir.join("test_parquet_writer_list_basic.parquet") - - gdf = cudf.from_pandas(expect) - - gdf.to_parquet(fname) - assert os.path.exists(fname) - - got = pd.read_parquet(fname) - assert_eq(expect, got) - - -def test_parquet_writer_list_large(tmpdir): - gdf = cudf.DataFrame({"a": list_gen(int_gen, 128, 40, 25)}) - fname = tmpdir.join("test_parquet_writer_list_large.parquet") - - gdf.to_parquet(fname) - assert os.path.exists(fname) - - got = pd.read_parquet(fname) - assert gdf.to_arrow().equals(pa.Table.from_pandas(got)) - - -def test_parquet_writer_list_large_mixed(tmpdir): - expect = pd.DataFrame( - { - "a": list_gen(string_gen, 64, 40, 25), - "b": list_gen(int_gen, 64, 40, 25), - "c": list_gen(int_gen, 64, 40, 25, include_validity=True), - "d": list_gen(string_gen, 64, 40, 25, include_validity=True), - } - ) - fname = tmpdir.join("test_parquet_writer_list_large_mixed.parquet") - gdf = cudf.from_pandas(expect) - - gdf.to_parquet(fname) - assert os.path.exists(fname) - - got = pd.read_parquet(fname) - assert_eq(expect, got) - - -@pytest.mark.parametrize("store_schema", [True, False]) -def test_parquet_writer_list_chunked(tmpdir, store_schema): - if store_schema and version.parse(pa.__version__) < version.parse( - "15.0.0" - ): - pytest.skip("https://github.com/apache/arrow/pull/37792") - table1 = cudf.DataFrame( - { - "a": list_gen(string_gen, 64, 40, 25), - "b": list_gen(int_gen, 64, 40, 25), - "c": list_gen(int_gen, 64, 40, 25, include_validity=True), - "d": list_gen(string_gen, 64, 40, 25, include_validity=True), - } - ) - table2 = cudf.DataFrame( - { - "a": list_gen(string_gen, 64, 40, 25), - "b": list_gen(int_gen, 64, 40, 25), - "c": list_gen(int_gen, 64, 40, 25, include_validity=True), - "d": list_gen(string_gen, 64, 40, 25, include_validity=True), - } - ) - fname = tmpdir.join("test_parquet_writer_list_chunked.parquet") - expect = cudf.concat([table1, table2]) - expect = expect.reset_index(drop=True) - - with ParquetWriter(fname, store_schema=store_schema) as writer: - writer.write_table(table1) - writer.write_table(table2) - - assert os.path.exists(fname) - got = pq.read_table(fname) - # compare with pyarrow since pandas doesn't - # have a list or struct dtype - assert expect.to_arrow().equals(got) - - -@pytest.mark.parametrize("engine", ["cudf", "pyarrow"]) -def test_parquet_nullable_boolean(tmpdir, engine): - pandas_path = tmpdir.join("pandas_bools.parquet") - - pdf = pd.DataFrame( - { - "a": pd.Series( - [True, False, None, True, False], dtype=pd.BooleanDtype() - ) - } - ) - expected_gdf = cudf.DataFrame({"a": [True, False, None, True, False]}) - - pdf.to_parquet(pandas_path) - with _hide_pyarrow_parquet_cpu_warnings(engine): - actual_gdf = cudf.read_parquet(pandas_path, engine=engine) - - assert_eq(actual_gdf, expected_gdf) - - -def run_parquet_index(pdf, index): - pandas_buffer = BytesIO() - cudf_buffer = BytesIO() - - gdf = cudf.from_pandas(pdf) - - pdf.to_parquet(pandas_buffer, index=index) - gdf.to_parquet(cudf_buffer, index=index) - - expected = pd.read_parquet(cudf_buffer) - actual = cudf.read_parquet(pandas_buffer) - - if expected.empty and actual.empty: - # We return RangeIndex columns compared - # to pandas' Index[object] columns - actual.columns = expected.columns - - assert_eq(expected, actual, check_index_type=True) - - expected = pd.read_parquet(pandas_buffer) - actual = cudf.read_parquet(cudf_buffer) - - if expected.empty and actual.empty: - # We return RangeIndex columns compared - # to pandas' Index[object] columns - actual.columns = expected.columns - - 
-    assert_eq(
-        expected,
-        actual,
-        check_index_type=True,
-    )
-
-
-@pytest.mark.parametrize(
-    "pdf",
-    [
-        pd.DataFrame(index=[1, 2, 3]),
-        pd.DataFrame({"a": [1, 2, 3]}, index=[0.43534, 345, 0.34534]),
-        pd.DataFrame(
-            {"b": [11, 22, 33], "c": ["a", "b", "c"]},
-            index=pd.Index(["a", "b", "c"], name="custom name"),
-        ),
-        pd.DataFrame(
-            {"a": [10, 11, 12], "b": [99, 88, 77]},
-            index=pd.RangeIndex(12, 17, 2),
-        ),
-        pd.DataFrame(
-            {"b": [99, 88, 77]},
-            index=pd.RangeIndex(22, 27, 2, name="hello index"),
-        ),
-        pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")),
-        pd.DataFrame(
-            {"a": ["a", "bb", "cc"], "b": [10, 21, 32]},
-            index=pd.MultiIndex.from_tuples([[1, 2], [10, 11], [15, 16]]),
-        ),
-        pd.DataFrame(
-            {"a": ["a", "bb", "cc"], "b": [10, 21, 32]},
-            index=pd.MultiIndex.from_tuples(
-                [[1, 2], [10, 11], [15, 16]], names=["first", "second"]
-            ),
-        ),
-    ],
-)
-@pytest.mark.parametrize("index", [None, True, False])
-def test_parquet_index(pdf, index):
-    run_parquet_index(pdf, index)
-
-
-@pytest.mark.parametrize(
-    "index",
-    [
-        pytest.param(
-            None,
-            marks=pytest.mark.xfail(
-                reason="https://github.com/apache/arrow/issues/40743"
-            ),
-        ),
-        True,
-    ],
-)
-def test_parquet_index_empty(index):
-    pdf = pd.DataFrame(index=pd.RangeIndex(0, 10, 1))
-    run_parquet_index(pdf, index)
-
-
-def test_parquet_no_index_empty():
-    pdf = pd.DataFrame(index=pd.RangeIndex(0, 10, 1))
-    run_parquet_index(pdf, index=False)
-
-
-@pytest.mark.parametrize("engine", ["cudf", "pyarrow"])
-def test_parquet_allnull_str(tmpdir, engine):
-    pandas_path = tmpdir.join("pandas_allnulls.parquet")
-
-    pdf = pd.DataFrame(
-        {"a": pd.Series([None, None, None, None, None], dtype="str")}
-    )
-    expected_gdf = cudf.DataFrame(
-        {"a": cudf.Series([None, None, None, None, None], dtype="str")}
-    )
-
-    pdf.to_parquet(pandas_path)
-    with _hide_pyarrow_parquet_cpu_warnings(engine):
-        actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
-
-    assert_eq(actual_gdf, expected_gdf)
-
-
-def normalized_equals(value1, value2):
-    if value1 is pd.NA or value1 is pd.NaT:
-        value1 = None
-    if value2 is pd.NA or value2 is pd.NaT:
-        value2 = None
-    if isinstance(value1, np.datetime64):
-        value1 = pd.Timestamp(value1).to_pydatetime()
-    if isinstance(value2, np.datetime64):
-        value2 = pd.Timestamp(value2).to_pydatetime()
-    if isinstance(value1, pd.Timestamp):
-        value1 = value1.to_pydatetime()
-    if isinstance(value2, pd.Timestamp):
-        value2 = value2.to_pydatetime()
-    if isinstance(value1, datetime.datetime):
-        value1 = value1.replace(tzinfo=None)
-    if isinstance(value2, datetime.datetime):
-        value2 = value2.replace(tzinfo=None)
-    if isinstance(value1, pd.Timedelta):
-        unit = "ms" if value1.unit == "s" else value1.unit
-        value2 = pd.Timedelta(value2, unit=unit)
-
-    # if one is a datetime then both values are datetimes now
-    if isinstance(value1, datetime.datetime):
-        return value1 == value2
-
-    # Compare integers with floats now
-    if isinstance(value1, float) or isinstance(value2, float):
-        return math.isclose(value1, value2)
-
-    return value1 == value2
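
A few cases that follow directly from the definition of `normalized_equals` above:

    import numpy as np
    import pandas as pd

    assert normalized_equals(pd.NaT, None)     # NaT/NA normalize to None
    assert normalized_equals(                  # tz- and type-insensitive datetimes
        np.datetime64("2001-01-01"), pd.Timestamp("2001-01-01")
    )
    assert normalized_equals(1, 1.0000000001)  # int/float via math.isclose
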
add_nulls: - # Timedelta types convert NaT to None when reading from parquet into - # pandas which interferes with series.max()/min() - for t in TIMEDELTA_TYPES: - pdf["col_" + t] = pd.Series(np.arange(len(pdf.index))).astype(t) - # pyarrow can't read values with non-zero nanoseconds - pdf["col_timedelta64[ns]"] = pdf["col_timedelta64[ns]"] * 1000 - - gdf = cudf.from_pandas(pdf) - if add_nulls: - for col in gdf: - set_random_null_mask_inplace(gdf[col]) - gdf.to_parquet(file_path, index=False, store_schema=store_schema) - - # Read back from pyarrow - pq_file = pq.ParquetFile(file_path) - # verify each row group's statistics - for rg in range(0, pq_file.num_row_groups): - pd_slice = pq_file.read_row_group(rg).to_pandas() - - # statistics are per-column. So need to verify independently - for i, col in enumerate(pd_slice): - stats = pq_file.metadata.row_group(rg).column(i).statistics - - actual_min = pd_slice[col].min() - stats_min = stats.min - assert normalized_equals(actual_min, stats_min) - - actual_max = pd_slice[col].max() - stats_max = stats.max - assert normalized_equals(actual_max, stats_max) - - assert stats.null_count == pd_slice[col].isna().sum() - assert stats.num_values == pd_slice[col].count() - - -def test_parquet_writer_list_statistics(tmpdir): - df = pd.DataFrame( - { - "a": list_gen(string_gen, 64, 40, 25), - "b": list_gen(int_gen, 64, 40, 25), - "c": list_gen(int_gen, 64, 40, 25, include_validity=True), - "d": list_gen(string_gen, 64, 40, 25, include_validity=True), - } - ) - fname = tmpdir.join("test_parquet_writer_list_statistics.parquet") - gdf = cudf.from_pandas(df) - - gdf.to_parquet(fname) - assert os.path.exists(fname) - - # Read back from pyarrow - pq_file = pq.ParquetFile(fname) - # verify each row group's statistics - for rg in range(0, pq_file.num_row_groups): - pd_slice = pq_file.read_row_group(rg).to_pandas() - - # statistics are per-column. 
So need to verify independently - for i, col in enumerate(pd_slice): - stats = pq_file.metadata.row_group(rg).column(i).statistics - - actual_min = pd_slice[col].explode().explode().dropna().min() - stats_min = stats.min - assert normalized_equals(actual_min, stats_min) - - actual_max = pd_slice[col].explode().explode().dropna().max() - stats_max = stats.max - assert normalized_equals(actual_max, stats_max) - - -@pytest.mark.parametrize( - "data", - [ - # Structs - { - "being": [ - None, - {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}}, - {"human?": None, "Deets": {"Name": "Angua", "Age": 25}}, - {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}}, - {"human?": False, "Deets": None}, - {"human?": None, "Deets": {"Name": "Mr", "Age": None}}, - ] - }, - # List of Structs - { - "family": [ - [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}], - [ - {"human?": None, "deets": {"weight": 5.3, "age": 25}}, - {"human?": False, "deets": {"weight": 8.0, "age": 31}}, - {"human?": False, "deets": None}, - ], - [], - [{"human?": None, "deets": {"weight": 6.9, "age": None}}], - ] - }, - # Struct of Lists - { - "Real estate records": [ - None, - { - "Status": "NRI", - "Ownerships": { - "land_unit": [None, 2, None], - "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]], - }, - }, - { - "Status": None, - "Ownerships": { - "land_unit": [4, 5], - "flats": [[7, 8], []], - }, - }, - { - "Status": "RI", - "Ownerships": {"land_unit": None, "flats": [[]]}, - }, - {"Status": "RI", "Ownerships": None}, - { - "Status": None, - "Ownerships": { - "land_unit": [7, 8, 9], - "flats": [[], [], []], - }, - }, - ] - }, - ], -) -def test_parquet_writer_nested(tmpdir, data): - expect = pd.DataFrame(data) - gdf = cudf.from_pandas(expect) - - fname = tmpdir.join("test_parquet_writer_nested.parquet") - gdf.to_parquet(fname) - assert os.path.exists(fname) - - got = pd.read_parquet(fname) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], -) -@pytest.mark.parametrize("data", [[1, 2, 3], [0.00, 0.01, None, 0.5]]) -def test_parquet_writer_decimal(decimal_type, data): - gdf = cudf.DataFrame({"val": data}) - - gdf["dec_val"] = gdf["val"].astype(decimal_type(7, 2)) - - buff = BytesIO() - gdf.to_parquet(buff) - - got = pd.read_parquet(buff, dtype_backend="numpy_nullable") - assert_eq(gdf["val"].to_pandas(nullable=True), got["val"]) - assert_eq(gdf["dec_val"].to_pandas(), got["dec_val"]) - - -def test_parquet_writer_column_validation(): - cudf_parquet = BytesIO() - pandas_parquet = BytesIO() - df = cudf.DataFrame({1: [1, 2, 3], "a": ["a", "b", "c"]}) - pdf = df.to_pandas() - - with cudf.option_context("mode.pandas_compatible", True): - with pytest.warns(UserWarning): - df.to_parquet(cudf_parquet) - - with pytest.warns(UserWarning): - pdf.to_parquet(pandas_parquet) - - assert_eq( - pd.read_parquet(cudf_parquet), - cudf.read_parquet(pandas_parquet), - ) - assert_eq( - cudf.read_parquet(cudf_parquet), - pd.read_parquet(pandas_parquet), - ) - - with cudf.option_context("mode.pandas_compatible", False): - with pytest.raises(ValueError): - df.to_parquet(cudf_parquet) - - -def test_parquet_writer_nulls_pandas_read(tmpdir, pdf): - if "col_bool" in pdf.columns: - pdf.drop(columns="col_bool", inplace=True) - if "col_category" in pdf.columns: - pdf.drop(columns="col_category", inplace=True) - gdf = cudf.from_pandas(pdf) - - num_rows = len(gdf) - - if num_rows > 0: - for col in gdf.columns: - gdf[col][random.randint(0, num_rows - 1)] = 
None - - fname = tmpdir.join("test_parquet_writer_nulls_pandas_read.parquet") - gdf.to_parquet(fname) - assert os.path.exists(fname) - - got = pd.read_parquet(fname) - nullable = num_rows > 0 - - if nullable: - gdf = gdf.drop(columns="col_datetime64[ms]") - gdf = gdf.drop(columns="col_datetime64[us]") - got = got.drop(columns="col_datetime64[ms]") - got = got.drop(columns="col_datetime64[us]") - - assert_eq(gdf.to_pandas(nullable=nullable), got) - - -@pytest.mark.parametrize( - "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], -) -def test_parquet_decimal_precision(tmpdir, decimal_type): - df = cudf.DataFrame({"val": ["3.5", "4.2"]}).astype(decimal_type(5, 2)) - assert df.val.dtype.precision == 5 - - fname = tmpdir.join("decimal_test.parquet") - df.to_parquet(fname) - df = cudf.read_parquet(fname) - assert df.val.dtype.precision == 5 - - -def test_parquet_decimal_precision_empty(tmpdir): - df = ( - cudf.DataFrame({"val": ["3.5", "4.2"]}) - .astype(cudf.Decimal64Dtype(5, 2)) - .iloc[:0] - ) - assert df.val.dtype.precision == 5 - - fname = tmpdir.join("decimal_test.parquet") - df.to_parquet(fname) - df = cudf.read_parquet(fname) - assert df.val.dtype.precision == 5 - - -def test_parquet_reader_brotli(datadir): - fname = datadir / "brotli_int16.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname).to_pandas(nullable=True) - - assert_eq(expect, got) - - -def test_parquet_reader_one_level_list(datadir): - fname = datadir / "one_level_list.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname) - - assert_eq(expect, got) - - -def test_parquet_reader_binary_decimal(datadir): - fname = datadir / "binary_decimal.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname).to_pandas() - - assert_eq(expect, got) - - -def test_parquet_reader_fixed_bin(datadir): - fname = datadir / "fixed_len_byte_array.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname) - - assert_eq(expect, got) - - -def test_parquet_reader_fixed_len_with_dict(tmpdir): - def flba(i): - hasher = hashlib.sha256() - hasher.update(i.to_bytes(4, "little")) - return hasher.digest() - - # use pyarrow to write table of fixed_len_byte_array - num_rows = 200 - data = pa.array([flba(i) for i in range(num_rows)], type=pa.binary(32)) - padf = pa.Table.from_arrays([data], names=["flba"]) - padf_fname = tmpdir.join("padf.parquet") - pq.write_table(padf, padf_fname, use_dictionary=True) - - expect = pd.read_parquet(padf_fname) - got = cudf.read_parquet(padf_fname) - assert_eq(expect, got) - - -def test_parquet_flba_round_trip(tmpdir): - def flba(i): - hasher = hashlib.sha256() - hasher.update(i.to_bytes(4, "little")) - return hasher.digest() - - # use pyarrow to write table of fixed_len_byte_array - num_rows = 200 - data = pa.array([flba(i) for i in range(num_rows)], type=pa.binary(32)) - padf = pa.Table.from_arrays([data], names=["flba"]) - padf_fname = tmpdir.join("padf.parquet") - pq.write_table(padf, padf_fname) - - # round trip data with cudf - cdf = cudf.read_parquet(padf_fname) - cdf_fname = tmpdir.join("cdf.parquet") - cdf.to_parquet(cdf_fname, column_type_length={"flba": 32}) - - # now read back in with pyarrow to test it was written properly by cudf - padf2 = pq.read_table(padf_fname) - padf3 = pq.read_table(cdf_fname) - assert_eq(padf2, padf3) - assert_eq(padf2.schema[0].type, padf3.schema[0].type) - - -@pytest.mark.parametrize( - "encoding", - [ - "PLAIN", - "DICTIONARY", - "DELTA_BINARY_PACKED", - 
"BYTE_STREAM_SPLIT", - "USE_DEFAULT", - ], -) -def test_per_column_encoding_option(encoding): - pdf = pd.DataFrame({"ilist": [[1, 2, 3, 1, 2, 3]], "i1": [1]}) - cdf = cudf.from_pandas(pdf) - buffer = BytesIO() - cdf.to_parquet( - buffer, - column_encoding={"ilist.list.element": encoding}, - ) - # DICTIONARY and USE_DEFAULT should both result in a PLAIN_DICTIONARY encoding in parquet - encoding_name = ( - "PLAIN_DICTIONARY" - if encoding == "DICTIONARY" or encoding == "USE_DEFAULT" - else encoding - ) - pf = pq.ParquetFile(buffer) - fmd = pf.metadata - assert encoding_name in fmd.row_group(0).column(0).encodings - - -@pytest.mark.parametrize("compression", ["SNAPPY", "ZSTD"]) -def test_per_column_compression_option(set_decomp_env_vars, compression): - pdf = pd.DataFrame( - {"ilist": [[1, 2, 3, 1, 2, 3]], "i1": [[1, 2, 3, 1, 2, 3]]} - ) - cdf = cudf.from_pandas(pdf) - buffer = BytesIO() - cdf.to_parquet( - buffer, - compression=compression, - skip_compression={"ilist.list.element"}, - use_dictionary=False, # to make sure that data is compressible - ) - - pf = pq.ParquetFile(buffer) - fmd = pf.metadata - assert fmd.row_group(0).column(0).compression == "UNCOMPRESSED" - assert fmd.row_group(0).column(1).compression == compression - - -@pytest.mark.parametrize( - "encoding", - ["DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY"], -) -def test_per_column_options_string_col(tmpdir, encoding): - pdf = pd.DataFrame({"s": ["a string"], "i1": [1]}) - cdf = cudf.from_pandas(pdf) - fname = tmpdir.join("strcol.parquet") - cdf.to_parquet( - fname, - column_encoding={"s": encoding}, - compression="SNAPPY", - ) - pf = pq.ParquetFile(fname) - fmd = pf.metadata - assert encoding in fmd.row_group(0).column(0).encodings - - -@pytest.mark.skipif( - version.parse(pa.__version__) < version.parse("16.0.0"), - reason="https://github.com/apache/arrow/pull/39748", -) -@pytest.mark.parametrize( - "num_rows", - [200, 10000], -) -def test_parquet_bss_round_trip(tmpdir, num_rows): - def flba(i): - hasher = hashlib.sha256() - hasher.update(i.to_bytes(4, "little")) - return hasher.digest() - - # use pyarrow to write table of types that support BYTE_STREAM_SPLIT encoding - rows_per_rowgroup = 5000 - fixed_data = pa.array( - [flba(i) for i in range(num_rows)], type=pa.binary(32) - ) - i32_data = pa.array(list(range(num_rows)), type=pa.int32()) - i64_data = pa.array(list(range(num_rows)), type=pa.int64()) - f32_data = pa.array([float(i) for i in range(num_rows)], type=pa.float32()) - f64_data = pa.array([float(i) for i in range(num_rows)], type=pa.float64()) - padf = pa.Table.from_arrays( - [fixed_data, i32_data, i64_data, f32_data, f64_data], - names=["flba", "i32", "i64", "f32", "f64"], - ) - padf_fname = tmpdir.join("padf.parquet") - pq.write_table( - padf, - padf_fname, - column_encoding="BYTE_STREAM_SPLIT", - use_dictionary=False, - row_group_size=rows_per_rowgroup, - ) - - # round trip data with cudf - cdf = cudf.read_parquet(padf_fname) - cdf_fname = tmpdir.join("cdf.parquet") - cdf.to_parquet( - cdf_fname, - column_type_length={"flba": 32}, - column_encoding={ - "flba": "BYTE_STREAM_SPLIT", - "i32": "BYTE_STREAM_SPLIT", - "i64": "BYTE_STREAM_SPLIT", - "f32": "BYTE_STREAM_SPLIT", - "f64": "BYTE_STREAM_SPLIT", - }, - row_group_size_rows=rows_per_rowgroup, - ) - - # now read back in with pyarrow to test it was written properly by cudf - padf2 = pq.read_table(padf_fname) - padf3 = pq.read_table(cdf_fname) - assert_eq(padf2, padf3) - assert_eq(padf2.schema[0].type, padf3.schema[0].type) - - -def 
test_parquet_reader_rle_boolean(datadir): - fname = datadir / "rle_boolean_encoding.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname) - - assert_eq(expect, got) - - -# testing a specific bug-fix/edge case. -# specifically: in a parquet file containing a particular way of representing -# a list column in a schema, the cudf reader was confusing -# nesting information between a list column and a subsequent -# string column, ultimately causing a crash. -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Older versions of pandas do not have DataFrame.map()", -) -def test_parquet_reader_one_level_list2(datadir): - # we are reading in a file containing binary types, but cudf returns - # those as strings. so we have to massage the pandas data to get - # them to compare correctly. - def postprocess(val): - if isinstance(val, bytes): - return val.decode() - elif isinstance(val, np.ndarray): - return np.array([v.decode() for v in val]) - else: - return val - - fname = datadir / "one_level_list2.parquet" - - expect = pd.read_parquet(fname) - expect = expect.map(postprocess) - got = cudf.read_parquet(fname) - - assert_eq(expect, got, check_dtype=False) - - -# testing a specific bug-fix/edge case. -# specifically: in a parquet file containing a particular way of representing -# a list column in a schema, the cudf reader was confusing -# nesting information and building a list of list of int instead -# of a list of int -def test_parquet_reader_one_level_list3(datadir): - fname = datadir / "one_level_list3.parquet" - - expect = pd.read_parquet(fname) - got = cudf.read_parquet(fname) - - assert_eq(expect, got, check_dtype=True) - - -@pytest.mark.parametrize("size_bytes", [4_000_000, 1_000_000, 600_000]) -@pytest.mark.parametrize("size_rows", [1_000_000, 100_000, 10_000]) -def test_to_parquet_row_group_size( - tmpdir, large_int64_gdf, size_bytes, size_rows -): - fname = tmpdir.join("row_group_size.parquet") - large_int64_gdf.to_parquet( - fname, row_group_size_bytes=size_bytes, row_group_size_rows=size_rows - ) - - num_rows, row_groups, col_names, _, _ = cudf.io.read_parquet_metadata( - fname - ) - # 8 bytes per row, as the column is int64 - expected_num_row_groups = max( - math.ceil(num_rows / size_rows), math.ceil(8 * num_rows / size_bytes) - ) - assert expected_num_row_groups == row_groups - - -@pytest.mark.parametrize("size_rows", [500_000, 100_000, 10_000]) -def test_parquet_row_group_metadata(tmpdir, large_int64_gdf, size_rows): - fname = tmpdir.join("row_group_size.parquet") - large_int64_gdf.to_parquet(fname, row_group_size_rows=size_rows) - - # read file metadata from parquet - ( - num_rows, - row_groups, - _, # col_names - _, # num_columns - row_group_metadata, - ) = cudf.io.read_parquet_metadata(fname) - - # length(RowGroupsMetaData) == number of row groups - assert len(row_group_metadata) == row_groups - # sum of rows in row groups == total rows - assert num_rows == sum( - [row_group["num_rows"] for row_group in row_group_metadata] - ) - - -def test_parquet_reader_decimal_columns(): - df = cudf.DataFrame( - { - "col1": cudf.Series([1, 2, 3], dtype=cudf.Decimal64Dtype(10, 2)), - "col2": [10, 11, 12], - "col3": [12, 13, 14], - "col4": ["a", "b", "c"], - } - ) - buffer = BytesIO() - df.to_parquet(buffer) - - actual = cudf.read_parquet(buffer, columns=["col3", "col2", "col1"]) - expected = pd.read_parquet(buffer, columns=["col3", "col2", "col1"]) - - assert_eq(actual, expected) - - -def test_parquet_reader_zstd_compression(datadir): - fname = datadir
/ "spark_zstd.parquet" - try: - df = cudf.read_parquet(fname) - pdf = pd.read_parquet(fname) - assert_eq(df, pdf) - except RuntimeError: - pytest.mark.xfail(reason="zstd support is not enabled") - - -def test_read_parquet_multiple_files(tmpdir): - df_1_path = tmpdir / "df_1.parquet" - df_2_path = tmpdir / "df_2.parquet" - df_1 = cudf.DataFrame({"id": range(100), "a": [1] * 100}) - df_1.to_parquet(df_1_path) - - df_2 = cudf.DataFrame({"id": range(200, 2200), "a": [2] * 2000}) - df_2.to_parquet(df_2_path) - - expected = pd.read_parquet([df_1_path, df_2_path]) - actual = cudf.read_parquet([df_1_path, df_2_path]) - assert_eq(expected, actual) - - expected = pd.read_parquet([df_2_path, df_1_path]) - actual = cudf.read_parquet([df_2_path, df_1_path]) - assert_eq(expected, actual) - - -@pytest.mark.parametrize("index", [True, False, None]) -@pytest.mark.parametrize("columns", [None, [], ["b", "a"]]) -def test_parquet_columns_and_index_param(index, columns): - buffer = BytesIO() - df = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - df.to_parquet(buffer, index=index) - - expected = pd.read_parquet(buffer, columns=columns) - got = cudf.read_parquet(buffer, columns=columns) - if columns == [] and index in {False, None}: - # cuDF returns RangeIndex columns compared - # to pandas' Index[object] columns - got.columns = expected.columns - - assert_eq(expected, got, check_index_type=True) - - -@pytest.mark.parametrize("columns", [None, ["b", "a"]]) -def test_parquet_columns_and_range_index(columns): - buffer = BytesIO() - df = cudf.DataFrame( - {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=pd.RangeIndex(2, 5) - ) - df.to_parquet(buffer) - - expected = pd.read_parquet(buffer, columns=columns) - got = cudf.read_parquet(buffer, columns=columns) - - assert_eq(expected, got, check_index_type=True) - - -def test_parquet_nested_struct_list(): - buffer = BytesIO() - data = { - "payload": { - "Domain": { - "Name": "abc", - "Id": {"Name": "host", "Value": "127.0.0.8"}, - }, - "StreamId": "12345678", - "Duration": 10, - "Offset": 12, - "Resource": [{"Name": "ZoneName", "Value": "RAPIDS"}], - } - } - df = cudf.DataFrame({"a": cudf.Series(data)}) - - df.to_parquet(buffer) - expected = pd.read_parquet(buffer) - actual = cudf.read_parquet(buffer) - assert_eq(expected, actual) - assert_eq(actual.a.dtype, df.a.dtype) - - -def test_parquet_writer_zstd(): - size = 12345 - rng = np.random.default_rng(seed=0) - expected = cudf.DataFrame( - { - "a": np.arange(0, stop=size, dtype="float64"), - "b": rng.choice(list("abcd"), size=size), - "c": rng.choice(np.arange(4), size=size), - } - ) - - buff = BytesIO() - try: - expected.to_parquet(buff, compression="ZSTD") - except RuntimeError: - pytest.mark.xfail(reason="Newer nvCOMP version is required") - else: - got = pd.read_parquet(buff) - assert_eq(expected, got) - - -@pytest.mark.parametrize("store_schema", [True, False]) -def test_parquet_writer_time_delta_physical_type(store_schema): - df = cudf.DataFrame( - { - "s": cudf.Series([1], dtype="timedelta64[s]"), - "ms": cudf.Series([2], dtype="timedelta64[ms]"), - "us": cudf.Series([3], dtype="timedelta64[us]"), - # 4K because Pandas/pyarrow don't support non-zero nanoseconds - # in Parquet files - "ns": cudf.Series([4000], dtype="timedelta64[ns]"), - } - ) - buffer = BytesIO() - df.to_parquet(buffer, store_schema=store_schema) - - got = pd.read_parquet(buffer) - - if store_schema: - expected = pd.DataFrame( - { - "s": ["0 days 00:00:01"], - "ms": ["0 days 00:00:00.002000"], - "us": ["0 days 00:00:00.000003"], - "ns": ["0 
days 00:00:00.000004"], - }, - dtype="str", - ) - else: - expected = pd.DataFrame( - { - "s": ["00:00:01"], - "ms": ["00:00:00.002000"], - "us": ["00:00:00.000003"], - "ns": ["00:00:00.000004"], - }, - dtype="str", - ) - assert_eq(got.astype("str"), expected) - - -@pytest.mark.parametrize("store_schema", [True, False]) -def test_parquet_roundtrip_time_delta(store_schema): - num_rows = 12345 - df = cudf.DataFrame( - { - "s": cudf.Series( - random.sample(range(0, 200000), num_rows), - dtype="timedelta64[s]", - ), - "ms": cudf.Series( - random.sample(range(0, 200000), num_rows), - dtype="timedelta64[ms]", - ), - "us": cudf.Series( - random.sample(range(0, 200000), num_rows), - dtype="timedelta64[us]", - ), - "ns": cudf.Series( - random.sample(range(0, 200000), num_rows), - dtype="timedelta64[ns]", - ), - } - ) - buffer = BytesIO() - df.to_parquet(buffer, store_schema=store_schema) - # `check_dtype` cannot be removed here as timedelta64[s] will change to `timedelta[ms]` - assert_eq(df, cudf.read_parquet(buffer), check_dtype=False) - if store_schema: - assert_eq(df, pd.read_parquet(buffer)) - - -def test_parquet_reader_malformed_file(datadir): - fname = datadir / "nested-unsigned-malformed.parquet" - - # expect a failure when reading the whole file - with pytest.raises(RuntimeError): - cudf.read_parquet(fname) - - -def test_parquet_reader_unsupported_page_encoding(datadir): - fname = datadir / "delta_encoding.parquet" - - # expect a failure when reading the whole file - with pytest.raises(RuntimeError): - cudf.read_parquet(fname) - - -def test_parquet_reader_detect_bad_dictionary(datadir): - fname = datadir / "bad_dict.parquet" - - # expect a failure when reading the whole file - with pytest.raises(RuntimeError): - cudf.read_parquet(fname) - - -@pytest.mark.parametrize("data", [{"a": [1, 2, 3, 4]}, {"b": [1, None, 2, 3]}]) -@pytest.mark.parametrize("force_nullable_schema", [True, False]) -def test_parquet_writer_schema_nullability(data, force_nullable_schema): - df = cudf.DataFrame(data) - file_obj = BytesIO() - - df.to_parquet(file_obj, force_nullable_schema=force_nullable_schema) - - assert pa.parquet.read_schema(file_obj).field(0).nullable == ( - force_nullable_schema or df.isnull().any().any() - ) - - -def test_parquet_read_filter_and_project(): - # Filter on columns that are not included - # in the current column projection - - with BytesIO() as buffer: - # Write parquet data - df = cudf.DataFrame( - { - "a": [1, 2, 3, 4, 5] * 10, - "b": [0, 1, 2, 3, 4] * 10, - "c": range(50), - "d": [6, 7] * 25, - "e": [8, 9] * 25, - } - ) - df.to_parquet(buffer) - - # Read back with filter and projection - columns = ["b"] - filters = [[("a", "==", 5), ("c", ">", 20)]] - got = cudf.read_parquet(buffer, columns=columns, filters=filters) - - # Check result - expected = df[(df.a == 5) & (df.c > 20)][columns].reset_index(drop=True) - assert_eq(got, expected) - - -def test_parquet_reader_multiindex(): - expected = pd.DataFrame( - {"A": [1, 2, 3]}, - index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]), - ) - file_obj = BytesIO() - expected.to_parquet(file_obj, engine="pyarrow") - with pytest.warns(UserWarning): - actual = cudf.read_parquet(file_obj, engine="pyarrow") - assert_eq(actual, expected) - - -def test_parquet_reader_engine_error(): - with pytest.raises(ValueError): - cudf.read_parquet(BytesIO(), engine="abc") - - -def test_reader_lz4(): - pdf = pd.DataFrame({"ints": [1, 2] * 5001}) - - buffer = BytesIO() - pdf.to_parquet(buffer, compression="LZ4") - - got = cudf.read_parquet(buffer) - 
assert_eq(pdf, got) - - -def test_writer_lz4(): - gdf = cudf.DataFrame({"ints": [1, 2] * 5001}) - - buffer = BytesIO() - gdf.to_parquet(buffer, compression="LZ4") - - got = pd.read_parquet(buffer) - assert_eq(gdf, got) - - -def test_parquet_reader_zstd_huff_tables(datadir): - # Ensure that this zstd-compressed file does not overrun buffers. The - # problem was fixed in nvcomp 3.0.6. - # See https://github.com/rapidsai/cudf/issues/15096 - fname = datadir / "zstd_huff_tables_bug.parquet" - - expected = pa.parquet.read_table(fname).to_pandas() - actual = cudf.read_parquet(fname) - assert_eq(actual, expected) - - -def test_parquet_reader_roundtrip_with_arrow_schema(): - # Ensure that the nested types are faithfully being roundtripped - # across Parquet with arrow schema which is used to faithfully - # round trip duration types (timedelta64) across Parquet read and write. - pdf = pd.DataFrame( - { - "s": pd.Series([None, None, None], dtype="timedelta64[s]"), - "ms": pd.Series([1234, None, 32442], dtype="timedelta64[ms]"), - "us": pd.Series([None, 3456, None], dtype="timedelta64[us]"), - "ns": pd.Series([1234, 3456, 32442], dtype="timedelta64[ns]"), - "duration_list": list( - [ - [ - datetime.timedelta(minutes=7, seconds=4), - datetime.timedelta(minutes=7), - ], - [ - None, - None, - ], - [ - datetime.timedelta(minutes=7, seconds=4), - None, - ], - ] - ), - "int64": pd.Series([1234, 123, 4123], dtype="int64"), - "list": list([[1, 2], [1, 2], [1, 2]]), - "datetime": pd.Series([1234, 123, 4123], dtype="datetime64[ms]"), - "map": pd.Series(["cat", "dog", "lion"]).map( - {"cat": "kitten", "dog": "puppy", "lion": "cub"} - ), - } - ) - - # Write parquet with arrow for now (to write arrow:schema) - buffer = BytesIO() - pdf.to_parquet(buffer, engine="pyarrow") - - # Read parquet with arrow schema - got = cudf.read_parquet(buffer) - # Convert to cudf table for an apple to apple comparison - expected = cudf.from_pandas(pdf) - - # Check results for reader with schema - assert_eq(expected, got) - - # Reset buffer - buffer = BytesIO() - - # Write to buffer with cudf - expected.to_parquet(buffer, store_schema=True) - - # Read parquet with arrow schema - got = cudf.read_parquet(buffer) - # Convert to cudf table for an apple to apple comparison - expected = cudf.from_pandas(pdf) - - # Check results for writer with schema - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data", - [ - # struct - [ - {"a": 1, "b": 2}, - {"a": 10, "b": 20}, - {"a": None, "b": 22}, - {"a": None, "b": None}, - {"a": 15, "b": None}, - ], - # struct-of-list - [ - {"a": 1, "b": 2, "c": [1, 2, 3]}, - {"a": 10, "b": 20, "c": [4, 5]}, - {"a": None, "b": 22, "c": [6]}, - {"a": None, "b": None, "c": None}, - {"a": 15, "b": None, "c": [-1, -2]}, - None, - {"a": 100, "b": 200, "c": [-10, None, -20]}, - ], - # list-of-struct - [ - [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}], - None, - [{"a": 10, "b": 20}], - [{"a": 100, "b": 200}, {"a": None, "b": 300}, None], - ], - # struct-of-struct - [ - {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2}, - {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4}, - {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6}, - {"a": 7, "b": None, "c": 8}, - {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None}, - None, - {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10}, - ], - # struct-with-mixed-types - [ - { - "struct": { - "payload": { - "Domain": { - "Name": "abc", - "Id": {"Name": "host", "Value": "127.0.0.8"}, - "Duration": datetime.timedelta(minutes=12), - }, - "StreamId": "12345678", - "Duration": datetime.timedelta(minutes=4),
- "Offset": None, - "Resource": [ - { - "Name": "ZoneName", - "Value": "RAPIDS", - "Duration": datetime.timedelta(seconds=1), - } - ], - } - } - } - ], - ], -) -def test_parquet_reader_roundtrip_structs_with_arrow_schema(tmpdir, data): - # Ensure that the structs with duration types are faithfully being - # roundtripped across Parquet with arrow schema - pdf = pd.DataFrame({"struct": pd.Series(data)}) - - buffer = BytesIO() - pdf.to_parquet(buffer, engine="pyarrow") - - # Read parquet with arrow schema - got = cudf.read_parquet(buffer) - # Convert to cudf table for an apple to apple comparison - expected = cudf.from_pandas(pdf) - - # Check results - assert_eq(expected, got) - - # Reset buffer - buffer = BytesIO() - - # Write to buffer with cudf - expected.to_parquet(buffer, store_schema=True) - - # Read parquet with arrow schema - got = cudf.read_parquet(buffer) - # Convert to cudf table for an apple to apple comparison - expected = cudf.from_pandas(pdf) - - # Check results - assert_eq(expected, got) - - -@pytest.mark.parametrize("index", [None, True, False]) -@pytest.mark.skipif( - version.parse(pa.__version__) < version.parse("15.0.0"), - reason="https://github.com/apache/arrow/pull/37792", -) -def test_parquet_writer_roundtrip_with_arrow_schema(index): - # Ensure that the concrete and nested types are faithfully being roundtripped - # across Parquet with arrow schema - expected = cudf.DataFrame( - { - "s": cudf.Series([None, None, None], dtype="timedelta64[s]"), - "us": cudf.Series([None, 3456, None], dtype="timedelta64[us]"), - "duration_list": list( - [ - [ - datetime.timedelta(minutes=7, seconds=4), - datetime.timedelta(minutes=7), - ], - [ - None, - None, - ], - [ - datetime.timedelta(minutes=7, seconds=4), - None, - ], - ] - ), - "int64": cudf.Series([-1234, 123, 4123], dtype="int64"), - "uint32": cudf.Series([1234, 123, 4123], dtype="uint32"), - "list": list([[1, 2], [1, 2], [1, 2]]), - "bool": cudf.Series([True, None, False], dtype=bool), - "fixed32": cudf.Series([0.00, 1.0, None]).astype( - cudf.Decimal32Dtype(7, 2) - ), - "fixed64": cudf.Series([0.00, 1.0, None]).astype( - cudf.Decimal64Dtype(7, 2) - ), - "fixed128": cudf.Series([0.00, 1.0, None]).astype( - cudf.Decimal128Dtype(7, 2) - ), - "datetime": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), - "map": cudf.Series(["cat", "dog", "lion"]).map( - {"cat": "kitten", "dog": "puppy", "lion": "cub"} - ), - } - ) - - # Convert decimals32/64 to decimal128 if pyarrow version is < 19.0.0 - if version.parse(pa.__version__) < version.parse("19.0.0"): - expected = expected.astype({"fixed32": cudf.Decimal128Dtype(9, 2)}) - expected = expected.astype({"fixed64": cudf.Decimal128Dtype(18, 2)}) - - # Write to Parquet with arrow schema for faithful roundtrip - buffer = BytesIO() - expected.to_parquet(buffer, store_schema=True, index=index) - - # Read parquet with pyarrow, pandas and cudf readers - got = cudf.DataFrame.from_arrow(pq.read_table(buffer)) - got2 = cudf.DataFrame.from_pandas(pd.read_parquet(buffer)) - got3 = cudf.read_parquet(buffer) - - # drop the index column for comparison: __index_level_0__ - if index: - got.drop(columns="__index_level_0__", inplace=True) - got2.drop(columns="__index_level_0__", inplace=True) - - # Check results - assert_eq(expected, got) - assert_eq(expected, got2) - assert_eq(expected, got3) - - -def test_parquet_writer_int96_timestamps_and_arrow_schema(): - df = cudf.DataFrame( - { - "timestamp": cudf.Series( - [1234, 123, 4123], dtype="datetime64[ms]" - ), - } - ) - - # Output buffer - buffer = 
BytesIO() - - # Writing out parquet with both INT96 timestamps and arrow_schema - # enabled should throw an exception. - with pytest.raises(RuntimeError): - df.to_parquet(buffer, int96_timestamps=True, store_schema=True) - - -@pytest.mark.parametrize( - "data", - [ - # struct - [ - {"a": 1, "b": 2}, - {"a": 10, "b": 20}, - {"a": None, "b": 22}, - {"a": None, "b": None}, - {"a": 15, "b": None}, - ], - # struct-of-list - [ - {"a": 1, "b": 2, "c": [1, 2, 3]}, - {"a": 10, "b": 20, "c": [4, 5]}, - {"a": None, "b": 22, "c": [6]}, - {"a": None, "b": None, "c": None}, - {"a": 15, "b": None, "c": [-1, -2]}, - None, - {"a": 100, "b": 200, "c": [-10, None, -20]}, - ], - # list-of-struct - [ - [{"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 4, "b": 5}], - None, - [{"a": 10, "b": 20}], - [{"a": 100, "b": 200}, {"a": None, "b": 300}, None], - ], - # struct-of-struct - [ - {"a": 1, "b": {"inner_a": 10, "inner_b": 20}, "c": 2}, - {"a": 3, "b": {"inner_a": 30, "inner_b": 40}, "c": 4}, - {"a": 5, "b": {"inner_a": 50, "inner_b": None}, "c": 6}, - {"a": 7, "b": None, "c": 8}, - {"a": None, "b": {"inner_a": None, "inner_b": None}, "c": None}, - None, - {"a": None, "b": {"inner_a": None, "inner_b": 100}, "c": 10}, - ], - # struct-with-mixed-types - [ - { - "struct": { - "payload": { - "Domain": { - "Name": "abc", - "Id": {"Name": "host", "Value": "127.0.0.8"}, - "Duration": datetime.timedelta(minutes=12), - }, - "StreamId": "12345678", - "Duration": datetime.timedelta(minutes=4), - "Offset": None, - "Resource": [ - { - "Name": "ZoneName", - "Value": "RAPIDS", - "Duration": datetime.timedelta(seconds=1), - } - ], - } - } - } - ], - ], -) -@pytest.mark.parametrize("index", [None, True, False]) -@pytest.mark.skipif( - version.parse(pa.__version__) < version.parse("15.0.0"), - reason="https://github.com/apache/arrow/pull/37792", -) -def test_parquet_writer_roundtrip_structs_with_arrow_schema( - tmpdir, data, index -): - # Ensure that the structs are faithfully being roundtripped across - # Parquet with arrow schema - pa_expected = pa.Table.from_pydict({"struct": data}) - - expected = cudf.DataFrame.from_arrow(pa_expected) - - # Write expected data frame to Parquet with arrow schema - buffer = BytesIO() - expected.to_parquet(buffer, store_schema=True, index=index) - - # Read Parquet with pyarrow - pa_got = pq.read_table(buffer) - - # drop the index column for comparison: __index_level_0__ - if index: - pa_got = pa_got.drop(columns="__index_level_0__") - - # Check results - assert_eq(pa_expected, pa_got) - - # Convert to cuDF table and also read Parquet with cuDF reader - got = cudf.DataFrame.from_arrow(pa_got) - got2 = cudf.read_parquet(buffer) - - # Check results - assert_eq(expected, got) - assert_eq(expected, got2) - - -@pytest.mark.parametrize("chunk_read_limit", [0, 240, 1024000000]) -@pytest.mark.parametrize("pass_read_limit", [0, 240, 1024000000]) -@pytest.mark.parametrize("use_pandas_metadata", [True, False]) -@pytest.mark.parametrize("row_groups", [[[0]], None, [[0, 1]]]) -def test_parquet_chunked_reader( - chunk_read_limit, pass_read_limit, use_pandas_metadata, row_groups -): - df = pd.DataFrame( - {"a": [1, 2, 3, None] * 1000, "b": ["av", "qw", None, "xyz"] * 1000} - ) - buffer = BytesIO() - df.to_parquet(buffer, row_group_size=1000) - with cudf.option_context("io.parquet.low_memory", True): - actual = cudf.read_parquet( - [buffer], - _chunk_read_limit=chunk_read_limit, - _pass_read_limit=pass_read_limit, - use_pandas_metadata=use_pandas_metadata, - row_groups=row_groups, - ) - expected = cudf.read_parquet( - 
buffer, use_pandas_metadata=use_pandas_metadata, row_groups=row_groups - ) - assert_eq(expected, actual) - - -@pytest.mark.parametrize("chunk_read_limit", [256, 2560]) -@pytest.mark.parametrize("pass_read_limit", [256, 2560]) -@pytest.mark.parametrize("num_rows", [49, 291]) -@pytest.mark.parametrize("skip_rows", [412, 601]) -@pytest.mark.parametrize("data_size", [100, 200]) -def test_parquet_chunked_reader_structs( - chunk_read_limit, pass_read_limit, num_rows, skip_rows, data_size -): - data = [ - { - "a": "g", - "b": { - "b_a": 10, - "b_b": {"b_b_b": None, "b_b_a": 2}, - }, - "c": None, - }, - {"a": None, "b": {"b_a": None, "b_b": None}, "c": [15, 16]}, - {"a": "j", "b": None, "c": [8, 10]}, - {"a": None, "b": {"b_a": None, "b_b": None}, "c": None}, - None, - { - "a": None, - "b": {"b_a": None, "b_b": {"b_b_b": 1}}, - "c": [18, 19], - }, - {"a": None, "b": None, "c": None}, - ] * data_size - - pa_struct = pa.Table.from_pydict({"struct": data}) - df = cudf.DataFrame.from_arrow(pa_struct) - buffer = BytesIO() - df.to_parquet(buffer, row_group_size_rows=7000, max_page_size_rows=100) - - # Number of rows to read - nrows = num_rows if skip_rows + num_rows < len(df) else len(df) - skip_rows - - with cudf.option_context("io.parquet.low_memory", True): - actual = cudf.read_parquet( - [buffer], - _chunk_read_limit=chunk_read_limit, - _pass_read_limit=pass_read_limit, - nrows=nrows, - skip_rows=skip_rows, - ).reset_index(drop=True) - expected = cudf.read_parquet( - buffer, nrows=nrows, skip_rows=skip_rows - ).reset_index(drop=True) - assert_eq(expected, actual) - - -@pytest.mark.parametrize("chunk_read_limit", [0, 24, 10240000]) -@pytest.mark.parametrize("pass_read_limit", [0, 24, 10240000]) -@pytest.mark.parametrize("num_rows", [47, 97, None]) -@pytest.mark.parametrize( - "str_encoding", - [ - "PLAIN", - "DELTA_BYTE_ARRAY", - "DELTA_LENGTH_BYTE_ARRAY", - ], -) -def test_parquet_chunked_reader_string_decoders( - chunk_read_limit, - pass_read_limit, - num_rows, - str_encoding, -): - df = pd.DataFrame( - { - "i64": [1, 2, 3, None] * 100, - "str": ["av", "qw", "asd", "xyz"] * 100, - "list": list( - [["ad", "cd"], ["asd", "fd"], None, ["asd", None]] * 100 - ), - } - ) - buffer = BytesIO() - # Write 4 Parquet row groups with string column encoded - df.to_parquet( - buffer, - row_group_size=100, - use_dictionary=False, - column_encoding={"str": str_encoding}, - ) - - # Number of rows to read - nrows = num_rows if num_rows is not None else len(df) - - # Check with num_rows specified - with cudf.option_context("io.parquet.low_memory", True): - actual = cudf.read_parquet( - [buffer], - _chunk_read_limit=chunk_read_limit, - _pass_read_limit=pass_read_limit, - nrows=nrows, - ) - expected = cudf.read_parquet( - buffer, - nrows=nrows, - ) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "nrows, skip_rows", - [ - (0, 0), - (99, 101), - (988, 61), - (99, 1011), - (101, 1601), - (99, 1901), - ], -) -@pytest.mark.parametrize( - "row_group_size_rows, page_size_rows", - [ - (1000, 1000), # 1 RG, 1 page per RG - (1000, 100), # 1 RG, multiple pages per RG - (100, 100), # multiple RGs, 1 page per RG - (100, 10), # multiple RGs, multiple pages per RG - ], -) -@pytest.mark.parametrize( - "chunk_read_limit, pass_read_limit", - [ - (256, 256), # small chunk and pass read limits - (0, 1024), # zero chunk and small pass read limit - (256, 0), # small chunk and zero pass read limit - (256000, 256000), # large chunk and pass read limits - ], -) -def test_chunked_parquet_reader_nrows_skiprows( - nrows, - 
skip_rows, - row_group_size_rows, - page_size_rows, - chunk_read_limit, - pass_read_limit, -): - df = cudf.DataFrame( - { - "a": list( - [ - ["cat", "lion", "deer"], - ["bear", "ibex", None], - ["tiger", None, "bull"], - [None, "wolf", "fox"], - ] - ) - * 500, - "b": ["av", "qw", None, "xyz"] * 500, - } - ) - expected = df[skip_rows : skip_rows + nrows] - buffer = BytesIO() - df.to_parquet( - buffer, - row_group_size_rows=row_group_size_rows, - max_page_size_rows=page_size_rows, - ) - got = cudf.read_parquet(buffer, nrows=nrows, skip_rows=skip_rows) - assert_eq(expected, got) - - # Check for chunked parquet reader - with cudf.option_context("io.parquet.low_memory", True): - got = cudf.read_parquet( - [buffer], - _chunk_read_limit=chunk_read_limit, - _pass_read_limit=pass_read_limit, - nrows=nrows, - skip_rows=skip_rows, - ).reset_index(drop=True) - # Reset index for comparison - expected = expected.reset_index(drop=True) - assert_eq(expected, got) - - -def test_parquet_reader_pandas_compatibility(): - df = pd.DataFrame( - {"a": [1, 2, 3, 4] * 10000, "b": ["av", "qw", "hi", "xyz"] * 10000} - ) - buffer = BytesIO() - df.to_parquet(buffer) - with cudf.option_context("io.parquet.low_memory", True): - expected = cudf.read_parquet(buffer) - assert_eq(expected, df) - - -@pytest.mark.parametrize("store_schema", [True, False]) -def test_parquet_reader_with_mismatched_tables(store_schema): - # cuDF tables with mixed types - df1 = cudf.DataFrame( - { - "i32": cudf.Series([None, None, None], dtype="int32"), - "i64": cudf.Series([1234, 467, 123], dtype="int64"), - "list": list([[1, 2], None, [None, 6]]), - "time": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), - "str": ["vfd", None, "ghu"], - "d_list": list( - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - [None, pd.Timedelta(minutes=3)], - [pd.Timedelta(minutes=8), None], - ] - ), - } - ) - - df2 = cudf.DataFrame( - { - "str": ["abc", "def", "ghi"], - "i64": cudf.Series([None, 65, 98], dtype="int64"), - "times": cudf.Series([1234, None, 4123], dtype="datetime64[us]"), - "list": list([[7, 8], [9, 10], [11, 12]]), - "d_list": list( - [ - [pd.Timedelta(minutes=4), None], - None, - [pd.Timedelta(minutes=6), None], - ] - ), - } - ) - - # IO buffers - buf1 = BytesIO() - buf2 = BytesIO() - - # Write Parquet with and without arrow schema - df1.to_parquet(buf1, store_schema=store_schema) - df2.to_parquet(buf2, store_schema=store_schema) - - # Read mismatched Parquet files - got = cudf.read_parquet( - [buf1, buf2], - columns=["list", "d_list", "str"], - filters=[("i64", ">", 20)], - allow_mismatched_pq_schemas=True, - ) - - # Construct the expected table - expected = cudf.concat( - [ - df1[df1["i64"] > 20][["list", "d_list", "str"]], - df2[df2["i64"] > 20][["list", "d_list", "str"]], - ] - ).reset_index(drop=True) - - # Read with chunked reader (filter columns not supported) - with cudf.option_context("io.parquet.low_memory", True): - got_chunked = cudf.read_parquet( - [buf1, buf2], - columns=["list", "d_list", "str"], - _chunk_read_limit=240, - _pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) - - # Construct the expected table without filter columns - expected_chunked = cudf.concat( - [df1[["list", "d_list", "str"]], df2[["list", "d_list", "str"]]] - ).reset_index(drop=True) - - # Check results - assert_eq(expected, got) - assert_eq(expected_chunked, got_chunked) - - -def test_parquet_reader_with_mismatched_structs(): - data1 = [ - { - "a": 1, - "b": { - "a_a": 10, - "b_b": {"b_b_b": 1, "b_b_a": 2}, - }, - "c": 2, - }, - { - 
"a": 3, - "b": {"b_a": 30, "b_b": {"b_b_a": 210}}, - "c": 4, - }, - {"a": 5, "b": {"b_a": 50, "b_b": None}, "c": 6}, - {"a": 7, "b": None, "c": 8}, - {"a": 5, "b": {"b_a": None, "b_b": None}, "c": None}, - ] - - data2 = [ - {"a": 1, "b": {"b_b": {"b_b_a": None}}}, - {"a": 5, "b": {"b_b": None}}, - {"a": 7, "b": {"b_b": {"b_b_b": 1, "b_b_a": 0}}}, - {"a": None, "b": {"b_b": None}}, - None, - ] - - # cuDF tables from struct data - df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) - df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) - - # Buffers - buf1 = BytesIO() - buf2 = BytesIO() - - # Write to parquet - df1.to_parquet(buf1) - df2.to_parquet(buf2) - - # Read the struct.b.inner_b.inner_inner_a column from parquet - got = cudf.read_parquet( - [buf1, buf2], - columns=["struct.b.b_b.b_b_a"], - allow_mismatched_pq_schemas=True, - ) - got = ( - cudf.Series(got["struct"]) - .struct.field("b") - .struct.field("b_b") - .struct.field("b_b_a") - ) - - # Read with chunked reader - with cudf.option_context("io.parquet.low_memory", True): - got_chunked = cudf.read_parquet( - [buf1, buf2], - columns=["struct.b.b_b.b_b_a"], - _chunk_read_limit=240, - _pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) - got_chunked = ( - cudf.Series(got_chunked["struct"]) - .struct.field("b") - .struct.field("b_b") - .struct.field("b_b_a") - ) - - # Construct the expected series - expected = cudf.concat( - [ - cudf.Series(df1["struct"]) - .struct.field("b") - .struct.field("b_b") - .struct.field("b_b_a"), - cudf.Series(df2["struct"]) - .struct.field("b") - .struct.field("b_b") - .struct.field("b_b_a"), - ] - ).reset_index(drop=True) - - # Check results - assert_eq(expected, got) - assert_eq(expected, got_chunked) - - -def test_parquet_reader_with_mismatched_schemas_error(): - df1 = cudf.DataFrame( - { - "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), - "i64": cudf.Series([123, 3454, 123], dtype="int64"), - "i32": cudf.Series([123, 3454, 123], dtype="int32"), - } - ) - df2 = cudf.DataFrame( - { - "i64": cudf.Series([123, 3454, 123], dtype="int64"), - "millis": cudf.Series([123, 3454, 123], dtype="timedelta64[ms]"), - } - ) - - buf1 = BytesIO() - buf2 = BytesIO() - - df1.to_parquet(buf1, store_schema=True) - df2.to_parquet(buf2, store_schema=False) - - with pytest.raises( - ValueError, - match="Encountered mismatching SchemaElement properties for a column in the selected path", - ): - cudf.read_parquet( - [buf1, buf2], columns=["millis"], allow_mismatched_pq_schemas=True - ) - - data1 = [ - {"a": 1, "b": {"b_a": 1, "b_b": 6}}, - {"a": 3, "b": {"b_a": None, "b_b": 2}}, - ] - data2 = [ - {"b": {"b_a": 1}, "c": "str"}, - {"b": {"b_a": None}, "c": None}, - ] - - # cuDF tables from struct data - df1 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data1})) - df2 = cudf.DataFrame.from_arrow(pa.Table.from_pydict({"struct": data2})) - - # Buffers - buf1 = BytesIO() - buf2 = BytesIO() - - # Write to parquet - df1.to_parquet(buf1) - df2.to_parquet(buf2) - - with pytest.raises( - IndexError, - match="Encountered mismatching number of children for a column in the selected path", - ): - cudf.read_parquet( - [buf1, buf2], - columns=["struct.b"], - allow_mismatched_pq_schemas=True, - ) - - with pytest.raises( - IndexError, - match="Encountered mismatching schema tree depths across data sources", - ): - cudf.read_parquet( - [buf1, buf2], - columns=["struct.b.b_b"], - allow_mismatched_pq_schemas=True, - ) - - -def 
test_parquet_roundtrip_zero_rows_no_column_mask(): - expected = cudf.DataFrame._from_data( - { - "int": cudf.core.column.column_empty(0, np.dtype(np.int64)), - "float": cudf.core.column.column_empty(0, np.dtype(np.float64)), - "datetime": cudf.core.column.column_empty( - 0, np.dtype("datetime64[ns]") - ), - "timedelta": cudf.core.column.column_empty( - 0, np.dtype("timedelta64[ns]") - ), - "bool": cudf.core.column.column_empty(0, np.dtype(np.bool_)), - "decimal": cudf.core.column.column_empty( - 0, cudf.Decimal64Dtype(1) - ), - "struct": cudf.core.column.column_empty( - 0, cudf.StructDtype({"a": "int64"}) - ), - "list": cudf.core.column.column_empty( - 0, cudf.ListDtype("float64") - ), - } - ) - with BytesIO() as bio: - expected.to_parquet(bio) - result = cudf.read_parquet(bio) - assert_eq(result, expected) - - -def test_parquet_reader_mismatched_nullability(): - # Ensure that we can faithfully read the tables with mismatched nullabilities - df1 = cudf.DataFrame( - { - "timedelta": cudf.Series([12, 54, 1231], dtype="timedelta64[ms]"), - "duration_list": list( - [ - [ - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - None, - [pd.Timedelta(minutes=8), None], - ], - None, - ], - None, - [ - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], - [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], - ] - ], - ] - ), - "int64": cudf.Series([1234, None, 4123], dtype="int64"), - "int32": cudf.Series([1234, 123, 4123], dtype="int32"), - "list": list([[1, 2], [1, 2], [1, 2]]), - "datetime": cudf.Series([1234, 123, 4123], dtype="datetime64[ms]"), - "string": cudf.Series(["kitten", "puppy", "cub"]), - } - ) - - df2 = cudf.DataFrame( - { - "timedelta": cudf.Series( - [None, None, None], dtype="timedelta64[ms]" - ), - "duration_list": list( - [ - [ - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - [pd.Timedelta(minutes=8), pd.Timedelta(minutes=1)], - ], - ], - [ - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], - [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], - ] - ], - [ - [ - [pd.Timedelta(minutes=1), pd.Timedelta(minutes=2)], - [pd.Timedelta(minutes=5), pd.Timedelta(minutes=3)], - [pd.Timedelta(minutes=8), pd.Timedelta(minutes=4)], - ] - ], - ] - ), - "int64": cudf.Series([1234, 123, 4123], dtype="int64"), - "int32": cudf.Series([1234, None, 4123], dtype="int32"), - "list": list([[1, 2], None, [1, 2]]), - "datetime": cudf.Series( - [1234, None, 4123], dtype="datetime64[ms]" - ), - "string": cudf.Series(["kitten", None, "cub"]), - } - ) - - # Write tables to parquet with arrow schema for compatibility for duration column(s) - fname1 = BytesIO() - df1.to_parquet(fname1, store_schema=True) - fname2 = BytesIO() - df2.to_parquet(fname2, store_schema=True) - - # Read tables back with cudf and arrow in either order and compare - assert_eq( - cudf.read_parquet([fname1, fname2]), - cudf.concat([df1, df2]).reset_index(drop=True), - ) - assert_eq( - cudf.read_parquet([fname2, fname1]), - cudf.concat([df2, df1]).reset_index(drop=True), - ) - - -def test_parquet_reader_mismatched_nullability_structs(tmpdir): - data1 = [ - { - "a": "a", - "b": { - "b_a": 10, - "b_b": {"b_b_b": 1, "b_b_a": 12}, - }, - "c": [1, 2], - }, - { - "a": "b", - "b": { - "b_a": 30, - "b_b": {"b_b_b": 2, "b_b_a": 2}, - }, - "c": [3, 4], - }, - { - "a": "c", - "b": { - "b_a": 50, - "b_b": {"b_b_b": 4, "b_b_a": 5}, - }, - "c": [5, 6], - }, - { - "a": "d", - "b": { - "b_a": 135, - "b_b": {"b_b_b": 12, 
"b_b_a": 32}, - }, - "c": [7, 8], - }, - { - "a": "e", - "b": { - "b_a": 1, - "b_b": {"b_b_b": 1, "b_b_a": 5}, - }, - "c": [9, 10], - }, - { - "a": "f", - "b": { - "b_a": 32, - "b_b": {"b_b_b": 1, "b_b_a": 6}, - }, - "c": [11, 12], - }, - ] - - data2 = [ - { - "a": "g", - "b": { - "b_a": 10, - "b_b": {"b_b_b": None, "b_b_a": 2}, - }, - "c": None, - }, - {"a": None, "b": {"b_a": None, "b_b": None}, "c": [15, 16]}, - {"a": "j", "b": None, "c": [8, 10]}, - {"a": None, "b": {"b_a": None, "b_b": None}, "c": None}, - None, - { - "a": None, - "b": {"b_a": None, "b_b": {"b_b_b": 1}}, - "c": [18, 19], - }, - {"a": None, "b": None, "c": None}, - ] - - pa_table1 = pa.Table.from_pydict({"struct": data1}) - df1 = cudf.DataFrame.from_arrow(pa_table1) - - pa_table2 = pa.Table.from_pydict({"struct": data2}) - df2 = cudf.DataFrame.from_arrow(pa_table2) - - # Write tables to parquet - buf1 = BytesIO() - df1.to_parquet(buf1) - buf2 = BytesIO() - df2.to_parquet(buf2) - - # Read tables back with cudf and compare with expected. - assert_eq( - cudf.read_parquet([buf1, buf2]), - cudf.concat([df1, df2]).reset_index(drop=True), - ) - assert_eq( - cudf.read_parquet([buf2, buf1]), - cudf.concat([df2, df1]).reset_index(drop=True), - ) - - -@pytest.mark.skipif( - pa.__version__ == "19.0.0", - reason="https://github.com/rapidsai/cudf/issues/17806", -) -@pytest.mark.parametrize( - "stats_fname,bloom_filter_fname", - [ - ( - "mixed_card_ndv_100_chunk_stats.snappy.parquet", - "mixed_card_ndv_100_bf_fpp0.1_nostats.snappy.parquet", - ), - ( - "mixed_card_ndv_500_chunk_stats.snappy.parquet", - "mixed_card_ndv_500_bf_fpp0.1_nostats.snappy.parquet", - ), - ], -) -@pytest.mark.parametrize( - "predicate,expected_len", - [ - ([[("str", "==", "FINDME")], [("fp64", "==", float(500))]], 2), - ([("fixed_pt", "==", decimal.Decimal(float(500)))], 2), - ([[("ui32", "==", np.uint32(500)), ("str", "==", "FINDME")]], 2), - ([[("str", "==", "FINDME")], [("ui32", ">=", np.uint32(0))]], 1000), - ( - [ - ("str", "!=", "FINDME"), - ("fixed_pt", "==", decimal.Decimal(float(500))), - ], - 0, - ), - ], -) -def test_parquet_bloom_filters( - datadir, stats_fname, bloom_filter_fname, predicate, expected_len -): - fname_stats = datadir / stats_fname - fname_bf = datadir / bloom_filter_fname - df_stats = cudf.read_parquet(fname_stats, filters=predicate).reset_index( - drop=True - ) - df_bf = cudf.read_parquet(fname_bf, filters=predicate).reset_index( - drop=True - ) - - # Check if tables equal - assert_eq( - df_stats, - df_bf, - ) - - # Check for table length - assert_eq( - len(df_stats), - expected_len, - ) - - -@pytest.fixture(params=["cuda", "pool", "cuda_async"]) -def memory_resource(request): - import rmm - - current_mr = rmm.mr.get_current_device_resource() - - kind = request.param - if kind == "cuda": - mr = rmm.mr.CudaMemoryResource() - elif kind == "pool": - base = rmm.mr.CudaMemoryResource() - free, _ = rmm.mr.available_device_memory() - size = int(round(free * 0.5 / 256) * 256) - mr = rmm.mr.PoolMemoryResource(base, size, size) - elif kind == "cuda_async": - mr = rmm.mr.CudaAsyncMemoryResource() - - rmm.mr.set_current_device_resource(mr) - - try: - yield mr - finally: - rmm.mr.set_current_device_resource(current_mr) - - -@pytest.mark.parametrize("columns", [["r_reason_desc"], None]) -def test_parquet_bloom_filters_alignment(datadir, columns, memory_resource): - fname = datadir / "bloom_filter_alignment.parquet" - filters = [("r_reason_desc", "==", "Did not like the color")] - - # Read expected table using pyarrow - expected = 
pq.read_table(fname, columns=columns, filters=filters) - - # Read with cudf using the memory resource from fixture - read = cudf.read_parquet( - fname, columns=columns, filters=filters - ).to_arrow() - - assert_eq(expected, read) - - -def test_parquet_reader_unsupported_compression(datadir): - fname = datadir / "hadoop_lz4_compressed.parquet" - - with pytest.raises( - RuntimeError, - match="Unsupported Parquet compression type: LZ4", - ): - cudf.read_parquet(fname) - - -def test_parquet_reader_empty_compressed_page(datadir): - fname = datadir / "empty_datapage_v2.parquet" - - df = cudf.DataFrame({"value": cudf.Series([None], dtype="float32")}) - assert_eq(cudf.read_parquet(fname), df) - - -@pytest.fixture(params=[1234], scope="module") -def my_pdf(request): - return build_pdf(request, True) - - -@pytest.mark.parametrize("compression", ["brotli", "gzip", "snappy", "zstd"]) -def test_parquet_decompression(set_decomp_env_vars, my_pdf, compression): - # pandas returns category objects whereas cuDF returns hashes - expect = my_pdf.drop(columns=["col_category"]) - - # Write the DataFrame to a Parquet file - buffer = BytesIO() - expect.to_parquet(buffer, compression=compression) - - # Read the Parquet file back into a DataFrame - got = cudf.read_parquet(buffer) - - assert_eq(expect, got) From 960f93a7f9909a8514e54bef126bc4999907ebed Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:15:27 -0700 Subject: [PATCH 163/366] Cache hash values to improve hash-based groupby performance with wide/complex table keys (#19670) In hash-based groupby, the row indices of the keys table are inserted into a hash set twice. Each insertion computes the row hashes, which can be expensive in terms of memory access for wide key tables, with or without complex type columns. This PR pre-computes the hash values and stores them in a caching array for such wide/complex key tables, which helps improve performance to some extent. A new benchmark is also implemented to cover exactly these situations (wide tables and complex key types). Contributes to https://github.com/rapidsai/cudf/issues/19513.
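The core idea, as a minimal host-side sketch (an illustration only, not the actual libcudf implementation, which operates on device memory; names like `distinct_key_rows` and `cached_hasher` are hypothetical): hash every key row exactly once into a cache array, then make the hash set's hasher a cheap array lookup while equality still compares the full key values.

```cpp
#include <cstddef>
#include <unordered_set>
#include <vector>

using key_row = std::vector<int>;  // stand-in for one row of a wide keys table

// Hasher that returns the pre-computed hash for a row index instead of re-hashing.
struct cached_hasher {
  std::vector<std::size_t> const* cache;
  std::size_t operator()(std::size_t row) const { return (*cache)[row]; }
};

// Equality must still inspect the actual key values.
struct rows_equal {
  std::vector<key_row> const* rows;
  bool operator()(std::size_t lhs, std::size_t rhs) const
  {
    return (*rows)[lhs] == (*rows)[rhs];
  }
};

std::vector<std::size_t> distinct_key_rows(std::vector<key_row> const& rows)
{
  // The expensive part: traverse every key column, but exactly once per row.
  std::vector<std::size_t> cache(rows.size());
  for (std::size_t r = 0; r < rows.size(); ++r) {
    std::size_t h = 0;
    for (int v : rows[r]) { h = (h * 1099511628211ULL) ^ static_cast<std::size_t>(v); }
    cache[r] = h;
  }

  // Every probe during insertion now costs one array read instead of a row traversal.
  std::unordered_set<std::size_t, cached_hasher, rows_equal> set(
    rows.size(), cached_hasher{&cache}, rows_equal{&rows});
  std::vector<std::size_t> distinct;
  for (std::size_t r = 0; r < rows.size(); ++r) {
    if (set.insert(r).second) { distinct.push_back(r); }
  }
  return distinct;
}
```

The trade-off is one extra hash-sized array entry per row, which is why the caching is applied only to wide/complex key tables, where re-hashing is the dominant cost.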
Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - David Wendt (https://github.com/davidwendt) - Muhammad Haseeb (https://github.com/mhaseeb123) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19670 --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/groupby/group_complex_keys.cpp | 158 ++++++++++++++++++ cpp/src/groupby/hash/compute_aggregations.cu | 3 +- cpp/src/groupby/hash/compute_aggregations.cuh | 22 ++- cpp/src/groupby/hash/compute_aggregations.hpp | 3 +- .../groupby/hash/compute_aggregations_null.cu | 3 +- .../hash/compute_global_memory_aggs.cu | 3 +- .../hash/compute_global_memory_aggs.cuh | 14 +- .../hash/compute_global_memory_aggs.hpp | 3 +- .../hash/compute_global_memory_aggs_null.cu | 3 +- cpp/src/groupby/hash/compute_groupby.cu | 96 ++++++++--- .../groupby/hash/compute_mapping_indices.cu | 6 +- .../groupby/hash/compute_mapping_indices.cuh | 16 +- .../groupby/hash/compute_mapping_indices.hpp | 16 +- .../hash/compute_mapping_indices_null.cu | 7 +- .../hash/compute_shared_memory_aggs.cu | 15 +- .../hash/compute_shared_memory_aggs.hpp | 1 - cpp/src/groupby/hash/helpers.cuh | 21 ++- cpp/src/groupby/hash/single_pass_functors.cuh | 21 +-- 19 files changed, 312 insertions(+), 100 deletions(-) create mode 100644 cpp/benchmarks/groupby/group_complex_keys.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 9aad6a21012..5fc041c4e58 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -266,6 +266,7 @@ ConfigureBench( ConfigureNVBench( GROUPBY_NVBENCH + groupby/group_complex_keys.cpp groupby/group_histogram.cpp groupby/group_m2.cpp groupby/group_max.cpp diff --git a/cpp/benchmarks/groupby/group_complex_keys.cpp b/cpp/benchmarks/groupby/group_complex_keys.cpp new file mode 100644 index 00000000000..8717f0f50d8 --- /dev/null +++ b/cpp/benchmarks/groupby/group_complex_keys.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace { + +auto generate_int_keys(cudf::size_type num_cols, + cudf::size_type num_rows, + cudf::size_type value_key_ratio, + double null_probability) +{ + auto const create_column = [&] { + auto builder = + data_profile_builder() + .cardinality(num_rows / value_key_ratio) + .distribution(cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + if (null_probability > 0) { + builder.null_probability(null_probability); + } else { + builder.no_validity(); + } + return create_random_column( + cudf::type_to_id(), row_count{num_rows}, data_profile{builder}); + }; + std::vector> cols; + cols.reserve(num_cols); + for (cudf::size_type i = 0; i < num_cols; ++i) { + cols.emplace_back(create_column()); + } + return std::make_unique(std::move(cols)); +} + +auto generate_mixed_types_keys(cudf::size_type num_cols, + cudf::size_type num_rows, + cudf::size_type value_key_ratio, + double null_probability) +{ + constexpr auto max_str_length = 50; + constexpr auto max_list_size = 10; + constexpr auto nested_depth = 2; + + auto builder = data_profile_builder() + .cardinality(num_rows / value_key_ratio) + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_rows) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length) + .distribution(cudf::type_id::INT64, distribution_id::UNIFORM, 0, num_rows) + .distribution(cudf::type_id::LIST, distribution_id::UNIFORM, 0, max_list_size) + .list_depth(nested_depth) + .struct_depth(nested_depth); + if (null_probability > 0) { + builder.null_probability(null_probability); + } else { + builder.no_validity(); + } + + return create_random_table(cycle_dtypes({cudf::type_id::INT32, + cudf::type_id::STRING, + cudf::type_id::INT64, + cudf::type_id::LIST, + cudf::type_id::STRUCT}, + num_cols), + row_count{num_rows}, + data_profile{builder}); +} + +auto generate_vals(cudf::size_type num_rows, double null_probability) +{ + using Type = int64_t; + auto builder = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + if (null_probability > 0) { + builder.null_probability(null_probability); + } else { + builder.no_validity(); + } + return create_random_column(cudf::type_to_id(), row_count{num_rows}, data_profile{builder}); +} + +template +void run_benchmark_complex_keys(nvbench::state& state) +{ + auto const n_cols = static_cast(state.get_int64("num_cols")); + auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const value_key_ratio = static_cast(state.get_int64("value_key_ratio")); + auto const null_probability = state.get_float64("null_probability"); + + auto const keys_table = [&] { + if constexpr (is_int_keys) { + return generate_int_keys(n_cols, n_rows, value_key_ratio, null_probability); + } else { + return generate_mixed_types_keys(n_cols, n_rows, value_key_ratio, null_probability); + } + }(); + auto const vals = generate_vals(n_rows, null_probability); + + cudf::groupby::groupby gb_obj(keys_table->view()); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = vals->view(); + requests[0].aggregations.push_back(cudf::make_min_aggregation()); + + auto const mem_stats_logger = cudf::memory_stats_logger(); + auto const stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { + [[maybe_unused]] 
auto const result = gb_obj.aggregate(requests, stream); + }); + + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +} // namespace + +void bench_groupby_int_keys(nvbench::state& state) { run_benchmark_complex_keys<true>(state); } +void bench_groupby_mixed_types_keys(nvbench::state& state) +{ + run_benchmark_complex_keys<false>(state); +} + +#define RUN_BENCH(bench_func) \ + NVBENCH_BENCH(bench_func) \ + .set_name(#bench_func) \ + .add_int64_power_of_two_axis("num_rows", {12, 18, 24}) \ + .add_int64_axis("value_key_ratio", {20, 200}) \ + .add_float64_axis("null_probability", {0, 0.5}) + +RUN_BENCH(bench_groupby_int_keys).add_int64_axis("num_cols", {1, 2, 4, 8, 16}); + +// Not enough memory for more mixed types columns. +RUN_BENCH(bench_groupby_mixed_types_keys).add_int64_axis("num_cols", {1, 2, 3, 4, 5}); diff --git a/cpp/src/groupby/hash/compute_aggregations.cu b/cpp/src/groupby/hash/compute_aggregations.cu index cac6c2224f0..a0226de5b82 100644 --- a/cpp/src/groupby/hash/compute_aggregations.cu +++ b/cpp/src/groupby/hash/compute_aggregations.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector<cudf::size_type> compute_aggregations( int64_t num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, global_set_t& global_set, cudf::host_span<aggregation_request const> requests, diff --git a/cpp/src/groupby/hash/compute_aggregations.cuh b/cpp/src/groupby/hash/compute_aggregations.cuh index 60a8b3c2f38..65fb8a7738a 100644 --- a/cpp/src/groupby/hash/compute_aggregations.cuh +++ b/cpp/src/groupby/hash/compute_aggregations.cuh @@ -25,6 +25,7 @@ #include "single_pass_functors.cuh" #include +#include #include #include #include @@ -45,6 +46,7 @@ #include namespace cudf::groupby::detail::hash { + /** * @brief Computes all aggregations from `requests` that require a single pass * over the data and stores the results in `sparse_results` @@ -52,7 +54,6 @@ template rmm::device_uvector<cudf::size_type> compute_aggregations( int64_t num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, SetType& global_set, cudf::host_span<aggregation_request const> requests, @@ -64,8 +65,17 @@ rmm::device_uvector<cudf::size_type> compute_aggregations( auto const d_agg_kinds = cudf::detail::make_device_uvector_async( agg_kinds, stream, rmm::mr::get_current_device_resource()); - auto const grid_size = - max_occupancy_grid_size>(num_rows); + auto const grid_size = [&] { + auto const max_blocks_mapping = + max_active_blocks_mapping_kernel>(); + auto const max_blocks_aggs = max_active_blocks_shmem_aggs_kernel(); + // We launch the same grid size for both kernels, thus we need to take the minimum of the two. + auto const max_blocks = std::min(max_blocks_mapping, max_blocks_aggs); + auto const max_grid_size = max_blocks * cudf::detail::num_multiprocessors(); + auto const num_blocks = + cudf::util::div_rounding_up_safe(static_cast<cudf::size_type>(num_rows), GROUPBY_BLOCK_SIZE); + return std::min(max_grid_size, num_blocks); + }(); auto const available_shmem_size = get_available_shared_memory_size(grid_size); auto const offsets_buffer_size = compute_shmem_offsets_size(flattened_values.num_columns()) * 2; auto const data_buffer_size = available_shmem_size - offsets_buffer_size; @@ -94,7 +104,6 @@ rmm::device_uvector<cudf::size_type> compute_aggregations( // present.
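// [Editor's sketch] The grid-size lambda above sizes one common grid for two
// cooperating kernels from their occupancy. A minimal, self-contained sketch of
// the same technique follows; the kernel names, signatures, and block size are
// hypothetical, only the CUDA occupancy/device-attribute APIs are real.

#include <algorithm>
#include <cuda_runtime.h>

__global__ void mapping_kernel(int) {}
__global__ void aggs_kernel(int) {}

int common_grid_size(int num_rows, int block_size = 128)
{
  int blocks_mapping = 0;
  int blocks_aggs    = 0;
  // Query how many blocks of each kernel can be resident per SM at this block size.
  cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_mapping, mapping_kernel, block_size, 0);
  cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_aggs, aggs_kernel, block_size, 0);
  int num_sms = 0;
  cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, 0);
  // Both kernels share one grid, so honor the more restrictive occupancy,
  // and never launch more blocks than the input actually needs.
  int const max_grid = std::min(blocks_mapping, blocks_aggs) * num_sms;
  int const needed   = (num_rows + block_size - 1) / block_size;
  return std::min(max_grid, needed);
}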
if (!is_shared_memory_compatible) { return compute_global_memory_aggs(num_rows, - skip_rows_with_nulls, row_bitmask, flattened_values, d_agg_kinds.data(), @@ -123,7 +132,6 @@ rmm::device_uvector compute_aggregations( num_rows, global_set_ref, row_bitmask, - skip_rows_with_nulls, local_mapping_index.data(), global_mapping_index.data(), block_cardinality.data(), @@ -157,7 +165,6 @@ rmm::device_uvector compute_aggregations( available_shmem_size, num_rows, row_bitmask, - skip_rows_with_nulls, local_mapping_index.data(), global_mapping_index.data(), block_cardinality.data(), @@ -181,8 +188,7 @@ rmm::device_uvector compute_aggregations( d_agg_kinds.data(), block_cardinality.data(), stride, - row_bitmask, - skip_rows_with_nulls}); + row_bitmask}); extract_populated_keys(global_set, populated_keys, stream); } diff --git a/cpp/src/groupby/hash/compute_aggregations.hpp b/cpp/src/groupby/hash/compute_aggregations.hpp index 829c3c808b0..e387ffe3085 100644 --- a/cpp/src/groupby/hash/compute_aggregations.hpp +++ b/cpp/src/groupby/hash/compute_aggregations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_aggregations( int64_t num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, SetType& global_set, cudf::host_span requests, diff --git a/cpp/src/groupby/hash/compute_aggregations_null.cu b/cpp/src/groupby/hash/compute_aggregations_null.cu index 1d7184227ea..6c7f0615be3 100644 --- a/cpp/src/groupby/hash/compute_aggregations_null.cu +++ b/cpp/src/groupby/hash/compute_aggregations_null.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_aggregations( int64_t num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, nullable_global_set_t& global_set, cudf::host_span requests, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.cu b/cpp/src/groupby/hash/compute_global_memory_aggs.cu index d2830f7d905..2d7687a826d 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.cu +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_global_memory_aggs( cudf::size_type num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.cuh b/cpp/src/groupby/hash/compute_global_memory_aggs.cuh index 671ee2ea31f..375906b756f 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.cuh +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,7 +41,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_global_memory_aggs( cudf::size_type num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, @@ -69,12 +68,11 @@ rmm::device_uvector compute_global_memory_aggs( auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); auto global_set_ref = global_set.ref(cuco::op::insert_and_find); - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::counting_iterator{0}, - num_rows, - hash::compute_single_pass_aggs_fn{ - global_set_ref, *d_values, *d_sparse_table, d_agg_kinds, row_bitmask, skip_rows_with_nulls}); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::counting_iterator{0}, + num_rows, + hash::compute_single_pass_aggs_fn{ + global_set_ref, *d_values, *d_sparse_table, d_agg_kinds, row_bitmask}); extract_populated_keys(global_set, populated_keys, stream); // Add results back to sparse_results cache diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs.hpp b/cpp/src/groupby/hash/compute_global_memory_aggs.hpp index 437823a3fea..5b7d46fa461 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs.hpp +++ b/cpp/src/groupby/hash/compute_global_memory_aggs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_global_memory_aggs( cudf::size_type num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, diff --git a/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu b/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu index 7cb3f8f190b..41efbaf96ba 100644 --- a/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu +++ b/cpp/src/groupby/hash/compute_global_memory_aggs_null.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ namespace cudf::groupby::detail::hash { template rmm::device_uvector compute_global_memory_aggs( cudf::size_type num_rows, - bool skip_rows_with_nulls, bitmask_type const* row_bitmask, cudf::table_view const& flattened_values, cudf::aggregation::Kind const* d_agg_kinds, diff --git a/cpp/src/groupby/hash/compute_groupby.cu b/cpp/src/groupby/hash/compute_groupby.cu index 9648d942513..46b8f0a0a3f 100644 --- a/cpp/src/groupby/hash/compute_groupby.cu +++ b/cpp/src/groupby/hash/compute_groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,14 +29,33 @@ #include #include -#include #include +#include -#include #include namespace cudf::groupby::detail::hash { + +namespace { + +// The number of columns in the keys table that will trigger caching of row hashes. 
+// This is a heuristic to reduce memory read when the keys table is hashed twice. +constexpr int HASH_CACHING_THRESHOLD = 4; + +int count_nested_columns(column_view const& input) +{ + if (!is_nested(input.type())) { return 1; } + + // Count the current column too. + return 1 + std::accumulate( + input.child_begin(), input.child_end(), 0, [](int count, column_view const& child) { + return count + count_nested_columns(child); + }); +} + +} // namespace + +template <typename Equal, typename Hash> std::unique_ptr<table> compute_groupby(table_view const& keys, host_span<aggregation_request const> requests, @@ -50,6 +69,51 @@ std::unique_ptr<table>
 compute_groupby(table_view const& keys, // convert to int64_t to avoid potential overflow with large `keys` auto const num_keys = static_cast<int64_t>(keys.num_rows()); + [[maybe_unused]] auto const [row_bitmask_data, row_bitmask] = + [&]() -> std::pair<rmm::device_buffer, bitmask_type const*> { + if (!skip_rows_with_nulls) { return {rmm::device_buffer{0, stream}, nullptr}; } + + if (keys.num_columns() == 1) { + auto const& keys_col = keys.column(0); + // Only use the input null mask directly if the keys table was not sliced. + if (keys_col.offset() == 0) { return {rmm::device_buffer{0, stream}, keys_col.null_mask()}; } + // If the keys table was sliced, we need to copy the null mask to ensure its first bit aligns + // with the first row of the keys table. + auto null_mask_data = cudf::copy_bitmask(keys_col, stream); + auto const null_mask = static_cast<bitmask_type const*>(null_mask_data.data()); + return {std::move(null_mask_data), null_mask}; + } + + auto [null_mask_data, null_count] = cudf::bitmask_and(keys, stream); + if (null_count == 0) { return {rmm::device_buffer{0, stream}, nullptr}; } + + auto const null_mask = static_cast<bitmask_type const*>(null_mask_data.data()); + return {std::move(null_mask_data), null_mask}; + }(); + + auto const cached_hashes = [&]() -> rmm::device_uvector<hash_value_type> { + auto const num_columns = + std::accumulate(keys.begin(), keys.end(), 0, [](int count, column_view const& col) { + return count + count_nested_columns(col); + }); + + if (num_columns <= HASH_CACHING_THRESHOLD) { + return rmm::device_uvector<hash_value_type>{0, stream}; + } + + rmm::device_uvector<hash_value_type> hashes(num_keys, stream); + thrust::tabulate(rmm::exec_policy_nosync(stream), + hashes.begin(), + hashes.end(), + [d_row_hash, row_bitmask] __device__(size_type const idx) { + if (!row_bitmask || cudf::bit_is_set(row_bitmask, idx)) { + return d_row_hash(idx); + } + return hash_value_type{0}; // dummy value, as it will be unused + }); + return hashes; + }(); + // Cache of sparse results where the location of aggregate value in each // column is indexed by the hash set cudf::detail::result_cache sparse_results(requests.size()); @@ -59,35 +123,19 @@ std::unique_ptr<table>
compute_groupby(table_view const& keys, cudf::detail::CUCO_DESIRED_LOAD_FACTOR, // 50% load factor cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, d_row_equal, - probing_scheme_t{d_row_hash}, + probing_scheme_t{row_hasher_with_cache_t{d_row_hash, cached_hashes.data()}}, cuco::thread_scope_device, cuco::storage{}, cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, stream.value()}; - auto row_bitmask = - skip_rows_with_nulls - ? cudf::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first - : rmm::device_buffer{}; - // Compute all single pass aggs first - auto gather_map = compute_aggregations(num_keys, - skip_rows_with_nulls, - static_cast(row_bitmask.data()), - set, - requests, - &sparse_results, - stream); + auto gather_map = + compute_aggregations(num_keys, row_bitmask, set, requests, &sparse_results, stream); // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(requests, - &sparse_results, - cache, - gather_map, - set.ref(cuco::find), - static_cast(row_bitmask.data()), - stream, - mr); + sparse_to_dense_results( + requests, &sparse_results, cache, gather_map, set.ref(cuco::find), row_bitmask, stream, mr); return cudf::detail::gather(keys, gather_map, diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cu b/cpp/src/groupby/hash/compute_mapping_indices.cu index 519d7cd2f1c..71d18a86d9c 100644 --- a/cpp/src/groupby/hash/compute_mapping_indices.cu +++ b/cpp/src/groupby/hash/compute_mapping_indices.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,15 +18,13 @@ #include "compute_mapping_indices.hpp" namespace cudf::groupby::detail::hash { -template cudf::size_type max_occupancy_grid_size>( - cudf::size_type n); +template int32_t max_active_blocks_mapping_kernel>(); template void compute_mapping_indices>( cudf::size_type grid_size, cudf::size_type num, hash_set_ref_t global_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, diff --git a/cpp/src/groupby/hash/compute_mapping_indices.cuh b/cpp/src/groupby/hash/compute_mapping_indices.cuh index bd043671174..133a43ad972 100644 --- a/cpp/src/groupby/hash/compute_mapping_indices.cuh +++ b/cpp/src/groupby/hash/compute_mapping_indices.cuh @@ -30,7 +30,6 @@ #include #include #include -#include #include @@ -41,13 +40,12 @@ __device__ void find_local_mapping(cooperative_groups::thread_block const& block cudf::size_type num_input_rows, SetType shared_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* cardinality, cudf::size_type* local_mapping_index, cudf::size_type* shared_set_indices) { auto const is_valid_input = - idx < num_input_rows and (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, idx)); + idx < num_input_rows and (not row_bitmask or cudf::bit_is_set(row_bitmask, idx)); auto const [result_idx, inserted] = [&]() { if (is_valid_input) { auto const result = shared_set.insert_and_find(idx); @@ -99,7 +97,6 @@ template CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows, SetRef global_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, @@ -136,7 
+133,6 @@ CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows, num_input_rows, shared_set, row_bitmask, - skip_rows_with_nulls, &cardinality, local_mapping_index, shared_set_indices); @@ -159,14 +155,12 @@ CUDF_KERNEL void mapping_indices_kernel(cudf::size_type num_input_rows, } template -cudf::size_type max_occupancy_grid_size(cudf::size_type n) +int32_t max_active_blocks_mapping_kernel() { - cudf::size_type max_active_blocks{-1}; + int32_t max_active_blocks{-1}; CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( &max_active_blocks, mapping_indices_kernel, GROUPBY_BLOCK_SIZE, 0)); - auto const grid_size = max_active_blocks * cudf::detail::num_multiprocessors(); - auto const num_blocks = cudf::util::div_rounding_up_safe(n, GROUPBY_BLOCK_SIZE); - return std::min(grid_size, num_blocks); + return max_active_blocks; } template @@ -174,7 +168,6 @@ void compute_mapping_indices(cudf::size_type grid_size, cudf::size_type num, SetRef global_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, @@ -185,7 +178,6 @@ void compute_mapping_indices(cudf::size_type grid_size, num, global_set, row_bitmask, - skip_rows_with_nulls, local_mapping_index, global_mapping_index, block_cardinality, diff --git a/cpp/src/groupby/hash/compute_mapping_indices.hpp b/cpp/src/groupby/hash/compute_mapping_indices.hpp index 473ad99e650..58bdfb250f5 100644 --- a/cpp/src/groupby/hash/compute_mapping_indices.hpp +++ b/cpp/src/groupby/hash/compute_mapping_indices.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,19 +22,25 @@ #include namespace cudf::groupby::detail::hash { + +/* + * @brief Computes the maximum number of active blocks of the shared memory aggregation kernel that + * can be executed on the underlying device. + */ +int32_t max_active_blocks_shmem_aggs_kernel(); + /* - * @brief Computes the maximum number of active blocks of the given kernel that can be executed on - * the underlying device + * @brief Computes the maximum number of active blocks of the mapping indices kernel that can be + * executed on the underlying device. */ template -[[nodiscard]] cudf::size_type max_occupancy_grid_size(cudf::size_type n); +[[nodiscard]] int32_t max_active_blocks_mapping_kernel(); template void compute_mapping_indices(cudf::size_type grid_size, cudf::size_type num, SetRef global_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, diff --git a/cpp/src/groupby/hash/compute_mapping_indices_null.cu b/cpp/src/groupby/hash/compute_mapping_indices_null.cu index 81c3c9e456f..01d4657e20a 100644 --- a/cpp/src/groupby/hash/compute_mapping_indices_null.cu +++ b/cpp/src/groupby/hash/compute_mapping_indices_null.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
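// [Editor's sketch] Throughout this patch the separate `skip_rows_with_nulls`
// flag is dropped and a null `row_bitmask` pointer itself now means "no rows
// are skipped". A minimal illustration of that sentinel-pointer pattern,
// assuming cudf's public bit utilities; the helper name is hypothetical.

#include <cudf/types.hpp>
#include <cudf/utilities/bit.hpp>

__device__ bool row_is_valid(cudf::bitmask_type const* row_bitmask, cudf::size_type idx)
{
  // nullptr encodes "keys contain no nulls", replacing the old boolean flag,
  // so a single pointer now carries both pieces of information.
  return row_bitmask == nullptr or cudf::bit_is_set(row_bitmask, idx);
}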
@@ -18,15 +18,14 @@ #include "compute_mapping_indices.hpp" namespace cudf::groupby::detail::hash { -template cudf::size_type -max_occupancy_grid_size>(cudf::size_type n); +template int32_t +max_active_blocks_mapping_kernel>(); template void compute_mapping_indices>( cudf::size_type grid_size, cudf::size_type num, nullable_hash_set_ref_t global_set, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu index 878126bbea5..fc24dd727d4 100644 --- a/cpp/src/groupby/hash/compute_shared_memory_aggs.cu +++ b/cpp/src/groupby/hash/compute_shared_memory_aggs.cu @@ -105,7 +105,6 @@ __device__ void initialize_shmem_aggregations(cooperative_groups::thread_block c __device__ void compute_pre_aggregrations(cudf::size_type col_start, cudf::size_type col_end, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::table_device_view source, cudf::size_type num_input_rows, cudf::size_type* local_mapping_index, @@ -118,7 +117,7 @@ __device__ void compute_pre_aggregrations(cudf::size_type col_start, // Aggregates global memory sources to shared memory targets for (auto source_idx = cudf::detail::grid_1d::global_thread_id(); source_idx < num_input_rows; source_idx += cudf::detail::grid_1d::grid_stride()) { - if (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, source_idx)) { + if (not row_bitmask or cudf::bit_is_set(row_bitmask, source_idx)) { auto const target_idx = local_mapping_index[source_idx] + agg_location_offset; for (auto col_idx = col_start; col_idx < col_end; col_idx++) { auto const source_col = source.column(col_idx); @@ -183,7 +182,6 @@ __device__ void compute_final_aggregations(cooperative_groups::thread_block cons * pre (shared) and final (global) aggregates*/ CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, @@ -248,7 +246,6 @@ CUDF_KERNEL void single_pass_shmem_aggs_kernel(cudf::size_type num_rows, compute_pre_aggregrations(col_start, col_end, row_bitmask, - skip_rows_with_nulls, input_values, num_rows, local_mapping_index, @@ -287,11 +284,18 @@ size_type get_available_shared_memory_size(cudf::size_type grid_size) ALIGNMENT); } +int32_t max_active_blocks_shmem_aggs_kernel() +{ + int32_t max_active_blocks{-1}; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks, single_pass_shmem_aggs_kernel, GROUPBY_BLOCK_SIZE, 0)); + return max_active_blocks; +} + void compute_shared_memory_aggs(cudf::size_type grid_size, size_type available_shmem_size, cudf::size_type num_input_rows, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, @@ -310,7 +314,6 @@ void compute_shared_memory_aggs(cudf::size_type grid_size, single_pass_shmem_aggs_kernel<<>>( num_input_rows, row_bitmask, - skip_rows_with_nulls, local_mapping_index, global_mapping_index, block_cardinality, diff --git a/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp index b6ba6898c07..c361a1048b6 100644 --- a/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp +++ 
b/cpp/src/groupby/hash/compute_shared_memory_aggs.hpp @@ -33,7 +33,6 @@ void compute_shared_memory_aggs(cudf::size_type grid_size, cudf::size_type available_shmem_size, cudf::size_type num_input_rows, bitmask_type const* row_bitmask, - bool skip_rows_with_nulls, cudf::size_type* local_mapping_index, cudf::size_type* global_mapping_index, cudf::size_type* block_cardinality, diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh index 759a29840f4..c4c96ec210d 100644 --- a/cpp/src/groupby/hash/helpers.cuh +++ b/cpp/src/groupby/hash/helpers.cuh @@ -56,8 +56,27 @@ using row_hash_t = cudf::experimental::row::hash::device_row_hasher; +/// Adapter to cudf row hasher with caching support. +class row_hasher_with_cache_t { + row_hash_t hasher; + hash_value_type const* values; + + public: + row_hasher_with_cache_t(row_hash_t const& hasher, + hash_value_type const* values = nullptr) noexcept + : hasher(hasher), values(values) + { + } + + __device__ hash_value_type operator()(size_type const idx) const noexcept + { + if (values) { return values[idx]; } + return hasher(idx); + } +}; + /// Probing scheme type used by groupby hash table -using probing_scheme_t = cuco::linear_probing; +using probing_scheme_t = cuco::linear_probing; using row_comparator_t = cudf::experimental::row::equality::device_row_comparator< false, diff --git a/cpp/src/groupby/hash/single_pass_functors.cuh b/cpp/src/groupby/hash/single_pass_functors.cuh index 5b810145a2e..b4606cd08a0 100644 --- a/cpp/src/groupby/hash/single_pass_functors.cuh +++ b/cpp/src/groupby/hash/single_pass_functors.cuh @@ -191,7 +191,6 @@ struct global_memory_fallback_fn { cudf::size_type* block_cardinality; cudf::size_type stride; bitmask_type const* __restrict__ row_bitmask; - bool skip_rows_with_nulls; global_memory_fallback_fn(SetType set, cudf::table_device_view input_values, @@ -199,16 +198,14 @@ struct global_memory_fallback_fn { cudf::aggregation::Kind const* aggs, cudf::size_type* block_cardinality, cudf::size_type stride, - bitmask_type const* row_bitmask, - bool skip_rows_with_nulls) + bitmask_type const* row_bitmask) : set(set), input_values(input_values), output_values(output_values), aggs(aggs), block_cardinality(block_cardinality), stride(stride), - row_bitmask(row_bitmask), - skip_rows_with_nulls(skip_rows_with_nulls) + row_bitmask(row_bitmask) { } @@ -216,7 +213,7 @@ struct global_memory_fallback_fn { { auto const block_id = (i % stride) / GROUPBY_BLOCK_SIZE; if (block_cardinality[block_id] >= GROUPBY_CARDINALITY_THRESHOLD and - (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, i))) { + (not row_bitmask or cudf::bit_is_set(row_bitmask, i))) { auto const result = set.insert_and_find(i); cudf::detail::aggregate_row(output_values, *result.first, input_values, i, aggs); } @@ -256,7 +253,6 @@ struct compute_single_pass_aggs_fn { mutable_table_device_view output_values; aggregation::Kind const* __restrict__ aggs; bitmask_type const* __restrict__ row_bitmask; - bool skip_rows_with_nulls; /** * @brief Construct a new compute_single_pass_aggs_fn functor object @@ -270,28 +266,23 @@ struct compute_single_pass_aggs_fn { * columns of the `input_values` rows * @param row_bitmask Bitmask where bit `i` indicates the presence of a null * value in row `i` of input keys. Only used if `skip_rows_with_nulls` is `true` - * @param skip_rows_with_nulls Indicates if rows in `input_keys` containing - * null values should be skipped. 
It `true`, it is assumed `row_bitmask` is a - * bitmask where bit `i` indicates the presence of a null value in row `i`. */ compute_single_pass_aggs_fn(SetType set, table_device_view input_values, mutable_table_device_view output_values, aggregation::Kind const* aggs, - bitmask_type const* row_bitmask, - bool skip_rows_with_nulls) + bitmask_type const* row_bitmask) : set(set), input_values(input_values), output_values(output_values), aggs(aggs), - row_bitmask(row_bitmask), - skip_rows_with_nulls(skip_rows_with_nulls) + row_bitmask(row_bitmask) { } __device__ void operator()(size_type i) { - if (not skip_rows_with_nulls or cudf::bit_is_set(row_bitmask, i)) { + if (not row_bitmask or cudf::bit_is_set(row_bitmask, i)) { auto const result = set.insert_and_find(i); cudf::detail::aggregate_row(output_values, *result.first, input_values, i, aggs);

From 21b69cf8c31c42bf981c5c8987b4d0d6ff36b3d5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:40:01 -0700 Subject: [PATCH 164/366] Support decimal columns in cudf_polars (#19589)

closes https://github.com/rapidsai/cudf/issues/18863

On top of https://github.com/rapidsai/cudf/pull/19587

Notes: * I think there was an error in `pylibcudf.Scalar.to/from_py` with `decimal.Decimal`s. `py-polars/tests/unit/datatypes/test_decimal.py::test_decimal_dynamic_float_st` originally failed because the `scale` of a `pylibcudf.Scalar` was flipped. * Polars `Decimal` supports `precision=None` meaning "precision will be inferred". The xfailed tests in `plugin.py` are failing because IIUC [we cannot set the result back to `precision=None`](https://github.com/pola-rs/polars/issues/23899), so we set the precision to the max libcudf precision instead. I'm not sure if we're OK with this difference (and should document it) or should find a way to set `precision=None`

Authors: - Matthew Roeschke (https://github.com/mroeschke)

Approvers: - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19589 --- .../cudf_polars/containers/dataframe.py | 19 ++++++++++---- .../cudf_polars/containers/datatype.py | 2 ++ python/cudf_polars/cudf_polars/dsl/ir.py | 10 ++++++++ .../cudf_polars/cudf_polars/testing/plugin.py | 2 ++ .../tests/expressions/test_literal.py | 6 ++++- python/cudf_polars/tests/test_groupby.py | 3 +++ python/cudf_polars/tests/test_scan.py | 9 +++++++ python/cudf_polars/tests/test_select.py | 25 +++++++++++++++++++ python/pylibcudf/pylibcudf/scalar.pyx | 8 +++--- python/pylibcudf/tests/test_interop.py | 2 +- 10 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 43ec63738b2..3a095be3cfe 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -29,17 +29,26 @@ def _create_polars_column_metadata( name: str, dtype: PolarsDataType ) -> plc.interop.ColumnMetadata: - """Create ColumnMetadata preserving pl.Struct field names.""" + """Create ColumnMetadata preserving dtype attributes not supported by libcudf.""" + children_meta = [] + timezone = "" + precision: int | None = None + if isinstance(dtype, pl.Struct): children_meta = [ _create_polars_column_metadata(field.name, field.dtype) for field in dtype.fields ] - else: - children_meta = [] - timezone = dtype.time_zone if isinstance(dtype, pl.Datetime) else None + elif isinstance(dtype, pl.Datetime):
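# [Editor's note] A hedged illustration of the sign convention discussed in the
# commit message above: Python's decimal module reports fractional digits as a
# *negative* exponent, and libcudf's decimal types use the same negative-`scale`
# convention, so no extra sign flip is needed between them. For example:
#     import decimal
#     d = decimal.Decimal("1.23")
#     d.as_tuple().exponent   # -2
#     int(d.scaleb(2))        # 123; DECIMAL128 with scale=-2 stores 123 * 10**-2 == 1.23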
timezone = dtype.time_zone or timezone + elif isinstance(dtype, pl.Decimal): + precision = dtype.precision + return plc.interop.ColumnMetadata( - name=name, timezone=timezone or "", children_meta=children_meta + name=name, + timezone=timezone, + precision=precision, + children_meta=children_meta, ) diff --git a/python/cudf_polars/cudf_polars/containers/datatype.py b/python/cudf_polars/cudf_polars/containers/datatype.py index 5de610425ed..50a5352612a 100644 --- a/python/cudf_polars/cudf_polars/containers/datatype.py +++ b/python/cudf_polars/cudf_polars/containers/datatype.py @@ -81,6 +81,8 @@ def _from_polars(dtype: pl.DataType) -> plc.DataType: assert_never(dtype.time_unit) elif isinstance(dtype, pl.String): return plc.DataType(plc.TypeId.STRING) + elif isinstance(dtype, pl.Decimal): + return plc.DataType(plc.TypeId.DECIMAL128, scale=-dtype.scale) elif isinstance(dtype, pl.Null): # TODO: Hopefully return plc.DataType(plc.TypeId.EMPTY) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 161c8f4a576..7783b15a207 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -1467,6 +1467,16 @@ def do_evaluate( else: (child,) = value.children col = child.evaluate(df, context=ExecutionContext.GROUPBY).obj + + if value.name == "median" and col.type().id() in { + plc.TypeId.DECIMAL128, + plc.TypeId.DECIMAL64, + plc.TypeId.DECIMAL32, + }: + # libcudf doesn't support median (quantile) with decimal types, + # but Polars returns a float result, so just cast the input. + assert isinstance(child, expr.Col) + col = plc.unary.cast(col, schema[child.name].plc) else: # Anything else, we pre-evaluate col = value.evaluate(df, context=ExecutionContext.GROUPBY).obj diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 14956d2cfbc..5038a0d5690 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -175,6 +175,8 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "allow_missing_columns argument in read_parquet not translated in IR", + "tests/unit/datatypes/test_decimal.py::test_decimal_aggregations": "https://github.com/pola-rs/polars/issues/23899", + "tests/unit/datatypes/test_decimal.py::test_decimal_arithmetic_schema": "https://github.com/pola-rs/polars/issues/23899", } diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index 69ee80da82e..1c2eb05ebfe 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import datetime + import pytest import polars as pl @@ -95,7 +97,9 @@ def test_select_literal_series(): assert_gpu_result_equal(q) -@pytest.mark.parametrize("expr", [pl.lit(None), pl.lit(10, dtype=pl.Decimal())]) +@pytest.mark.parametrize( + "expr", [pl.lit(None), pl.lit(datetime.time(12, 0), dtype=pl.Time())] +) def test_unsupported_literal_raises(expr): df = pl.LazyFrame({}) diff --git 
a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 36196522f34..38ece61457e 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import decimal import itertools import random from datetime import date @@ -25,6 +26,7 @@ def df(): "key2": [2, 2, 2, 2, 6, 1, 4, 6, 8], "int": [1, 2, 3, 4, 5, 6, 7, 8, 9], "int32": pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=pl.Int32()), + "decimal": [decimal.Decimal("1.23"), None, decimal.Decimal("-0.23")] * 3, "uint16_with_null": pl.Series( [1, None, 2, None, None, None, 4, 5, 6], dtype=pl.UInt16() ), @@ -89,6 +91,7 @@ def keys(request): [pl.col("float").quantile(0.3, interpolation="lower")], [pl.col("float").quantile(0.3, interpolation="midpoint")], [pl.col("float").quantile(0.3, interpolation="linear")], + [pl.col("decimal").median()], [ pl.col("datetime").max(), pl.col("datetime").max().dt.is_leap_year().alias("leapyear"), diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index 8481105baad..7510fe833be 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import decimal from typing import TYPE_CHECKING import pytest @@ -45,6 +46,14 @@ def df(): "a": [1, 2, 3, None, 4, 5], "b": ["ẅ", "x", "y", "z", "123", "abcd"], "c": [None, None, 4, 5, -1, 0], + "d": [ + decimal.Decimal("1.23"), + None, + decimal.Decimal("0.00"), + None, + decimal.Decimal("-5.67"), + None, + ], } ) diff --git a/python/cudf_polars/tests/test_select.py b/python/cudf_polars/tests/test_select.py index da3f519783b..10fcf9f660d 100644 --- a/python/cudf_polars/tests/test_select.py +++ b/python/cudf_polars/tests/test_select.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import decimal + import pytest import polars as pl @@ -27,6 +29,29 @@ def test_select(): assert_gpu_result_equal(query) +def test_select_decimal(): + ldf = pl.LazyFrame( + {"a": pl.Series(values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(3, 1))} + ) + query = ldf.select(pl.col("a")) + assert_gpu_result_equal(query) + + +def test_select_decimal_precision_none_result_max_precision(): + ldf = pl.LazyFrame( + { + "a": pl.Series( + values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(None, 1) + ) + } + ) + query = ldf.select(pl.col("a")) + cpu_result = query.collect() + gpu_result = query.collect(engine="gpu") + assert cpu_result.schema["a"].precision is None + assert gpu_result.schema["a"].precision == 38 + + def test_select_reduce(): ldf = pl.DataFrame( { diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 0d533c960a4..31a93abd6fb 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -275,7 +275,7 @@ cdef class Scalar: return decimal.Decimal( (slr).value().value() ).scaleb( - -(slr).type().scale() + (slr).type().scale() ) else: raise NotImplementedError( @@ -647,12 +647,12 @@ def _(py_val: datetime.date, dtype: DataType | None): @_from_py.register(decimal.Decimal) def _(py_val: decimal.Decimal, dtype: DataType | None): - scale = -py_val.as_tuple().exponent - as_int = int(py_val.scaleb(scale)) + scale = py_val.as_tuple().exponent + as_int = int(py_val.scaleb(-scale)) cdef int128_t val = as_int - dtype = DataType(type_id.DECIMAL128, -scale) + dtype = 
DataType(type_id.DECIMAL128, scale) if dtype.id() != type_id.DECIMAL128: raise TypeError("Expected dtype to be DECIMAL128") diff --git a/python/pylibcudf/tests/test_interop.py b/python/pylibcudf/tests/test_interop.py index 171d70c2496..b1a6e9f2c66 100644 --- a/python/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/tests/test_interop.py @@ -105,7 +105,7 @@ def test_decimal_other(data_type): [plc.TypeId.DECIMAL128, plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32], ) def test_decimal_respect_metadata_precision(plc_type, request): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( parse(pa.__version__) < parse("19.0.0") and plc_type in {plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32}, From f6b9b9eda6c3bdf4587c7a5069f4b2bfa747776b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Aug 2025 14:07:47 -0700 Subject: [PATCH 165/366] Move (most of) test_list.py to new cudf classic test directories (#19574) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19574 --- .../tests/dataframe/indexing/test_setitem.py | 21 +- .../dataframe/methods/test_memory_usage.py | 9 + .../tests/dataframe/methods/test_to_arrow.py | 24 + .../tests/dataframe/methods/test_to_pandas.py | 12 + .../cudf/tests/dataframe/test_constructors.py | 38 + .../cudf/cudf/tests/dtypes/test_listdtype.py | 11 + .../cudf/tests/series/accessors/test_list.py | 536 ++++++++++ .../tests/series/indexing/test_getitem.py | 63 ++ .../tests/series/indexing/test_setitem.py | 67 ++ .../cudf/tests/series/methods/test_astype.py | 32 + .../cudf/tests/series/methods/test_explode.py | 46 + .../tests/series/methods/test_memory_usage.py | 16 + .../cudf/tests/series/test_constructors.py | 35 + python/cudf/cudf/tests/test_list.py | 926 ------------------ 14 files changed, 906 insertions(+), 930 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py create mode 100644 python/cudf/cudf/tests/series/accessors/test_list.py diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py index 73f4632fafd..7d2dab7caa6 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py @@ -1,12 +1,25 @@ # Copyright (c) 2025, NVIDIA CORPORATION. - import pandas as pd -from cudf import DataFrame +import cudf + + +def test_listcol_setitem_retain_dtype(): + df = cudf.DataFrame( + {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]} + ) + df1 = df.head(0) + # Performing a setitem on `b` triggers a `column.column_empty` call + # which tries to create an empty ListColumn. 
+ df1["b"] = df1["c"] + # Performing a copy to trigger a copy dtype which is obtained by accessing + # `ListColumn.children` that would have been corrupted in previous call + # prior to this fix: https://github.com/rapidsai/cudf/pull/10151/ + df2 = df1.copy() + assert df2["a"].dtype == df["a"].dtype def test_setitem_datetime(): - df = DataFrame() - df["date"] = pd.date_range("20010101", "20010105").values + df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) assert df.date.dtype.kind == "M" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py b/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py new file mode 100644 index 00000000000..42185ff6a9c --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py @@ -0,0 +1,9 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf + + +def test_list_struct_list_memory_usage(): + df = cudf.DataFrame({"a": [[{"b": [1]}]]}) + assert df.memory_usage().sum() == 16 diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py index 2d3174a8225..d29a6bbcfaf 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py @@ -22,6 +22,30 @@ def test_dataframe_to_arrow_preserve_index(preserve_index): assert expect == got +def test_dataframe_list_round_trip(): + data = [{"text": "hello", "list_col": np.asarray([1, 2], dtype="uint32")}] + cudf_arrow = cudf.DataFrame(data).to_arrow() + pdf_arrow = pa.Table.from_pandas(pd.DataFrame(data)) + + for metadata in ( + None, + pdf_arrow.schema.metadata, + cudf_arrow.schema.metadata, + ): + schema = pa.schema( + [ + pa.field("text", pa.string()), + pa.field("list_col", pa.list_(pa.uint32())), + ], + metadata=metadata, + ) + + data = {"text": ["asd", "pqr"], "list_col": [[1, 2, 3], [4, 5]]} + + table = pa.Table.from_pydict(data, schema=schema) + assert_eq(table.to_pandas(), pd.DataFrame(data)) + + def test_datetime_to_arrow(datetime_types_as_str): data = pd.date_range("2000-01-01", "2000-01-02", freq="3600s") gdf = cudf.DataFrame({"timestamp": data.astype(datetime_types_as_str)}) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py new file mode 100644 index 00000000000..df549bbac5a --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import pytest + +import cudf + + +def test_list_to_pandas_nullable_true(): + df = cudf.DataFrame({"a": cudf.Series([[1, 2, 3]])}) + with pytest.raises(NotImplementedError): + df.to_pandas(nullable=True) diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py index 59463b812a0..385f4c44c01 100644 --- a/python/cudf/cudf/tests/dataframe/test_constructors.py +++ b/python/cudf/cudf/tests/dataframe/test_constructors.py @@ -403,6 +403,44 @@ def test_from_scalar_typing(request, all_supported_types_as_str): assert len(gdf["b"]) == len(gdf["a"]) +@pytest.mark.parametrize( + "data", + [ + {"a": [[]]}, + {"a": [[None]]}, + {"a": [[1, 2, 3]]}, + {"a": [[1, 2, 3]], "b": [[2, 3, 4]]}, + {"a": [[1, 2, 3, None], [None]], "b": [[2, 3, 4], [5]], "c": None}, + {"a": [[1]], "b": [[1, 2, 3]]}, + pd.DataFrame({"a": [[1, 2, 3]]}), + ], +) +def test_df_list_dtypes(data): + expect = pd.DataFrame(data) + got = cudf.DataFrame(data) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [[]]}, + {"a": [[1, 2, None, 4]]}, + {"a": [["cat", None, "dog"]]}, + { + "a": [[1, 2, 3, None], [4, None, 5]], + "b": [None, ["fish", "bird"]], + "c": [[], []], + }, + {"a": [[1, 2, 3, None], [4, None, 5], None, [6, 7]]}, + ], +) +def test_serialize_list_columns(data): + df = cudf.DataFrame(data) + reconstructed = df.__class__.deserialize(*df.serialize()) + assert_eq(reconstructed, df) + + @pytest.mark.parametrize( "data1, data2", [(1, 2), (1.0, 2.0), (3, 4.0)], diff --git a/python/cudf/cudf/tests/dtypes/test_listdtype.py b/python/cudf/cudf/tests/dtypes/test_listdtype.py index 0477ae50003..84576dc0608 100644 --- a/python/cudf/cudf/tests/dtypes/test_listdtype.py +++ b/python/cudf/cudf/tests/dtypes/test_listdtype.py @@ -7,6 +7,17 @@ from cudf.utils.dtypes import cudf_dtype_to_pa_type +def test_listdtype_hash(): + a = cudf.ListDtype("int64") + b = cudf.ListDtype("int64") + + assert hash(a) == hash(b) + + c = cudf.ListDtype("int32") + + assert hash(a) != hash(c) + + def test_list_dtype_pyarrow_round_trip(all_supported_types_as_str, request): request.applymarker( pytest.mark.xfail( diff --git a/python/cudf/cudf/tests/series/accessors/test_list.py b/python/cudf/cudf/tests/series/accessors/test_list.py new file mode 100644 index 00000000000..18f86dfd7c2 --- /dev/null +++ b/python/cudf/cudf/tests/series/accessors/test_list.py @@ -0,0 +1,536 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import functools +import operator + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.api.types import is_scalar +from cudf.testing import assert_eq +from cudf.utils.dtypes import cudf_dtype_to_pa_type + + +@pytest.mark.parametrize( + "data", + [ + [[]], + [[[]]], + [[0]], + [[0, 1]], + [[0, 1], [2, 3]], + [[[0, 1], [2]], [[3, 4]]], + [[[0, 1, None], None], None, [[3, 2, None], None]], + [[["a", "c", None], None], None, [["b", "d", None], None]], + ], +) +def test_leaves(data): + pa_array = pa.array(data) + while hasattr(pa_array, "flatten"): + pa_array = pa_array.flatten() + + expect = cudf.Series(pa_array) + got = cudf.Series(data).list.leaves + assert_eq( + expect, + got, + check_dtype=not isinstance(pa_array, pa.NullArray), + ) + + +@pytest.mark.parametrize( + "data", + [ + [[]], + [[1, 2, 3], [4, 5]], + [[1, 2, 3], [], [4, 5]], + [[1, 2, 3], None, [4, 5]], + [[None, None], [None]], + [[[[[[1, 2, 3]]]]]], + cudf.Series([[1, 2]]).iloc[0:0], + cudf.Series([None, [1, 2]]).iloc[0:1], + ], +) +def test_len(data): + gsr = cudf.Series(data) + psr = gsr.to_pandas() + + expect = psr.map(lambda x: len(x) if x is not None else None) + got = gsr.list.len() + + assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + ("data", "idx"), + [ + ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[0, 1], [2], [1, 2]]), + ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[1, 2, 0], [1, 0, 2], [0, 1, 2]]), + ([[1, 2, 3], []], [[0, 1], []]), + ([[1, 2, 3], [None]], [[0, 1], []]), + ([[1, None, 3], None], [[0, 1], []]), + ], +) +def test_take(data, idx): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + + expected = pd.Series(zip(ps, idx, strict=True)).map( + lambda x: [x[0][i] for i in x[1]] if x[0] is not None else None + ) + got = gs.list.take(idx) + assert_eq(expected, got) + + +@pytest.mark.parametrize( + ("invalid", "exception"), + [ + ([[0]], pytest.raises(ValueError, match="different size")), + ([1, 2, 3, 4], pytest.raises(ValueError, match="should be list type")), + ( + [["a", "b"], ["c"]], + pytest.raises( + TypeError, match="should be column of values of index types" + ), + ), + ( + [[[1], [0]], [[0]]], + pytest.raises( + TypeError, match="should be column of values of index types" + ), + ), + ([[0, 1], None], pytest.raises(ValueError, match="contains null")), + ], +) +def test_take_invalid(invalid, exception): + gs = cudf.Series([[0, 1], [2, 3]]) + with exception: + gs.list.take(invalid) + + +@pytest.mark.parametrize( + ("data", "expected"), + [ + ([[1, 1, 2, 2], [], None, [3, 4, 5]], [[1, 2], [], None, [3, 4, 5]]), + ( + [[1.233, np.nan, 1.234, 3.141, np.nan, 1.234]], + [[1.233, 1.234, np.nan, 3.141]], + ), # duplicate nans + ([[1, 1, 2, 2, None, None]], [[1, 2, None]]), # duplicate nulls + ( + [[1.233, np.nan, None, 1.234, 3.141, np.nan, 1.234, None]], + [[1.233, 1.234, np.nan, None, 3.141]], + ), # duplicate nans and nulls + ([[2, None, 1, None, 2]], [[1, 2, None]]), + ([[], []], [[], []]), + ([[], None], [[], None]), + ], +) +def test_unique(data, expected): + """ + Pandas de-duplicates nans and nulls respectively in Series.unique. 
+ `expected` is setup to mimic such behavior + """ + gs = cudf.Series(data, nan_as_null=False) + + got = gs.list.unique().list.sort_values() + expected = cudf.Series(expected, nan_as_null=False).list.sort_values() + + assert_eq(expected, got) + + +def key_func_builder(x, na_position): + return x if x is not None else -1e8 if na_position == "first" else 1e8 + + +@pytest.mark.parametrize( + "data", + [ + [[4, 2, None, 9], [8, 8, 2], [2, 1]], + [[4, 2, None, 9], [8, 8, 2], None], + [[4, 2, None, 9], [], None], + ], +) +@pytest.mark.parametrize( + "index", + [ + None, + pd.Index(["a", "b", "c"]), + pd.MultiIndex.from_tuples( + [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"] + ), + ], +) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_position", ["first", "last"]) +@pytest.mark.parametrize("ignore_index", [True, False]) +def test_sort_values(data, index, ascending, na_position, ignore_index): + key_func = functools.partial(key_func_builder, na_position=na_position) + + ps = pd.Series(data, index=index) + gs = cudf.from_pandas(ps) + + expected = ps.apply( + lambda x: sorted(x, key=key_func, reverse=not ascending) + if x is not None + else None + ) + if ignore_index: + expected.reset_index(drop=True, inplace=True) + got = gs.list.sort_values( + ascending=ascending, na_position=na_position, ignore_index=ignore_index + ) + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "data, index, expect", + [ + ([[None, None], [None, None]], 0, [None, None]), + ([[1, 2], [3, 4]], 0, [1, 3]), + ([["a", "b"], ["c", "d"]], 1, ["b", "d"]), + ([[1, None], [None, 2]], 1, [None, 2]), + ([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1, [[3, 4], [7, 8]]), + ], +) +def test_get(data, index, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect) + got = sr.list.get(index) + + assert_eq(expect, got, check_dtype=not expect.isnull().all()) + + +@pytest.mark.parametrize( + "data", + [ + [{"k": "v1"}, {"k": "v2"}], + [[{"k": "v1", "b": "v2"}], [{"k": "v3", "b": "v4"}]], + [ + [{"k": "v1", "b": [{"c": 10, "d": "v5"}]}], + [{"k": "v3", "b": [{"c": 14, "d": "v6"}]}], + ], + ], +) +@pytest.mark.parametrize("index", [0, 1]) +def test_get_nested_struct_dtype_transfer(data, index): + sr = cudf.Series([data]) + expect = cudf.Series(data[index : index + 1]) + assert_eq(expect, sr.list.get(index)) + + +def test_get_nested_lists(): + sr = cudf.Series( + [ + [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [], [[3, 4], [7, 8]]], + [[], [[9, 10]], [[11, 12], [13, 14]]], + ] + ) + expect = cudf.Series([[[1, 2], [3, 4]], []]) + got = sr.list.get(0) + assert_eq(expect, got) + + +def test_get_default(): + sr = cudf.Series([[1, 2], [3, 4, 5], [6, 7, 8, 9]]) + + assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2)) + assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2, default=cudf.NA)) + assert_eq(cudf.Series([0, 5, 8]), sr.list.get(2, default=0)) + assert_eq(cudf.Series([0, 3, 7]), sr.list.get(-3, default=0)) + assert_eq(cudf.Series([2, 5, 9]), sr.list.get(-1)) + + string_sr = cudf.Series( + [["apple", "banana"], ["carrot", "daffodil", "elephant"]] + ) + assert_eq( + cudf.Series(["default", "elephant"]), + string_sr.list.get(2, default="default"), + ) + + sr_with_null = cudf.Series([[0, cudf.NA], [1]]) + assert_eq(cudf.Series([cudf.NA, 0]), sr_with_null.list.get(1, default=0)) + + sr_nested = cudf.Series([[[1, 2], [3, 4], [5, 6]], [[5, 6], [7, 8]]]) + assert_eq(cudf.Series([[3, 4], [7, 8]]), sr_nested.list.get(1)) + assert_eq(cudf.Series([[5, 6], cudf.NA]), sr_nested.list.get(2)) + assert_eq( + 
cudf.Series([[5, 6], [0, 0]]), sr_nested.list.get(2, default=[0, 0]) + ) + + +def test_get_ind_sequence(): + # test .list.get() when `index` is a sequence + sr = cudf.Series([[1, 2], [3, 4, 5], [6, 7, 8, 9]]) + assert_eq(cudf.Series([1, 4, 8]), sr.list.get([0, 1, 2])) + assert_eq(cudf.Series([1, 4, 8]), sr.list.get(cudf.Series([0, 1, 2]))) + assert_eq(cudf.Series([cudf.NA, 5, cudf.NA]), sr.list.get([2, 2, -5])) + assert_eq(cudf.Series([0, 5, 0]), sr.list.get([2, 2, -5], default=0)) + sr_nested = cudf.Series([[[1, 2], [3, 4], [5, 6]], [[5, 6], [7, 8]]]) + assert_eq(cudf.Series([[1, 2], [7, 8]]), sr_nested.list.get([0, 1])) + + +@pytest.mark.parametrize( + "data, scalar, expect", + [ + ( + [[1, 2, 3], []], + 1, + [True, False], + ), + ( + [[1, 2, 3], [], [3, 4, 5]], + 6, + [False, False, False], + ), + ( + [[1.0, 2.0, 3.0], None, []], + 2.0, + [True, None, False], + ), + ( + [[None, "b", "c"], [], ["b", "e", "f"]], + "b", + [True, False, True], + ), + ([[None, 2, 3], None, []], 1, [False, None, False]), + ( + [[None, "b", "c"], [], ["b", "e", "f"]], + "d", + [False, False, False], + ), + ], +) +def test_contains_scalar(data, scalar, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect) + got = sr.list.contains( + pa.scalar(scalar, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) + ) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, expect", + [ + ( + [[1, 2, 3], []], + [None, None], + ), + ( + [[1.0, 2.0, 3.0], None, []], + [None, None, None], + ), + ( + [[None, 2, 3], [], None], + [None, None, None], + ), + ( + [[1, 2, 3], [3, 4, 5]], + [None, None], + ), + ( + [[], [], []], + [None, None, None], + ), + ], +) +def test_contains_null_search_key(data, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect, dtype="bool") + got = sr.list.contains( + pa.scalar(None, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) + ) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, scalar", + [ + ( + [[9, 0, 2], [], [1, None, 0]], + "x", + ), + ( + [["z", "y", None], None, [None, "x"]], + 5, + ), + ], +) +def test_contains_invalid(data, scalar): + sr = cudf.Series(data) + with pytest.raises( + TypeError, + match="Type/Scale of search key does not " + "match list column element type.", + ): + sr.list.contains(scalar) + + +@pytest.mark.parametrize( + "data, search_key, expect", + [ + ( + [[1, 2, 3], [], [3, 4, 5]], + 3, + [2, -1, 0], + ), + ( + [[1.0, 2.0, 3.0], None, [2.0, 5.0]], + 2.0, + [1, None, 0], + ), + ( + [[None, "b", "c"], [], ["b", "e", "f"]], + "f", + [-1, -1, 2], + ), + ([[-5, None, 8], None, []], -5, [0, None, -1]), + ( + [[None, "x", None, "y"], ["z", "i", "j"]], + "y", + [3, -1], + ), + ( + [["h", "a", None], ["t", "g"]], + ["a", "b"], + [1, -1], + ), + ( + [None, ["h", "i"], ["p", "k", "z"]], + ["x", None, "z"], + [None, None, 2], + ), + ( + [["d", None, "e"], [None, "f"], []], + pa.scalar(None, type=pa.string()), + [None, None, None], + ), + ( + [None, [10, 9, 8], [5, 8, None]], + pa.scalar(None, type=pa.int64()), + [None, None, None], + ), + ], +) +def test_index(data, search_key, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect, dtype="int32") + if is_scalar(search_key): + got = sr.list.index( + pa.scalar( + search_key, type=cudf_dtype_to_pa_type(sr.dtype.element_type) + ) + ) + else: + got = sr.list.index( + cudf.Series(search_key, dtype=sr.dtype.element_type) + ) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, search_key", + [ + ( + [[9, None, 8], [], [7, 6, 5]], + "c", + ), + ( + [["a", "b", "c"], None, 
[None, "d"]], + 2, + ), + ( + [["e", "s"], ["t", "w"]], + [5, 6], + ), + ], +) +def test_index_invalid_type(data, search_key): + sr = cudf.Series(data) + with pytest.raises( + TypeError, + match="Type/Scale of search key does not " + "match list column element type.", + ): + sr.list.index(search_key) + + +@pytest.mark.parametrize( + "data, search_key", + [ + ( + [[5, 8], [2, 6]], + [8, 2, 4], + ), + ( + [["h", "j"], ["p", None], ["t", "z"]], + ["j", "a"], + ), + ], +) +def test_index_invalid_length(data, search_key): + sr = cudf.Series(data) + with pytest.raises( + RuntimeError, + match="Number of search keys must match list column size.", + ): + sr.list.index(search_key) + + +@pytest.mark.parametrize( + "row", + [ + [[]], + [[1]], + [[1, 2]], + [[1, 2], [3, 4, 5]], + [[1, 2], [], [3, 4, 5]], + [[1, 2, None], [3, 4, 5]], + [[1, 2, None], None, [3, 4, 5]], + [[1, 2, None], None, [], [3, 4, 5]], + [[[1, 2], [3, 4]], [[5, 6, 7], [8, 9]]], + [[["a", "c", "de", None], None, ["fg"]], [["abc", "de"], None]], + ], +) +@pytest.mark.parametrize("dropna", [True, False]) +def test_concat_elements(row, dropna): + if any(x is None for x in row): + if dropna: + row = [x for x in row if x is not None] + result = functools.reduce(operator.add, row) + else: + result = None + else: + result = functools.reduce(operator.add, row) + + expect = pd.Series([result]) + got = cudf.Series([row]).list.concat(dropna=dropna) + assert_eq(expect, got) + + +def test_concat_elements_raise(): + s = cudf.Series([[1, 2, 3]]) # no nesting + with pytest.raises( + ValueError, + match=".*Child of the input lists column must also be a lists column", + ): + s.list.concat() + + +def test_list_iterate_error(): + s = cudf.Series([[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]]) + with pytest.raises(TypeError, match="ListMethods object is not iterable"): + iter(s.list) + + +def test_list_methods_setattr(): + ser = cudf.Series([["a", "b", "c"], ["d", "e", "f"]]) + + with pytest.raises(AttributeError): + ser.list.a = "b" diff --git a/python/cudf/cudf/tests/series/indexing/test_getitem.py b/python/cudf/cudf/tests/series/indexing/test_getitem.py index 3ed2ef57d9d..aecc70335f1 100644 --- a/python/cudf/cudf/tests/series/indexing/test_getitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_getitem.py @@ -1,12 +1,75 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
import pandas as pd +import pyarrow as pa import pytest import cudf from cudf.testing import assert_eq +@pytest.mark.parametrize( + "data", + [ + [1], + [1, 2, 3], + [[1, 2, 3], [4, 5, 6]], + [pd.NA], + [1, pd.NA, 3], + [[1, pd.NA, 3], [pd.NA, 5, 6]], + [[1.1, pd.NA, 3.3], [4.4, 5.5, pd.NA]], + [["a", pd.NA, "c"], ["d", "e", pd.NA]], + [["a", "b", "c"], ["d", "e", "f"]], + ], +) +def test_list_getitem(data): + list_sr = cudf.Series([data]) + assert list_sr[0] == data + + +@pytest.mark.parametrize("nesting_level", [1, 3]) +def test_list_scalar_device_construction_null(nesting_level): + data = [[]] + for i in range(nesting_level - 1): + data = [data] + + arrow_type = pa.infer_type(data) + arrow_arr = pa.array([None], type=arrow_type) + + res = cudf.Series(arrow_arr)[0] + assert res is cudf.NA + + +@pytest.mark.parametrize( + "data, idx", + [ + ( + [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": pd.NA}]], + 0, + ), + ( + [ + [ + {"f2": {"a": 100, "c": 90, "f2": 10}, "f1": "a"}, + {"f1": "sf12", "f2": pd.NA}, + ] + ], + 0, + ), + ( + [[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]], + 0, + ), + ([[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]], 2), + ([[[{"a": 1, "b": 2, "c": 10}]]], 0), + ], +) +def test_nested_list_extract_host_scalars(data, idx): + series = cudf.Series(data) + + assert series[idx] == data[idx] + + @pytest.mark.parametrize( "data, idx, expected", [ diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py index f6ef3d4ddb6..4d78d3f4698 100644 --- a/python/cudf/cudf/tests/series/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -11,6 +12,72 @@ ) +@pytest.mark.parametrize( + "data,item", + [ + ( + # basic list into a list column + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [0, 0, 0], + ), + ( + # nested list into nested list column + [ + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + ], + [[0, 0, 0], [0, 0, 0]], + ), + ( + # NA into a list column + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + pd.NA, + ), + ( + # NA into nested list column + [ + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + ], + pd.NA, + ), + ], +) +def test_listcol_setitem(data, item): + sr = cudf.Series(data) + + sr[1] = item + data[1] = item + expect = cudf.Series(data) + + assert_eq(expect, sr) + + +@pytest.mark.parametrize( + "data,item,error_msg,error_type", + [ + ( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[1, 2, 3], [4, 5, 6]], + "Could not convert .* with type list: tried to convert to int64", + pa.ArrowInvalid, + ), + ( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + 0, + "Can not set 0 into ListColumn", + ValueError, + ), + ], +) +def test_listcol_setitem_error_cases(data, item, error_msg, error_type): + sr = cudf.Series(data) + with pytest.raises(error_type, match=error_msg): + sr[1] = item + + def test_fill_new_category(): gs = cudf.Series(pd.Categorical(["a", "b", "c"])) with pytest.raises(TypeError): diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 30b4fcbdc4e..fb1942fc64a 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -11,6 +11,38 @@ from cudf.testing._utils import assert_exceptions_equal +@pytest.mark.parametrize( + "data", + [ + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [ + [[1, 2, 3], [4, 5, 6]], + [[1, 
2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + ], + [[[1, 2, 3], [4, None, 6]], [], None, [[7, 8], [], None, [9]]], + [[1, 2, 3], [4, None, 6], [7, 8], [], None, [9]], + [[1.0, 2.0, 3.0], [4.0, None, 6.0], [7.0, 8.0], [], None, [9.0]], + ], +) +def test_listcol_as_string(data): + got = cudf.Series(data).astype("str") + expect = pd.Series(data).astype("str") + assert_eq(expect, got) + + +def test_list_astype(): + s = cudf.Series([[1, 2], [3, 4]]) + s2 = s.list.astype("float64") + assert s2.dtype == cudf.ListDtype("float64") + assert_eq(s.list.leaves.astype("float64"), s2.list.leaves) + + s = cudf.Series([[[1, 2], [3]], [[5, 6], None]]) + s2 = s.list.astype("string") + assert s2.dtype == cudf.ListDtype(cudf.ListDtype("string")) + assert_eq(s.list.leaves.astype("string"), s2.list.leaves) + + def test_series_typecast_to_object_error(): actual = cudf.Series([1, 2, 3], dtype="datetime64[ns]") with cudf.option_context("mode.pandas_compatible", True): diff --git a/python/cudf/cudf/tests/series/methods/test_explode.py b/python/cudf/cudf/tests/series/methods/test_explode.py index 0cb54c98aac..f31abec1d2e 100644 --- a/python/cudf/cudf/tests/series/methods/test_explode.py +++ b/python/cudf/cudf/tests/series/methods/test_explode.py @@ -1,5 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import numpy as np import pandas as pd import pytest @@ -29,3 +30,48 @@ def test_explode(data, ignore_index, p_index): got = gdf.explode(ignore_index) assert_eq(expect, got, check_dtype=False) + + +@pytest.fixture(params=["int", "float", "datetime", "timedelta"]) +def leaf_value(request): + if request.param == "int": + return np.int32(1) + elif request.param == "float": + return np.float64(1) + elif request.param == "datetime": + return pd.to_datetime("1900-01-01") + elif request.param == "timedelta": + return pd.to_timedelta("10d") + else: + raise ValueError("Unhandled data type") + + +@pytest.fixture(params=["list", "struct"]) +def list_or_struct(request, leaf_value): + if request.param == "list": + return [[leaf_value], [leaf_value]] + elif request.param == "struct": + return {"a": leaf_value, "b": [leaf_value], "c": {"d": [leaf_value]}} + else: + raise ValueError("Unhandled data type") + + +@pytest.fixture(params=["list", "struct"]) +def nested_list(request, list_or_struct, leaf_value): + if request.param == "list": + return [list_or_struct, list_or_struct] + elif request.param == "struct": + return [ + { + "a": list_or_struct, + "b": leaf_value, + "c": {"d": list_or_struct, "e": leaf_value}, + } + ] + else: + raise ValueError("Unhandled data type") + + +def test_list_dtype_explode(nested_list): + sr = cudf.Series([nested_list]) + assert sr.dtype.element_type == sr.explode().dtype diff --git a/python/cudf/cudf/tests/series/methods/test_memory_usage.py b/python/cudf/cudf/tests/series/methods/test_memory_usage.py index 003e2f61960..4b7c8237e68 100644 --- a/python/cudf/cudf/tests/series/methods/test_memory_usage.py +++ b/python/cudf/cudf/tests/series/methods/test_memory_usage.py @@ -4,6 +4,22 @@ from cudf.testing import assert_eq +def test_memory_usage_list(): + s1 = cudf.Series([[1, 2], [3, 4]]) + assert s1.memory_usage() == 44 + s2 = cudf.Series([[[[1, 2]]], [[[3, 4]]]]) + assert s2.memory_usage() == 68 + s3 = cudf.Series([[{"b": 1, "a": 10}, {"b": 2, "a": 100}]]) + assert s3.memory_usage() == 40 + + +def test_empty_nested_list_uninitialized_offsets_memory_usage(): + ser = cudf.Series( + [[[1, 2], [3]], []], dtype=cudf.ListDtype(cudf.ListDtype("int64")) + ) + assert ser.iloc[:0].memory_usage() == 8 + + def 
test_series_memory_usage(): sr = cudf.Series([1, 2, 3, 4], dtype="int64") assert sr.memory_usage() == 32 diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index 24bdcc6f894..a562a66f312 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -226,6 +226,41 @@ def test_series_init_dict(data): assert_eq(pandas_series, cudf_series) +@pytest.mark.parametrize( + "data", + [ + [[]], + [[[]]], + [[0]], + [[0, 1]], + [[0, 1], [2, 3]], + [[[0, 1], [2]], [[3, 4]]], + [[None]], + [[[None]]], + [[None], None], + [[1, None], [1]], + [[1, None], None], + [[[1, None], None], None], + ], +) +def test_create_list_series(data): + expect = pd.Series(data) + got = cudf.Series(data) + assert_eq(expect, got) + assert isinstance(got[0], type(expect[0])) + assert isinstance(got.to_pandas()[0], type(expect[0])) + + +@pytest.mark.parametrize( + "input_obj", [[[1, pd.NA, 3]], [[1, pd.NA, 3], [4, 5, pd.NA]]] +) +def test_construction_series_with_nulls(input_obj): + expect = pa.array(input_obj, from_pandas=True) + got = cudf.Series(input_obj).to_arrow() + + assert expect == got + + def test_series_unitness_np_datetimelike_units(): data = np.array([np.timedelta64(1)]) with pytest.raises(TypeError): diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index cadd5c80a54..039bb7cbf9c 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -1,638 +1,11 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. -import functools -import operator -import numpy as np import pandas as pd -import pyarrow as pa import pytest import cudf -from cudf import NA -from cudf.api.types import is_scalar -from cudf.core.column.column import column_empty from cudf.testing import assert_eq -from cudf.utils.dtypes import cudf_dtype_to_pa_type - - -@pytest.mark.parametrize( - "data", - [ - [[]], - [[[]]], - [[0]], - [[0, 1]], - [[0, 1], [2, 3]], - [[[0, 1], [2]], [[3, 4]]], - [[None]], - [[[None]]], - [[None], None], - [[1, None], [1]], - [[1, None], None], - [[[1, None], None], None], - ], -) -def test_create_list_series(data): - expect = pd.Series(data) - got = cudf.Series(data) - assert_eq(expect, got) - assert isinstance(got[0], type(expect[0])) - assert isinstance(got.to_pandas()[0], type(expect[0])) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [[]]}, - {"a": [[None]]}, - {"a": [[1, 2, 3]]}, - {"a": [[1, 2, 3]], "b": [[2, 3, 4]]}, - {"a": [[1, 2, 3, None], [None]], "b": [[2, 3, 4], [5]], "c": None}, - {"a": [[1]], "b": [[1, 2, 3]]}, - pd.DataFrame({"a": [[1, 2, 3]]}), - ], -) -def test_df_list_dtypes(data): - expect = pd.DataFrame(data) - got = cudf.DataFrame(data) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - [[]], - [[[]]], - [[0]], - [[0, 1]], - [[0, 1], [2, 3]], - [[[0, 1], [2]], [[3, 4]]], - [[[0, 1, None], None], None, [[3, 2, None], None]], - [[["a", "c", None], None], None, [["b", "d", None], None]], - ], -) -def test_leaves(data): - pa_array = pa.array(data) - while hasattr(pa_array, "flatten"): - pa_array = pa_array.flatten() - - expect = cudf.Series(pa_array) - got = cudf.Series(data).list.leaves - assert_eq( - expect, - got, - check_dtype=not isinstance(pa_array, pa.NullArray), - ) - - -def test_list_to_pandas_nullable_true(): - df = cudf.DataFrame({"a": cudf.Series([[1, 2, 3]])}) - with pytest.raises(NotImplementedError): - df.to_pandas(nullable=True) - - -def test_listdtype_hash(): - a = 
cudf.core.dtypes.ListDtype("int64") - b = cudf.core.dtypes.ListDtype("int64") - - assert hash(a) == hash(b) - - c = cudf.core.dtypes.ListDtype("int32") - - assert hash(a) != hash(c) - - -@pytest.fixture(params=["int", "float", "datetime", "timedelta"]) -def leaf_value(request): - if request.param == "int": - return np.int32(1) - elif request.param == "float": - return np.float64(1) - elif request.param == "datetime": - return pd.to_datetime("1900-01-01") - elif request.param == "timedelta": - return pd.to_timedelta("10d") - else: - raise ValueError("Unhandled data type") - - -@pytest.fixture(params=["list", "struct"]) -def list_or_struct(request, leaf_value): - if request.param == "list": - return [[leaf_value], [leaf_value]] - elif request.param == "struct": - return {"a": leaf_value, "b": [leaf_value], "c": {"d": [leaf_value]}} - else: - raise ValueError("Unhandled data type") - - -@pytest.fixture(params=["list", "struct"]) -def nested_list(request, list_or_struct, leaf_value): - if request.param == "list": - return [list_or_struct, list_or_struct] - elif request.param == "struct": - return [ - { - "a": list_or_struct, - "b": leaf_value, - "c": {"d": list_or_struct, "e": leaf_value}, - } - ] - else: - raise ValueError("Unhandled data type") - - -def test_list_dtype_explode(nested_list): - sr = cudf.Series([nested_list]) - assert sr.dtype.element_type == sr.explode().dtype - - -@pytest.mark.parametrize( - "data", - [ - [[]], - [[1, 2, 3], [4, 5]], - [[1, 2, 3], [], [4, 5]], - [[1, 2, 3], None, [4, 5]], - [[None, None], [None]], - [[[[[[1, 2, 3]]]]]], - cudf.Series([[1, 2]]).iloc[0:0], - cudf.Series([None, [1, 2]]).iloc[0:1], - ], -) -def test_len(data): - gsr = cudf.Series(data) - psr = gsr.to_pandas() - - expect = psr.map(lambda x: len(x) if x is not None else None) - got = gsr.list.len() - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - ("data", "idx"), - [ - ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[0, 1], [2], [1, 2]]), - ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[1, 2, 0], [1, 0, 2], [0, 1, 2]]), - ([[1, 2, 3], []], [[0, 1], []]), - ([[1, 2, 3], [None]], [[0, 1], []]), - ([[1, None, 3], None], [[0, 1], []]), - ], -) -def test_take(data, idx): - ps = pd.Series(data) - gs = cudf.from_pandas(ps) - - expected = pd.Series(zip(ps, idx, strict=True)).map( - lambda x: [x[0][i] for i in x[1]] if x[0] is not None else None - ) - got = gs.list.take(idx) - assert_eq(expected, got) - - -@pytest.mark.parametrize( - ("invalid", "exception"), - [ - ([[0]], pytest.raises(ValueError, match="different size")), - ([1, 2, 3, 4], pytest.raises(ValueError, match="should be list type")), - ( - [["a", "b"], ["c"]], - pytest.raises( - TypeError, match="should be column of values of index types" - ), - ), - ( - [[[1], [0]], [[0]]], - pytest.raises( - TypeError, match="should be column of values of index types" - ), - ), - ([[0, 1], None], pytest.raises(ValueError, match="contains null")), - ], -) -def test_take_invalid(invalid, exception): - gs = cudf.Series([[0, 1], [2, 3]]) - with exception: - gs.list.take(invalid) - - -@pytest.mark.parametrize( - ("data", "expected"), - [ - ([[1, 1, 2, 2], [], None, [3, 4, 5]], [[1, 2], [], None, [3, 4, 5]]), - ( - [[1.233, np.nan, 1.234, 3.141, np.nan, 1.234]], - [[1.233, 1.234, np.nan, 3.141]], - ), # duplicate nans - ([[1, 1, 2, 2, None, None]], [[1, 2, None]]), # duplicate nulls - ( - [[1.233, np.nan, None, 1.234, 3.141, np.nan, 1.234, None]], - [[1.233, 1.234, np.nan, None, 3.141]], - ), # duplicate nans and nulls - ([[2, None, 1, None, 2]], [[1, 2, 
None]]), - ([[], []], [[], []]), - ([[], None], [[], None]), - ], -) -def test_unique(data, expected): - """ - Pandas de-duplicates nans and nulls respectively in Series.unique. - `expected` is setup to mimic such behavior - """ - gs = cudf.Series(data, nan_as_null=False) - - got = gs.list.unique() - expected = cudf.Series(expected, nan_as_null=False).list.sort_values() - - got = got.list.sort_values() - - assert_eq(expected, got) - - -def key_func_builder(x, na_position): - if x is None: - if na_position == "first": - return -1e8 - else: - return 1e8 - else: - return x - - -@pytest.mark.parametrize( - "data", - [ - [[4, 2, None, 9], [8, 8, 2], [2, 1]], - [[4, 2, None, 9], [8, 8, 2], None], - [[4, 2, None, 9], [], None], - ], -) -@pytest.mark.parametrize( - "index", - [ - None, - pd.Index(["a", "b", "c"]), - pd.MultiIndex.from_tuples( - [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"] - ), - ], -) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("na_position", ["first", "last"]) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_sort_values(data, index, ascending, na_position, ignore_index): - key_func = functools.partial(key_func_builder, na_position=na_position) - - ps = pd.Series(data, index=index) - gs = cudf.from_pandas(ps) - - expected = ps.apply( - lambda x: sorted(x, key=key_func, reverse=not ascending) - if x is not None - else None - ) - if ignore_index: - expected.reset_index(drop=True, inplace=True) - got = gs.list.sort_values( - ascending=ascending, na_position=na_position, ignore_index=ignore_index - ) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data, index, expect", - [ - ([[None, None], [None, None]], 0, [None, None]), - ([[1, 2], [3, 4]], 0, [1, 3]), - ([["a", "b"], ["c", "d"]], 1, ["b", "d"]), - ([[1, None], [None, 2]], 1, [None, 2]), - ([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 1, [[3, 4], [7, 8]]), - ], -) -def test_get(data, index, expect): - sr = cudf.Series(data) - expect = cudf.Series(expect) - got = sr.list.get(index) - - assert_eq(expect, got, check_dtype=not expect.isnull().all()) - - -@pytest.mark.parametrize( - "data", - [ - [{"k": "v1"}, {"k": "v2"}], - [[{"k": "v1", "b": "v2"}], [{"k": "v3", "b": "v4"}]], - [ - [{"k": "v1", "b": [{"c": 10, "d": "v5"}]}], - [{"k": "v3", "b": [{"c": 14, "d": "v6"}]}], - ], - ], -) -@pytest.mark.parametrize("index", [0, 1]) -def test_get_nested_struct_dtype_transfer(data, index): - sr = cudf.Series([data]) - expect = cudf.Series(data[index : index + 1]) - assert_eq(expect, sr.list.get(index)) - - -def test_get_nested_lists(): - sr = cudf.Series( - [ - [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [], [[3, 4], [7, 8]]], - [[], [[9, 10]], [[11, 12], [13, 14]]], - ] - ) - expect = cudf.Series([[[1, 2], [3, 4]], []]) - got = sr.list.get(0) - assert_eq(expect, got) - - -def test_get_default(): - sr = cudf.Series([[1, 2], [3, 4, 5], [6, 7, 8, 9]]) - - assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2)) - assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2, default=cudf.NA)) - assert_eq(cudf.Series([0, 5, 8]), sr.list.get(2, default=0)) - assert_eq(cudf.Series([0, 3, 7]), sr.list.get(-3, default=0)) - assert_eq(cudf.Series([2, 5, 9]), sr.list.get(-1)) - - string_sr = cudf.Series( - [["apple", "banana"], ["carrot", "daffodil", "elephant"]] - ) - assert_eq( - cudf.Series(["default", "elephant"]), - string_sr.list.get(2, default="default"), - ) - - sr_with_null = cudf.Series([[0, cudf.NA], [1]]) - assert_eq(cudf.Series([cudf.NA, 0]), sr_with_null.list.get(1, default=0)) - - 
sr_nested = cudf.Series([[[1, 2], [3, 4], [5, 6]], [[5, 6], [7, 8]]]) - assert_eq(cudf.Series([[3, 4], [7, 8]]), sr_nested.list.get(1)) - assert_eq(cudf.Series([[5, 6], cudf.NA]), sr_nested.list.get(2)) - assert_eq( - cudf.Series([[5, 6], [0, 0]]), sr_nested.list.get(2, default=[0, 0]) - ) - - -def test_get_ind_sequence(): - # test .list.get() when `index` is a sequence - sr = cudf.Series([[1, 2], [3, 4, 5], [6, 7, 8, 9]]) - assert_eq(cudf.Series([1, 4, 8]), sr.list.get([0, 1, 2])) - assert_eq(cudf.Series([1, 4, 8]), sr.list.get(cudf.Series([0, 1, 2]))) - assert_eq(cudf.Series([cudf.NA, 5, cudf.NA]), sr.list.get([2, 2, -5])) - assert_eq(cudf.Series([0, 5, 0]), sr.list.get([2, 2, -5], default=0)) - sr_nested = cudf.Series([[[1, 2], [3, 4], [5, 6]], [[5, 6], [7, 8]]]) - assert_eq(cudf.Series([[1, 2], [7, 8]]), sr_nested.list.get([0, 1])) - - -@pytest.mark.parametrize( - "data, scalar, expect", - [ - ( - [[1, 2, 3], []], - 1, - [True, False], - ), - ( - [[1, 2, 3], [], [3, 4, 5]], - 6, - [False, False, False], - ), - ( - [[1.0, 2.0, 3.0], None, []], - 2.0, - [True, None, False], - ), - ( - [[None, "b", "c"], [], ["b", "e", "f"]], - "b", - [True, False, True], - ), - ([[None, 2, 3], None, []], 1, [False, None, False]), - ( - [[None, "b", "c"], [], ["b", "e", "f"]], - "d", - [False, False, False], - ), - ], -) -def test_contains_scalar(data, scalar, expect): - sr = cudf.Series(data) - expect = cudf.Series(expect) - got = sr.list.contains( - pa.scalar(scalar, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) - ) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data, expect", - [ - ( - [[1, 2, 3], []], - [None, None], - ), - ( - [[1.0, 2.0, 3.0], None, []], - [None, None, None], - ), - ( - [[None, 2, 3], [], None], - [None, None, None], - ), - ( - [[1, 2, 3], [3, 4, 5]], - [None, None], - ), - ( - [[], [], []], - [None, None, None], - ), - ], -) -def test_contains_null_search_key(data, expect): - sr = cudf.Series(data) - expect = cudf.Series(expect, dtype="bool") - got = sr.list.contains( - pa.scalar(None, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) - ) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data, scalar", - [ - ( - [[9, 0, 2], [], [1, None, 0]], - "x", - ), - ( - [["z", "y", None], None, [None, "x"]], - 5, - ), - ], -) -def test_contains_invalid(data, scalar): - sr = cudf.Series(data) - with pytest.raises( - TypeError, - match="Type/Scale of search key does not " - "match list column element type.", - ): - sr.list.contains(scalar) - - -@pytest.mark.parametrize( - "data, search_key, expect", - [ - ( - [[1, 2, 3], [], [3, 4, 5]], - 3, - [2, -1, 0], - ), - ( - [[1.0, 2.0, 3.0], None, [2.0, 5.0]], - 2.0, - [1, None, 0], - ), - ( - [[None, "b", "c"], [], ["b", "e", "f"]], - "f", - [-1, -1, 2], - ), - ([[-5, None, 8], None, []], -5, [0, None, -1]), - ( - [[None, "x", None, "y"], ["z", "i", "j"]], - "y", - [3, -1], - ), - ( - [["h", "a", None], ["t", "g"]], - ["a", "b"], - [1, -1], - ), - ( - [None, ["h", "i"], ["p", "k", "z"]], - ["x", None, "z"], - [None, None, 2], - ), - ( - [["d", None, "e"], [None, "f"], []], - pa.scalar(None, type=pa.string()), - [None, None, None], - ), - ( - [None, [10, 9, 8], [5, 8, None]], - pa.scalar(None, type=pa.int64()), - [None, None, None], - ), - ], -) -def test_index(data, search_key, expect): - sr = cudf.Series(data) - expect = cudf.Series(expect, dtype="int32") - if is_scalar(search_key): - got = sr.list.index( - pa.scalar( - search_key, type=cudf_dtype_to_pa_type(sr.dtype.element_type) - ) - ) - else: - got = sr.list.index( - 
cudf.Series(search_key, dtype=sr.dtype.element_type) - ) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data, search_key", - [ - ( - [[9, None, 8], [], [7, 6, 5]], - "c", - ), - ( - [["a", "b", "c"], None, [None, "d"]], - 2, - ), - ( - [["e", "s"], ["t", "w"]], - [5, 6], - ), - ], -) -def test_index_invalid_type(data, search_key): - sr = cudf.Series(data) - with pytest.raises( - TypeError, - match="Type/Scale of search key does not " - "match list column element type.", - ): - sr.list.index(search_key) - - -@pytest.mark.parametrize( - "data, search_key", - [ - ( - [[5, 8], [2, 6]], - [8, 2, 4], - ), - ( - [["h", "j"], ["p", None], ["t", "z"]], - ["j", "a"], - ), - ], -) -def test_index_invalid_length(data, search_key): - sr = cudf.Series(data) - with pytest.raises( - RuntimeError, - match="Number of search keys must match list column size.", - ): - sr.list.index(search_key) - - -@pytest.mark.parametrize( - "row", - [ - [[]], - [[1]], - [[1, 2]], - [[1, 2], [3, 4, 5]], - [[1, 2], [], [3, 4, 5]], - [[1, 2, None], [3, 4, 5]], - [[1, 2, None], None, [3, 4, 5]], - [[1, 2, None], None, [], [3, 4, 5]], - [[[1, 2], [3, 4]], [[5, 6, 7], [8, 9]]], - [[["a", "c", "de", None], None, ["fg"]], [["abc", "de"], None]], - ], -) -@pytest.mark.parametrize("dropna", [True, False]) -def test_concat_elements(row, dropna): - if any(x is None for x in row): - if dropna: - row = [x for x in row if x is not None] - result = functools.reduce(operator.add, row) - else: - result = None - else: - result = functools.reduce(operator.add, row) - - expect = pd.Series([result]) - got = cudf.Series([row]).list.concat(dropna=dropna) - assert_eq(expect, got) - - -def test_concat_elements_raise(): - s = cudf.Series([[1, 2, 3]]) # no nesting - with pytest.raises(ValueError): - s.list.concat() def test_concatenate_rows_of_lists(): @@ -650,302 +23,3 @@ def test_concatenate_list_with_nonlist(): gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]}) gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) gdf1["A"] + gdf2["A"] - - -@pytest.mark.parametrize( - "data", - [ - [1], - [1, 2, 3], - [[1, 2, 3], [4, 5, 6]], - [NA], - [1, NA, 3], - [[1, NA, 3], [NA, 5, 6]], - ], -) -def test_list_getitem(data): - list_sr = cudf.Series([data]) - assert list_sr[0] == data - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3], - [[1, 2, 3], [4, 5, 6]], - ["a", "b", "c"], - [["a", "b", "c"], ["d", "e", "f"]], - [1.1, 2.2, 3.3], - [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], - [1, NA, 3], - [[1, NA, 3], [4, 5, NA]], - ["a", NA, "c"], - [["a", NA, "c"], ["d", "e", NA]], - [1.1, NA, 3.3], - [[1.1, NA, 3.3], [4.4, 5.5, NA]], - ], -) -def test_list_scalar_device_construction(data): - res = cudf.Series([data])._column.element_indexing(0) - assert res == data - - -@pytest.mark.parametrize("nesting_level", [1, 2, 3]) -def test_list_scalar_device_construction_null(nesting_level): - data = [[]] - for i in range(nesting_level - 1): - data = [data] - - arrow_type = pa.infer_type(data) - arrow_arr = pa.array([None], type=arrow_type) - - res = cudf.Series(arrow_arr)._column.element_indexing(0) - assert res is cudf.NA - - -@pytest.mark.parametrize("input_obj", [[[1, NA, 3]], [[1, NA, 3], [4, 5, NA]]]) -def test_construction_series_with_nulls(input_obj): - expect = pa.array(input_obj, from_pandas=True) - got = cudf.Series(input_obj).to_arrow() - - assert expect == got - - -@pytest.mark.parametrize( - "data", - [ - {"a": [[]]}, - {"a": [[1, 2, None, 4]]}, - {"a": [["cat", None, "dog"]]}, - { - "a": [[1, 2, 3, None], [4, None, 5]], - "b": [None, ["fish", 
"bird"]], - "c": [[], []], - }, - {"a": [[1, 2, 3, None], [4, None, 5], None, [6, 7]]}, - ], -) -def test_serialize_list_columns(data): - df = cudf.DataFrame(data) - recreated = df.__class__.deserialize(*df.serialize()) - assert_eq(recreated, df) - - -@pytest.mark.parametrize( - "data,item", - [ - ( - # basic list into a list column - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [0, 0, 0], - ), - ( - # nested list into nested list column - [ - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - ], - [[0, 0, 0], [0, 0, 0]], - ), - ( - # NA into a list column - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - NA, - ), - ( - # NA into nested list column - [ - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - ], - NA, - ), - ], -) -def test_listcol_setitem(data, item): - sr = cudf.Series(data) - - sr[1] = item - data[1] = item - expect = cudf.Series(data) - - assert_eq(expect, sr) - - -@pytest.mark.parametrize( - "data", - [ - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [ - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - [[1, 2, 3], [4, 5, 6]], - ], - [[[1, 2, 3], [4, None, 6]], [], None, [[7, 8], [], None, [9]]], - [[1, 2, 3], [4, None, 6], [7, 8], [], None, [9]], - [[1.0, 2.0, 3.0], [4.0, None, 6.0], [7.0, 8.0], [], None, [9.0]], - ], -) -def test_listcol_as_string(data): - got = cudf.Series(data).astype("str") - expect = pd.Series(data).astype("str") - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data,item,error", - [ - ( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[1, 2, 3], [4, 5, 6]], - "Could not convert .* with type list: tried to convert to int64", - ), - ( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - 0, - "Can not set 0 into ListColumn", - ), - ], -) -def test_listcol_setitem_error_cases(data, item, error): - sr = cudf.Series(data) - with pytest.raises(BaseException, match=error): - sr[1] = item - - -def test_listcol_setitem_retain_dtype(): - df = cudf.DataFrame( - {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]} - ) - df1 = df.head(0) - # Performing a setitem on `b` triggers a `column.column_empty` call - # which tries to create an empty ListColumn. 
- df1["b"] = df1["c"] - # Performing a copy to trigger a copy dtype which is obtained by accessing - # `ListColumn.children` that would have been corrupted in previous call - # prior to this fix: https://github.com/rapidsai/cudf/pull/10151/ - df2 = df1.copy() - assert df2["a"].dtype == df["a"].dtype - - -def test_list_astype(): - s = cudf.Series([[1, 2], [3, 4]]) - s2 = s.list.astype("float64") - assert s2.dtype == cudf.ListDtype("float64") - assert_eq(s.list.leaves.astype("float64"), s2.list.leaves) - - s = cudf.Series([[[1, 2], [3]], [[5, 6], None]]) - s2 = s.list.astype("string") - assert s2.dtype == cudf.ListDtype(cudf.ListDtype("string")) - assert_eq(s.list.leaves.astype("string"), s2.list.leaves) - - -def test_memory_usage(): - s1 = cudf.Series([[1, 2], [3, 4]]) - assert s1.memory_usage() == 44 - s2 = cudf.Series([[[[1, 2]]], [[[3, 4]]]]) - assert s2.memory_usage() == 68 - s3 = cudf.Series([[{"b": 1, "a": 10}, {"b": 2, "a": 100}]]) - assert s3.memory_usage() == 40 - - -@pytest.mark.parametrize( - "data, idx", - [ - ( - [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": NA}]], - 0, - ), - ( - [ - [ - {"f2": {"a": 100, "c": 90, "f2": 10}, "f1": "a"}, - {"f1": "sf12", "f2": NA}, - ] - ], - 0, - ), - ( - [[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]], - 0, - ), - ([[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]], 2), - ([[[{"a": 1, "b": 2, "c": 10}]]], 0), - ], -) -def test_nested_list_extract_host_scalars(data, idx): - series = cudf.Series(data) - - assert series[idx] == data[idx] - - -def test_list_iterate_error(): - s = cudf.Series([[[[1, 2]], [[2], [3]]], [[[2]]], [[[3]]]]) - with pytest.raises(TypeError): - iter(s.list) - - -def test_list_struct_list_memory_usage(): - df = cudf.DataFrame({"a": [[{"b": [1]}]]}) - assert df.memory_usage().sum() == 16 - - -def test_empty_nested_list_uninitialized_offsets_memory_usage(): - col = column_empty(0, cudf.ListDtype(cudf.ListDtype("int64"))) - nested_col = col.children[1] - empty_inner = type(nested_col)( - data=None, - size=nested_col.size, - dtype=nested_col.dtype, - mask=nested_col.mask, - offset=nested_col.offset, - null_count=nested_col.null_count, - children=( - column_empty(0, nested_col.children[0].dtype), - nested_col.children[1], - ), - ) - col_empty_offset = type(col)( - data=None, - size=col.size, - dtype=col.dtype, - mask=col.mask, - offset=col.offset, - null_count=col.null_count, - children=(column_empty(0, col.children[0].dtype), empty_inner), - ) - ser = cudf.Series._from_column(col_empty_offset) - assert ser.memory_usage() == 8 - - -def test_list_methods_setattr(): - ser = cudf.Series([["a", "b", "c"], ["d", "e", "f"]]) - - with pytest.raises(AttributeError): - ser.list.a = "b" - - -def test_dataframe_list_round_trip(): - data = [{"text": "hello", "list_col": np.asarray([1, 2], dtype="uint32")}] - cudf_arrow = cudf.DataFrame(data).to_arrow() - pdf_arrow = pa.Table.from_pandas(pd.DataFrame(data)) - - for metadata in [ - None, - pdf_arrow.schema.metadata, - cudf_arrow.schema.metadata, - ]: - schema = pa.schema( - [ - pa.field("text", pa.string()), - pa.field("list_col", pa.list_(pa.uint32())), - ], - metadata=metadata, - ) - - data = {"text": ["asd", "pqr"], "list_col": [[1, 2, 3], [4, 5]]} - - table = pa.Table.from_pydict(data, schema=schema) - assert_eq(table.to_pandas(), pd.DataFrame(data)) From 783feb44dc3399da07d2a6e3d95668e6afbdb56e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 19 Aug 2025 14:17:28 -0700 Subject: [PATCH 166/366] Add streams to column APIs (#19726) Contributes to #15163 Authors: - Vyas Ramasubramani 
(https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19726 --- python/pylibcudf/pylibcudf/column.pxd | 2 +- python/pylibcudf/pylibcudf/column.pyx | 84 ++++++++++++++----- .../libcudf/column/column_factories.pxd | 6 ++ 3 files changed, 69 insertions(+), 23 deletions(-) diff --git a/python/pylibcudf/pylibcudf/column.pxd b/python/pylibcudf/pylibcudf/column.pxd index f209b6c3178..51fa36f8913 100644 --- a/python/pylibcudf/pylibcudf/column.pxd +++ b/python/pylibcudf/pylibcudf/column.pxd @@ -72,7 +72,7 @@ cdef class Column: Column base=*, ) - cpdef Scalar to_scalar(self) + cpdef Scalar to_scalar(self, Stream stream=*) cpdef DataType type(self) cpdef Column child(self, size_type index) cpdef size_type num_children(self) diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 40ca7b0e4c3..8773c123d75 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -32,7 +32,6 @@ from pylibcudf.libcudf.strings.strings_column_view cimport strings_column_view from pylibcudf.libcudf.types cimport size_type, size_of as cpp_size_of, bitmask_type from pylibcudf.libcudf.utilities.traits cimport is_fixed_width from pylibcudf.libcudf.copying cimport get_element -from pylibcudf.libcudf.utilities.default_stream cimport get_default_stream from rmm.pylibrmm.device_buffer cimport DeviceBuffer @@ -330,7 +329,11 @@ cdef class Column: self._num_children = len(children) @staticmethod - def from_arrow(obj: ArrowLike, dtype: DataType | None = None) -> Column: + def from_arrow( + obj: ArrowLike, + dtype: DataType | None = None, + Stream stream=None + ) -> Column: """ Create a Column from an Arrow-like object using the Arrow C Data Interface. @@ -348,6 +351,8 @@ cdef class Column: - `__arrow_c_device_stream__` (device Arrow stream) dtype : DataType | None The pylibcudf data type. + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- @@ -375,6 +380,8 @@ cdef class Column: cdef ArrowDeviceArray* c_device_array cdef _ArrowColumnHolder result cdef unique_ptr[arrow_column] c_result + + stream = _get_stream(stream) if hasattr(obj, "__arrow_c_device_array__"): schema, d_array = obj.__arrow_c_device_array__() c_schema = PyCapsule_GetPointer(schema, "arrow_schema") @@ -385,7 +392,9 @@ cdef class Column: result = _ArrowColumnHolder() with nogil: c_result = make_unique[arrow_column]( - move(dereference(c_schema)), move(dereference(c_device_array)) + move(dereference(c_schema)), + move(dereference(c_device_array)), + stream.view(), ) result.col.swap(c_result) @@ -398,21 +407,26 @@ cdef class Column: result = _ArrowColumnHolder() with nogil: c_result = make_unique[arrow_column]( - move(dereference(c_schema)), move(dereference(c_array)) + move(dereference(c_schema)), + move(dereference(c_array)), + stream.view(), ) result.col.swap(c_result) return Column.from_column_view_of_arbitrary(result.col.get().view(), result) elif hasattr(obj, "__arrow_c_stream__"): - stream = obj.__arrow_c_stream__() - c_stream = ( - PyCapsule_GetPointer(stream, "arrow_array_stream") + arrow_stream = obj.__arrow_c_stream__() + c_arrow_stream = ( + PyCapsule_GetPointer( + arrow_stream, + "arrow_array_stream", + ) ) result = _ArrowColumnHolder() with nogil: c_result = make_unique[arrow_column]( - move(dereference(c_stream)) + move(dereference(c_arrow_stream)), stream.view() ) result.col.swap(c_result) @@ -561,7 +575,7 @@ cdef class Column: cdef gpumemoryview mask = None if null_count > 0: mask = gpumemoryview( - DeviceBuffer.c_from_unique_ptr(move(contents.null_mask)) + DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), stream) ) children = [] @@ -678,7 +692,7 @@ cdef class Column: ) @staticmethod - def from_scalar(Scalar slr, size_type size): + def from_scalar(Scalar slr, size_type size, Stream stream=None): """Create a Column from a Scalar. Parameters @@ -687,6 +701,8 @@ cdef class Column: The scalar to create a column from. size : size_type The number of elements in the column. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -695,11 +711,16 @@ cdef class Column: """ cdef const scalar* c_scalar = slr.get() cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = make_column_from_scalar(dereference(c_scalar), size) - return Column.from_libcudf(move(c_result)) + c_result = make_column_from_scalar( + dereference(c_scalar), + size, + stream.view() + ) + return Column.from_libcudf(move(c_result), stream) - cpdef Scalar to_scalar(self): + cpdef Scalar to_scalar(self, Stream stream=None): """ Return the first value of 1-element column as a Scalar. @@ -707,6 +728,8 @@ cdef class Column: ------ ValueError If the column has more than one row. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- @@ -718,14 +741,15 @@ cdef class Column: cdef column_view cv = self.view() cdef unique_ptr[scalar] result + stream = _get_stream(stream) with nogil: - result = get_element(cv, 0, get_default_stream()) + result = get_element(cv, 0, stream.view()) return Scalar.from_libcudf(move(result)) @staticmethod - def all_null_like(Column like, size_type size): + def all_null_like(Column like, size_type size, Stream stream=None): """Create an all null column from a template. Parameters @@ -734,6 +758,8 @@ cdef class Column: Column whose type we should mimic size : int Number of rows in the resulting column. 
+        stream : Stream | None
+            CUDA stream on which to perform the operation.
 
         Returns
         -------
@@ -742,9 +768,14 @@ cdef class Column:
         """
         cdef Scalar slr = Scalar.empty_like(like)
         cdef unique_ptr[column] c_result
+        stream = _get_stream(stream)
         with nogil:
-            c_result = make_column_from_scalar(dereference(slr.get()), size)
-        return Column.from_libcudf(move(c_result))
+            c_result = make_column_from_scalar(
+                dereference(slr.get()),
+                size,
+                stream.view()
+            )
+        return Column.from_libcudf(move(c_result), stream)
 
     @staticmethod
     cdef Column _wrap_nested_list_column(
@@ -802,7 +833,7 @@ cdef class Column:
         return nested
 
     @classmethod
-    def from_array_interface(cls, obj):
+    def from_array_interface(cls, obj, Stream stream=None):
         """
         Create a Column from an object implementing the NumPy Array Interface.
 
@@ -814,6 +845,8 @@ cdef class Column:
         ----------
         obj : Any
             Must implement the ``__array_interface__`` protocol.
+        stream : Stream | None
+            CUDA stream on which to perform the operation.
 
         Returns
         -------
@@ -840,13 +873,14 @@ cdef class Column:
         cdef const unsigned char* ptr
         cdef const unsigned char[:] view
+        stream = _get_stream(stream)
         if nbytes > 0:
             ptr = data_ptr
             view = ( ptr)[:nbytes]
-            dbuf = DeviceBuffer.to_device(view)
+            dbuf = DeviceBuffer.to_device(view, stream)
         else:
-            dbuf = DeviceBuffer(size=0)
+            dbuf = DeviceBuffer(size=0, stream=stream)
 
         return Column._wrap_nested_list_column(gpumemoryview(dbuf), shape, dtype)
 
@@ -926,7 +960,11 @@ cdef class Column:
         )
 
     @staticmethod
-    def from_iterable_of_py(obj: Iterable, dtype: DataType | None = None) -> Column:
+    def from_iterable_of_py(
+        obj: Iterable,
+        dtype: DataType | None = None,
+        Stream stream=None
+    ) -> Column:
         """
         Create a Column from a Python iterable of scalar values
         or nested iterables.
@@ -936,6 +974,8 @@ cdef class Column:
            An iterable of Python scalar values (int, float, bool, str) or nested lists.
        dtype : DataType | None
            The type of the leaf elements. If not specified, the type is inferred.
+        stream : Stream | None
+            CUDA stream on which to perform the operation.
 
         Returns
         -------
@@ -1027,7 +1067,7 @@ cdef class Column:
             "version": 3,
         }
 
-        return Column.from_array_interface(ArrayInterfaceWrapper(iface))
+        return Column.from_array_interface(ArrayInterfaceWrapper(iface), stream)
 
     @classmethod
     def struct_from_children(cls, children: Iterable[Column]):
diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
index 60b48a7bb6a..5abc0523f7e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd
@@ -88,6 +88,12 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil:
         size_type size
     ) except +libcudf_exception_handler
 
+    cdef unique_ptr[column] make_column_from_scalar(
+        const scalar& s,
+        size_type size,
+        cuda_stream_view stream
+    ) except +libcudf_exception_handler
+
     cdef unique_ptr[column] make_dictionary_from_scalar(
         const scalar& s,
         size_type size

From ba64909422016ba389ab06ed01d7578336c19e8e Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 19 Aug 2025 17:04:20 -0500
Subject: [PATCH 167/366] Require `numba-cuda>=0.19.0,<0.20.0a0` (#19711)

Updates the `numba-cuda` dependency to `>=0.19.0,<0.20.0a0`. This adds
CUDA 13 support, constrains the `cuda-bindings` version to avoid recent
Cython issues, and brings in several other important API fixes and updates.
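As an illustrative aside (not part of this change), a downstream environment
can sanity-check that its installed `numba-cuda` satisfies the new pinning.
This is a hypothetical snippet, assuming `packaging` is available (it is
already a cudf dependency):

```python
# Hypothetical runtime check of the new pinning; assumes `packaging`
# and `numba-cuda` are installed in the environment.
from importlib.metadata import version
from packaging.version import Version

installed = Version(version("numba-cuda"))
assert Version("0.19.0") <= installed < Version("0.20.0a0"), installed
```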
Authors: - https://github.com/brandon-b-miller - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19711 --- .../environments/all_cuda-129_arch-aarch64.yaml | 4 ++-- .../environments/all_cuda-129_arch-x86_64.yaml | 4 ++-- conda/recipes/cudf/recipe.yaml | 7 ++++--- dependencies.yaml | 17 +++++++++++------ python/cudf/pyproject.toml | 6 +++--- python/pylibcudf/pyproject.toml | 4 ++-- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index bd9e288cb43..e3d1d9c3b9b 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -55,8 +55,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.18.0,<0.19.0a0 -- numba>=0.60.0,<0.62.0a0 +- numba-cuda>=0.19.0,<0.20.0a0 +- numba>=0.61.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 7d3b41c97e5..1b536f03c29 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -56,8 +56,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.18.0,<0.19.0a0 -- numba>=0.60.0,<0.62.0a0 +- numba-cuda>=0.19.0,<0.20.0a0 +- numba>=0.61.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 3f69e8dcd2e..4b787a8178a 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -55,7 +55,7 @@ requirements: - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - - numba-cuda >=0.18.0,<0.19.0a0 + - numba-cuda >=0.19.0,<0.20.0a0 - libcudf =${{ version }} - pylibcudf =${{ version }} - rmm =${{ minor_version }} @@ -70,8 +70,9 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.4.0dev0 - cupy >=12.0.0 - - numba-cuda >=0.18.0,<0.19.0a0 - - numba >=0.60.0,<0.62.0a0 + - numba-cuda >=0.19.0,<0.20.0a0 + # TODO: Revert to numba>=0.60.0,<0.62.0a0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. + - numba >=0.61.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<20.0.0a0 - libcudf =${{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index d28f2b5e5fb..0c6c1f19900 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -504,14 +504,14 @@ dependencies: common: - output_types: [conda] packages: - - &numba_cuda numba-cuda>=0.18.0,<0.19.0a0 + - &numba_cuda numba-cuda>=0.19.0,<0.20.0a0 specific: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - &numba_cuda_cu12 numba-cuda[cu12]>=0.18.0,<0.19.0a0 + - &numba_cuda_cu12 numba-cuda[cu12]>=0.19.0,<0.20.0a0 - matrix: # Fallback for no matrix packages: - *numba_cuda_cu12 @@ -684,7 +684,8 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba numba>=0.60.0,<0.62.0a0 + # TODO: Revert to numba>=0.60.0,<0.62.0a0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. + - &numba numba>=0.61.0,<0.62.0a0 - nvtx>=0.2.1 - packaging - rich @@ -803,9 +804,10 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - - numba==0.60.0 + # TODO: Revert to numba==0.60.0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. 
+ - numba==0.61.0 - pandas==2.0.* - - numba-cuda==0.18.0 + - numba-cuda==0.19.0 - matrix: {dependencies: "latest"} packages: - pandas==2.3.1 @@ -875,7 +877,10 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - - numpy==1.23.* + # TODO: Revert to numpy==1.23.* once + # https://github.com/NVIDIA/numba-cuda/pull/403 is released and + # we revert to an oldest pinning of numba==0.60.0. + - numpy==1.24.* # pyarrow 14 is fine in some circumstances but we require pyarrow # 15 in our CI tests in order to get a lz4-c that is compatible # with cudf_kafka's dependencies. diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 6cb02397aed..56915721e37 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,8 +23,8 @@ dependencies = [ "cupy-cuda12x>=12.0.0", "fsspec>=0.6.0", "libcudf==25.10.*,>=0.0.0a0", - "numba-cuda[cu12]>=0.18.0,<0.19.0a0", - "numba>=0.60.0,<0.62.0a0", + "numba-cuda[cu12]>=0.19.0,<0.20.0a0", + "numba>=0.61.0,<0.62.0a0", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", "packaging", @@ -125,7 +125,7 @@ requires = [ "libcudf==25.10.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "ninja", - "numba-cuda[cu12]>=0.18.0,<0.19.0a0", + "numba-cuda[cu12]>=0.19.0,<0.20.0a0", "pylibcudf==25.10.*,>=0.0.0a0", "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 081d4e54a4c..dd871c4bbdb 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -44,8 +44,8 @@ test = [ "hypothesis>=6.131.7", "mmh3", "nanoarrow", - "numba-cuda[cu12]>=0.18.0,<0.19.0a0", - "numba>=0.60.0,<0.62.0a0", + "numba-cuda[cu12]>=0.19.0,<0.20.0a0", + "numba>=0.61.0,<0.62.0a0", "pandas", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", From 18530481b655181b659d9281fb7568a1f7d7c143 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 19 Aug 2025 19:21:37 -0400 Subject: [PATCH 168/366] Support null_count in groupby/rolling context (#19739) Contributes to #19200. 
Follows up #19314 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19739 --- .../cudf_polars/dsl/utils/aggregations.py | 18 +++++++++++++++++- python/cudf_polars/tests/test_groupby.py | 6 +++--- python/cudf_polars/tests/test_rolling.py | 10 ++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index ebebc9bc361..00145e1d533 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -90,7 +90,23 @@ def decompose_single_agg( agg = named_expr.value name = named_expr.name if isinstance(agg, expr.UnaryFunction) and agg.name == "null_count": - raise NotImplementedError("null_count is not supported inside groupby context") + (child,) = agg.children + + is_null_bool = expr.BooleanFunction( + DataType(pl.Boolean()), + expr.BooleanFunction.Name.IsNull, + (), + child, + ) + u32 = DataType(pl.UInt32()) + sum_name = next(name_generator) + sum_agg = expr.NamedExpr( + sum_name, + expr.Agg(u32, "sum", (), expr.Cast(u32, is_null_bool)), + ) + return [(sum_agg, True)], named_expr.reconstruct( + expr.Cast(u32, expr.Col(u32, sum_name)) + ) if isinstance(agg, expr.Col): # TODO: collect_list produces null for empty group in libcudf, empty list in polars. # But we need the nested value type, so need to track proper dtypes in our DSL. diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 38ece61457e..18ff5405505 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -285,10 +285,10 @@ def test_groupby_nunique(df: pl.LazyFrame, column): assert_gpu_result_equal(q, check_row_order=False) -def test_groupby_null_count_raises(df: pl.LazyFrame): - q = df.group_by("key1").agg(pl.col("int") + pl.col("uint16_with_null").null_count()) +def test_groupby_null_count(df: pl.LazyFrame): + q = df.group_by("key1").agg(pl.col("uint16_with_null").null_count()) - assert_ir_translation_raises(q, NotImplementedError) + assert_gpu_result_equal(q, check_row_order=False) @pytest.mark.parametrize( diff --git a/python/cudf_polars/tests/test_rolling.py b/python/cudf_polars/tests/test_rolling.py index dd473078fbd..89a4dc3b083 100644 --- a/python/cudf_polars/tests/test_rolling.py +++ b/python/cudf_polars/tests/test_rolling.py @@ -216,3 +216,13 @@ def test_rolling_sum_all_null_window_returns_null(): ) # Expected: [0, 0, 5, 5, 5, 1] assert_gpu_result_equal(q) + + +def test_rolling_null_count(df): + lf = df.with_columns( + null=pl.when(pl.col("values") % 2 == 0).then(None).otherwise(pl.col("values")) + ) + q = lf.rolling("dt", period="48h", closed="both").agg( + nc=pl.col("null").null_count() + ) + assert_gpu_result_equal(q) From 1d8c937b955f9e2893b055cd0617c33d8eb59301 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Aug 2025 18:22:39 -0500 Subject: [PATCH 169/366] Remove outdated numba workarounds (#19738) cuDF now requires numba>=0.60. We can remove these workarounds for bugs in earlier numba versions. 
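For reference, the version gate that motivated these workarounds looked like
the sketch below; with `numba>=0.60` required, the condition can never be true,
so the `skipif`/`filterwarnings` entries it guarded are dead code. This is a
minimal sketch, assuming `numba` and `packaging` are importable:

```python
# Minimal sketch of the now-unreachable version gate that the removed
# workarounds relied on; always False once numba>=0.60 is required.
from packaging import version
import numba

needs_arm_workaround = version.parse(numba.__version__) < version.parse("0.59")
assert not needs_arm_workaround  # guaranteed by the numba>=0.60 requirement
```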
Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19738
---
 python/cudf/cudf/pandas/scripts/run-pandas-tests.sh | 4 ----
 python/cudf/cudf_pandas_tests/test_cudf_pandas.py   | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
index 7ceff137df7..1ec4c1cb844 100755
--- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
+++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -51,10 +51,6 @@ if [ ! -d "pandas-tests" ]; then
     cat > pandas-tests/pyproject.toml << \EOF
 [tool.pytest.ini_options]
 xfail_strict = true
-filterwarnings = [
-    # Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758
-    "ignore:`np.MachAr` is deprecated:DeprecationWarning:numba",
-]
 markers = [
     "single_cpu: tests that should run on a single cpu only",
     "slow: mark a test as slow",
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 4db0129bbae..18bdf427a04 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -719,10 +719,6 @@ def test_rolling_win_type():
     tm.assert_equal(result, expected)
 
 
-@pytest.mark.skipif(
-    version.parse(numba_version) < version.parse("0.59"),
-    reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009",
-)
 @pytest.mark.xfail(
     version.parse(numba_version) >= version.parse("0.59")
     and PANDAS_VERSION < version.parse("2.1"),

From 9f0d3f0bea88cc7522017489418f5de644965290 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 19 Aug 2025 17:55:07 -0700
Subject: [PATCH 170/366] Revert "Support decimal columns in cudf_polars" (#19746)

Reverts rapidsai/cudf#19589

Currently CI is failing because the original PR did not sync with the HEAD of
`branch-25.10` and so did not catch, presumably, new groupby tests that are
failing. Reverting to unblock CI for now; I will reopen the PR once I can
investigate why those tests are failing.
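After the revert, `Decimal` is once again rejected during GPU IR translation.
A minimal reproduction, mirroring the restored `test_unsupported_literal_raises`
case (assumes `polars` is installed):

```python
# A Decimal literal should fail GPU IR translation again after this
# revert; the test suite asserts this via assert_ir_translation_raises.
import polars as pl

q = pl.LazyFrame({}).select(pl.lit(10, dtype=pl.Decimal()))
# Expected in cudf_polars tests: NotImplementedError during IR translation.
```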
Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19746 --- .../cudf_polars/containers/dataframe.py | 19 ++++---------- .../cudf_polars/containers/datatype.py | 2 -- python/cudf_polars/cudf_polars/dsl/ir.py | 10 -------- .../cudf_polars/cudf_polars/testing/plugin.py | 2 -- .../tests/expressions/test_literal.py | 6 +---- python/cudf_polars/tests/test_groupby.py | 3 --- python/cudf_polars/tests/test_scan.py | 9 ------- python/cudf_polars/tests/test_select.py | 25 ------------------- python/pylibcudf/pylibcudf/scalar.pyx | 8 +++--- python/pylibcudf/tests/test_interop.py | 2 +- 10 files changed, 11 insertions(+), 75 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 3a095be3cfe..43ec63738b2 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -29,26 +29,17 @@ def _create_polars_column_metadata( name: str, dtype: PolarsDataType ) -> plc.interop.ColumnMetadata: - """Create ColumnMetadata preserving dtype attributes not supported by libcudf.""" - children_meta = [] - timezone = "" - precision: int | None = None - + """Create ColumnMetadata preserving pl.Struct field names.""" if isinstance(dtype, pl.Struct): children_meta = [ _create_polars_column_metadata(field.name, field.dtype) for field in dtype.fields ] - elif isinstance(dtype, pl.Datetime): - timezone = dtype.time_zone or timezone - elif isinstance(dtype, pl.Decimal): - precision = dtype.precision - + else: + children_meta = [] + timezone = dtype.time_zone if isinstance(dtype, pl.Datetime) else None return plc.interop.ColumnMetadata( - name=name, - timezone=timezone, - precision=precision, - children_meta=children_meta, + name=name, timezone=timezone or "", children_meta=children_meta ) diff --git a/python/cudf_polars/cudf_polars/containers/datatype.py b/python/cudf_polars/cudf_polars/containers/datatype.py index 50a5352612a..5de610425ed 100644 --- a/python/cudf_polars/cudf_polars/containers/datatype.py +++ b/python/cudf_polars/cudf_polars/containers/datatype.py @@ -81,8 +81,6 @@ def _from_polars(dtype: pl.DataType) -> plc.DataType: assert_never(dtype.time_unit) elif isinstance(dtype, pl.String): return plc.DataType(plc.TypeId.STRING) - elif isinstance(dtype, pl.Decimal): - return plc.DataType(plc.TypeId.DECIMAL128, scale=-dtype.scale) elif isinstance(dtype, pl.Null): # TODO: Hopefully return plc.DataType(plc.TypeId.EMPTY) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 7783b15a207..161c8f4a576 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -1467,16 +1467,6 @@ def do_evaluate( else: (child,) = value.children col = child.evaluate(df, context=ExecutionContext.GROUPBY).obj - - if value.name == "median" and col.type().id() in { - plc.TypeId.DECIMAL128, - plc.TypeId.DECIMAL64, - plc.TypeId.DECIMAL32, - }: - # libcudf doesn't support median (quantile) with decimal types, - # but Polars returns a float result, so just cast the input. 
- assert isinstance(child, expr.Col) - col = plc.unary.cast(col, schema[child.name].plc) else: # Anything else, we pre-evaluate col = value.evaluate(df, context=ExecutionContext.GROUPBY).obj diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 5038a0d5690..14956d2cfbc 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -175,8 +175,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "allow_missing_columns argument in read_parquet not translated in IR", - "tests/unit/datatypes/test_decimal.py::test_decimal_aggregations": "https://github.com/pola-rs/polars/issues/23899", - "tests/unit/datatypes/test_decimal.py::test_decimal_arithmetic_schema": "https://github.com/pola-rs/polars/issues/23899", } diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index 1c2eb05ebfe..69ee80da82e 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,8 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import datetime - import pytest import polars as pl @@ -97,9 +95,7 @@ def test_select_literal_series(): assert_gpu_result_equal(q) -@pytest.mark.parametrize( - "expr", [pl.lit(None), pl.lit(datetime.time(12, 0), dtype=pl.Time())] -) +@pytest.mark.parametrize("expr", [pl.lit(None), pl.lit(10, dtype=pl.Decimal())]) def test_unsupported_literal_raises(expr): df = pl.LazyFrame({}) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 18ff5405505..bf97ec41cde 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import decimal import itertools import random from datetime import date @@ -26,7 +25,6 @@ def df(): "key2": [2, 2, 2, 2, 6, 1, 4, 6, 8], "int": [1, 2, 3, 4, 5, 6, 7, 8, 9], "int32": pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=pl.Int32()), - "decimal": [decimal.Decimal("1.23"), None, decimal.Decimal("-0.23")] * 3, "uint16_with_null": pl.Series( [1, None, 2, None, None, None, 4, 5, 6], dtype=pl.UInt16() ), @@ -91,7 +89,6 @@ def keys(request): [pl.col("float").quantile(0.3, interpolation="lower")], [pl.col("float").quantile(0.3, interpolation="midpoint")], [pl.col("float").quantile(0.3, interpolation="linear")], - [pl.col("decimal").median()], [ pl.col("datetime").max(), pl.col("datetime").max().dt.is_leap_year().alias("leapyear"), diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index 7510fe833be..8481105baad 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import decimal from typing import TYPE_CHECKING import pytest @@ -46,14 +45,6 @@ def df(): "a": [1, 2, 3, None, 4, 5], "b": ["ẅ", "x", "y", "z", "123", "abcd"], "c": [None, None, 4, 5, -1, 0], - "d": [ - 
decimal.Decimal("1.23"),
-                None,
-                decimal.Decimal("0.00"),
-                None,
-                decimal.Decimal("-5.67"),
-                None,
-            ],
         }
     )

diff --git a/python/cudf_polars/tests/test_select.py b/python/cudf_polars/tests/test_select.py
index 10fcf9f660d..da3f519783b 100644
--- a/python/cudf_polars/tests/test_select.py
+++ b/python/cudf_polars/tests/test_select.py
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations

-import decimal
-
 import pytest

 import polars as pl
@@ -29,29 +27,6 @@ def test_select():
     assert_gpu_result_equal(query)


-def test_select_decimal():
-    ldf = pl.LazyFrame(
-        {"a": pl.Series(values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(3, 1))}
-    )
-    query = ldf.select(pl.col("a"))
-    assert_gpu_result_equal(query)
-
-
-def test_select_decimal_precision_none_result_max_precision():
-    ldf = pl.LazyFrame(
-        {
-            "a": pl.Series(
-                values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(None, 1)
-            )
-        }
-    )
-    query = ldf.select(pl.col("a"))
-    cpu_result = query.collect()
-    gpu_result = query.collect(engine="gpu")
-    assert cpu_result.schema["a"].precision is None
-    assert gpu_result.schema["a"].precision == 38
-
-
 def test_select_reduce():
     ldf = pl.DataFrame(
         {
diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx
index 31a93abd6fb..0d533c960a4 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyx
+++ b/python/pylibcudf/pylibcudf/scalar.pyx
@@ -275,7 +275,7 @@ cdef class Scalar:
             return decimal.Decimal(
                 (<fixed_point_scalar[decimal128]*>slr).value().value()
             ).scaleb(
-                (<fixed_point_scalar[decimal128]*>slr).type().scale()
+                -(<fixed_point_scalar[decimal128]*>slr).type().scale()
             )
         else:
             raise NotImplementedError(
@@ -647,12 +647,12 @@ def _(py_val: datetime.date, dtype: DataType | None):

 @_from_py.register(decimal.Decimal)
 def _(py_val: decimal.Decimal, dtype: DataType | None):
-    scale = py_val.as_tuple().exponent
-    as_int = int(py_val.scaleb(-scale))
+    scale = -py_val.as_tuple().exponent
+    as_int = int(py_val.scaleb(scale))

     cdef int128_t val = as_int

-    dtype = DataType(type_id.DECIMAL128, scale)
+    dtype = DataType(type_id.DECIMAL128, -scale)
     if dtype.id() != type_id.DECIMAL128:
         raise TypeError("Expected dtype to be DECIMAL128")

diff --git a/python/pylibcudf/tests/test_interop.py b/python/pylibcudf/tests/test_interop.py
index b1a6e9f2c66..171d70c2496 100644
--- a/python/pylibcudf/tests/test_interop.py
+++ b/python/pylibcudf/tests/test_interop.py
@@ -105,7 +105,7 @@ def test_decimal_other(data_type):
     [plc.TypeId.DECIMAL128, plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32],
 )
 def test_decimal_respect_metadata_precision(plc_type, request):
-    request.applymarker(
+    request.node.add_marker(
         pytest.mark.xfail(
             parse(pa.__version__) < parse("19.0.0")
             and plc_type in {plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32},

From 7ea7c27c0d6971bf3d269cb8489a65efbcafeb12 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 19 Aug 2025 20:21:03 -0700
Subject: [PATCH 171/366] Update cuDF classic testing documentation regarding
 testing organization (#19745)

Updates the documented test organization conventions being worked towards in
https://github.com/rapidsai/cudf/issues/9999 and
https://github.com/rapidsai/cudf/issues/15723

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19745
---
 docs/cudf/source/developer_guide/testing.md | 30 ++++++++++++---------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md
index a31677e65af..732e7a3af8e 100644
--- a/docs/cudf/source/developer_guide/testing.md
+++ b/docs/cudf/source/developer_guide/testing.md
@@ -26,18 +26,24 @@ Where tests do not naturally belong to a project, for example the

 ## Test organization

-How tests are organized depends on which of the following two groups they fall into:
-
-1. Free functions such as `cudf.merge` that operate on classes like `DataFrame` or `Series`.
-2. Methods of the above classes.
-
-Tests of free functions should be grouped into files based on the
-[API sections in the documentation](https://docs.rapids.ai/api/cudf/latest/api_docs/index.html).
-This places tests of similar functionality in the same module.
-Tests of class methods should be organized in the same way, except that this organization should be within a subdirectory corresponding to the class.
-For instance, tests of `DataFrame` indexing should be placed into `dataframe/test_indexing.py`.
-In cases where tests may be shared by multiple classes sharing a common parent (e.g. `DataFrame` and `Series` both require `IndexedFrame` tests),
-the tests may be placed in a directory corresponding to the parent class.
+Generally, the directories under `cudf/tests` describe tests of a certain object (e.g. `series/`)
+or a general topic (e.g. `reshape/`, `series/methods`), and the test files are named according to an API's name
+(e.g. `series/methods/test_astype.py`, `reshape/test_concat.py`). Sometimes the tested operations and
+APIs do not have a singular name that corresponds to a file name; such files are instead named by a topic as well.
+Some common examples include:
+
+- `test_constructors.py`: `__init__` and `@classmethod` constructors for objects
+- `test_attributes.py`: `@property`s of objects
+- `test_binops.py`: Binary methods (e.g. `+`, `%`)
+- `test_reductions.py`: Reduction methods (e.g. `mean`, `quantile`)
+
+The organization aims to have many specific test files, each targeting a particular operation on a particular object;
+therefore, there may be test files with the same name that live in different directories, e.g.
+
+- `series/methods/test_astype.py`, `dataframe/methods/test_astype.py`, `indexes/index/methods/test_astype.py`
+- `series/methods/test_reductions.py`, `dataframe/methods/test_reductions.py`, `groupby/test_reductions.py`
+
+When adding new tests, make a best effort to place them in a file according to the tested object and API.

 ## Test contents

From aea777db5bbbcdc0b81c2adc7fe63f399c0fa3eb Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Tue, 19 Aug 2025 20:57:01 -0700
Subject: [PATCH 172/366] Upgrade to nvCOMP 5.0.0.6 (#19636)

Issue #19686

Update the nvCOMP version and the JNI code that directly uses nvCOMP.
The libcudf code is already compatible with this version.
Depends on https://github.com/rapidsai/rapids-cmake/pull/896 and https://github.com/rapidsai/kvikio/pull/800 Authors: - Vukasin Milovanovic (https://github.com/vuule) - Liangcai Li (https://github.com/firestarman) - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) - Alessandro Bellina (https://github.com/abellina) URL: https://github.com/rapidsai/cudf/pull/19636 --- ci/build_wheel_libcudf.sh | 2 +- .../all_cuda-129_arch-aarch64.yaml | 2 +- .../all_cuda-129_arch-x86_64.yaml | 2 +- conda/recipes/libcudf/conda_build_config.yaml | 2 +- dependencies.yaml | 2 +- .../rapids/cudf/nvcomp/BatchedCompressor.java | 7 ++- .../cudf/nvcomp/BatchedDecompressor.java | 6 ++- .../cudf/nvcomp/BatchedLZ4Compressor.java | 4 +- .../cudf/nvcomp/BatchedLZ4Decompressor.java | 6 ++- .../cudf/nvcomp/BatchedZstdCompressor.java | 4 +- .../cudf/nvcomp/BatchedZstdDecompressor.java | 6 ++- .../java/ai/rapids/cudf/nvcomp/NvcompJni.java | 14 ++++-- java/src/main/native/src/NvcompJni.cpp | 48 ++++++++++++------- 13 files changed, 68 insertions(+), 37 deletions(-) diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index 768ee5c8c0b..ae0ab29c7f8 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -36,8 +36,8 @@ export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_FROM_LIBKVIKIO_WHEEL=ON" # repair wheels and write to the location that artifact-uploading code expects to find them python -m auditwheel repair \ - --exclude libnvcomp.so.4 \ --exclude libkvikio.so \ + --exclude libnvcomp.so.5 \ --exclude librapids_logger.so \ --exclude librmm.so \ -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \ diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index e3d1d9c3b9b..ccf44072677 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -40,7 +40,7 @@ dependencies: - jupyter_client - libcurand-dev - libkvikio==25.10.*,>=0.0.0a0 -- libnvcomp-dev==4.2.0.11 +- libnvcomp-dev==5.0.0.6 - libnvjitlink-dev - librdkafka>=2.8.0,<2.9.0a0 - librmm==25.10.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 1b536f03c29..af6ca634e15 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -41,7 +41,7 @@ dependencies: - libcufile-dev - libcurand-dev - libkvikio==25.10.*,>=0.0.0a0 -- libnvcomp-dev==4.2.0.11 +- libnvcomp-dev==5.0.0.6 - libnvjitlink-dev - librdkafka>=2.8.0,<2.9.0a0 - librmm==25.10.*,>=0.0.0a0 diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 7ca20165585..7acdfdd9698 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -26,7 +26,7 @@ flatbuffers_version: - "=24.3.25" nvcomp_version: - - "=4.2.0.11" + - "=5.0.0.6" zlib_version: - ">=1.2.13" diff --git a/dependencies.yaml b/dependencies.yaml index 0c6c1f19900..6398698546d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -476,7 +476,7 @@ dependencies: - output_types: conda packages: # Align nvcomp version with rapids-cmake - - libnvcomp-dev==4.2.0.11 + - libnvcomp-dev==5.0.0.6 rapids_build_skbuild: common: - output_types: [conda, requirements, pyproject] diff --git 
a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedCompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedCompressor.java index 72dfcdb3cb5..5a0ff574029 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedCompressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedCompressor.java @@ -91,7 +91,8 @@ public DeviceMemoryBuffer[] compress(BaseDeviceMemoryBuffer[] origInputs, Cuda.S buildAddrsAndSizes(inputs, inputChunkAddrs, inputChunkSizes, compressedBuffers, outputChunkAddrs); - final long tempBufferSize = batchedCompressGetTempSize(numChunks, chunkSize); + final long tempBufferSize = batchedCompressGetTempSize(numChunks, chunkSize, + numChunks * chunkSize); try (DeviceMemoryBuffer addrsAndSizes = putAddrsAndSizesOnDevice(inputChunkAddrs, inputChunkSizes, outputChunkAddrs, stream); DeviceMemoryBuffer tempBuffer = @@ -308,9 +309,11 @@ private long[] calcOutputBufferSizes(int[] chunksPerInput, long[] outputChunkSiz * Get the temporary workspace size required to perform compression of an entire batch. * @param batchSize number of chunks in the batch * @param maxChunkSize maximum size of an uncompressed chunk in bytes + * @param totalSize Upper bound on the total uncompressed size of all chunks * @return The size of required temporary workspace in bytes to compress the batch. */ - protected abstract long batchedCompressGetTempSize(long batchSize, long maxChunkSize); + protected abstract long batchedCompressGetTempSize(long batchSize, long maxChunkSize, + long totalSize); /** * Asynchronously compress a batch of buffers. Note that compressedSizesOutPtr must diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedDecompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedDecompressor.java index 5543d2dcb64..af195c84cc7 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedDecompressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedDecompressor.java @@ -71,7 +71,8 @@ public void decompressAsync(BaseDeviceMemoryBuffer[] origInputs, totalChunks += numBufferChunks; } - final long tempBufferSize = batchedDecompressGetTempSize(totalChunks, chunkSize); + final long tempBufferSize = batchedDecompressGetTempSize(totalChunks, chunkSize, + totalChunks * chunkSize); try (DeviceMemoryBuffer devAddrsSizes = buildAddrsSizesBuffer(chunkSize, totalChunks, inputs.getArray(), chunksPerInput, outputs, stream); DeviceMemoryBuffer devTemp = DeviceMemoryBuffer.allocate(tempBufferSize)) { @@ -198,10 +199,11 @@ private static HostMemoryBuffer fetchMetadata(long totalChunks, BaseDeviceMemory * Computes the temporary storage size in bytes needed to decompress a compressed batch. * @param numChunks number of chunks in the batch * @param maxUncompressedChunkBytes maximum uncompressed size of any chunk in bytes + * @param maxTotalSize Upper bound on the total uncompressed size of all chunks * @return number of temporary storage bytes needed to decompress the batch */ protected abstract long batchedDecompressGetTempSize(long numChunks, - long maxUncompressedChunkBytes); + long maxUncompressedChunkBytes, long maxTotalSize); /** * Asynchronously decompress a batch of compressed data buffers. 
diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Compressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Compressor.java index 58c0e7ee169..d8b8d4c616e 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Compressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Compressor.java @@ -32,8 +32,8 @@ public BatchedLZ4Compressor(long chunkSize, long maxIntermediateBufferSize) { } @Override - protected long batchedCompressGetTempSize(long batchSize, long maxChunkSize) { - return NvcompJni.batchedLZ4CompressGetTempSize(batchSize, maxChunkSize); + protected long batchedCompressGetTempSize(long batchSize, long maxChunkSize, long totalSize) { + return NvcompJni.batchedLZ4CompressGetTempSize(batchSize, maxChunkSize, totalSize); } @Override diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Decompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Decompressor.java index d78d537ea13..82e425ae234 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Decompressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedLZ4Decompressor.java @@ -41,8 +41,10 @@ public static void decompressAsync(long chunkSize, BaseDeviceMemoryBuffer[] orig } @Override - protected long batchedDecompressGetTempSize(long numChunks, long maxUncompressedChunkBytes) { - return NvcompJni.batchedLZ4DecompressGetTempSize(numChunks, maxUncompressedChunkBytes); + protected long batchedDecompressGetTempSize(long numChunks, long maxUncompressedChunkBytes, + long maxTotalSize) { + return NvcompJni.batchedLZ4DecompressGetTempSize(numChunks, maxUncompressedChunkBytes, + maxTotalSize); } @Override diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdCompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdCompressor.java index 0532b4aa86d..9c4a8aca52e 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdCompressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdCompressor.java @@ -31,8 +31,8 @@ public BatchedZstdCompressor(long chunkSize, long maxIntermediateBufferSize) { } @Override - protected long batchedCompressGetTempSize(long batchSize, long maxChunkSize) { - return NvcompJni.batchedZstdCompressGetTempSize(batchSize, maxChunkSize); + protected long batchedCompressGetTempSize(long batchSize, long maxChunkSize, long totalSize) { + return NvcompJni.batchedZstdCompressGetTempSize(batchSize, maxChunkSize, totalSize); } @Override diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdDecompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdDecompressor.java index ba11a236834..3ca21ac0b7e 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdDecompressor.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/BatchedZstdDecompressor.java @@ -23,8 +23,10 @@ public BatchedZstdDecompressor(long chunkSize) { } @Override - protected long batchedDecompressGetTempSize(long numChunks, long maxUncompressedChunkBytes) { - return NvcompJni.batchedZstdDecompressGetTempSize(numChunks, maxUncompressedChunkBytes); + protected long batchedDecompressGetTempSize(long numChunks, long maxUncompressedChunkBytes, + long maxTotalSize) { + return NvcompJni.batchedZstdDecompressGetTempSize(numChunks, maxUncompressedChunkBytes, + maxTotalSize); } @Override diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java b/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java index 1a21629a208..f940f781ffb 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java +++ 
b/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java @@ -29,9 +29,10 @@ class NvcompJni { * Get the temporary workspace size required to perform compression of entire LZ4 batch. * @param batchSize number of chunks in the batch * @param maxChunkSize maximum size of an uncompressed chunk in bytes + * @param maxTotalSize Upper bound on the total uncompressed size of all chunks * @return The size of required temporary workspace in bytes to compress the batch. */ - static native long batchedLZ4CompressGetTempSize(long batchSize, long maxChunkSize); + static native long batchedLZ4CompressGetTempSize(long batchSize, long maxChunkSize, long maxTotalSize); /** * Get the maximum size any chunk could compress to in a LZ4 batch. This is the minimum amount of @@ -74,11 +75,13 @@ static native void batchedLZ4CompressAsync( * Computes the temporary storage size in bytes needed to decompress a LZ4-compressed batch. * @param numChunks number of chunks in the batch * @param maxUncompressedChunkBytes maximum uncompressed size of any chunk in bytes + * @param maxTotalSize Upper bound on the total uncompressed size of all chunks * @return number of temporary storage bytes needed to decompress the batch */ static native long batchedLZ4DecompressGetTempSize( long numChunks, - long maxUncompressedChunkBytes); + long maxUncompressedChunkBytes, + long maxTotalSize); /** * Asynchronously decompress a batch of LZ4-compressed data buffers. @@ -121,9 +124,10 @@ static native void batchedLZ4GetDecompressSizeAsync( * Get the temporary workspace size required to perform compression of entire zstd batch. * @param batchSize number of chunks in the batch * @param maxChunkSize maximum size of an uncompressed chunk in bytes + * @param maxTotalSize Upper bound on the total uncompressed size of all chunks * @return The size of required temporary workspace in bytes to compress the batch. */ - static native long batchedZstdCompressGetTempSize(long batchSize, long maxChunkSize); + static native long batchedZstdCompressGetTempSize(long batchSize, long maxChunkSize, long maxTotalSize); /** * Get the maximum size any chunk could compress to in a ZSTD batch. This is the minimum @@ -167,11 +171,13 @@ static native void batchedZstdCompressAsync( * ZSTD-compressed batch. * @param numChunks number of chunks in the batch * @param maxUncompressedChunkBytes maximum uncompressed size of any chunk in bytes + * @param maxTotalSize Upper bound on the total uncompressed size of all chunks * @return number of temporary storage bytes needed to decompress the batch */ static native long batchedZstdDecompressGetTempSize( long numChunks, - long maxUncompressedChunkBytes); + long maxUncompressedChunkBytes, + long maxTotalSize); /** * Asynchronously decompress a batch of ZSTD-compressed data buffers. 
diff --git a/java/src/main/native/src/NvcompJni.cpp b/java/src/main/native/src/NvcompJni.cpp
index 0b3bf1916b9..4e35f2f5688 100644
--- a/java/src/main/native/src/NvcompJni.cpp
+++ b/java/src/main/native/src/NvcompJni.cpp
@@ -63,15 +63,16 @@ extern "C" {
 // methods for lz4
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressGetTempSize(
-  JNIEnv* env, jclass, jlong j_batch_size, jlong j_max_chunk_size)
+  JNIEnv* env, jclass, jlong j_batch_size, jlong j_max_chunk_size, jlong j_max_total_size)
 {
   try {
     cudf::jni::auto_set_device(env);
     auto batch_size     = static_cast<std::size_t>(j_batch_size);
     auto max_chunk_size = static_cast<std::size_t>(j_max_chunk_size);
+    auto total_size     = static_cast<std::size_t>(j_max_total_size);
     std::size_t temp_size = 0;
-    auto status = nvcompBatchedLZ4CompressGetTempSize(
-      batch_size, max_chunk_size, nvcompBatchedLZ4DefaultOpts, &temp_size);
+    auto status = nvcompBatchedLZ4CompressGetTempSizeAsync(
+      batch_size, max_chunk_size, nvcompBatchedLZ4CompressDefaultOpts, &temp_size, total_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(temp_size);
   }
@@ -88,7 +89,7 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressGetMaxOutputChunkSize(JNI
     auto max_chunk_size = static_cast<std::size_t>(j_max_chunk_size);
     std::size_t max_output_size = 0;
     auto status = nvcompBatchedLZ4CompressGetMaxOutputChunkSize(
-      max_chunk_size, nvcompBatchedLZ4DefaultOpts, &max_output_size);
+      max_chunk_size, nvcompBatchedLZ4CompressDefaultOpts, &max_output_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(max_output_size);
   }
@@ -119,7 +120,10 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressAsync(JNIEnv* env,
     auto out_ptrs             = reinterpret_cast<void* const*>(j_out_ptrs);
     auto compressed_out_sizes = reinterpret_cast<std::size_t*>(j_compressed_sizes_out_ptr);
     auto stream               = reinterpret_cast<cudaStream_t>(j_stream);
-    auto status = nvcompBatchedLZ4CompressAsync(in_ptrs,
+    // FIXME: how should these statuses be used? They are not used in the corresponding
+    // decompressor either.
+    auto comp_statuses = rmm::device_uvector<nvcompStatus_t>(batch_size, stream);
+    auto status = nvcompBatchedLZ4CompressAsync(in_ptrs,
                                                 in_sizes,
                                                 chunk_size,
                                                 batch_size,
@@ -127,7 +131,8 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressAsync(JNIEnv* env,
                                                 temp_size,
                                                 out_ptrs,
                                                 compressed_out_sizes,
-                                                nvcompBatchedLZ4DefaultOpts,
+                                                nvcompBatchedLZ4CompressDefaultOpts,
+                                                comp_statuses.data(),
                                                 stream);
     check_nvcomp_status(env, status);
   }
@@ -135,14 +140,16 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressAsync(JNIEnv* env,
 }

 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4DecompressGetTempSize(
-  JNIEnv* env, jclass, jlong j_batch_size, jlong j_chunk_size)
+  JNIEnv* env, jclass, jlong j_batch_size, jlong j_chunk_size, jlong j_max_total_size)
 {
   try {
     cudf::jni::auto_set_device(env);
     auto batch_size = static_cast<std::size_t>(j_batch_size);
     auto chunk_size = static_cast<std::size_t>(j_chunk_size);
+    auto total_size = static_cast<std::size_t>(j_max_total_size);
     std::size_t temp_size = 0;
-    auto status = nvcompBatchedLZ4DecompressGetTempSize(batch_size, chunk_size, &temp_size);
+    auto status = nvcompBatchedLZ4DecompressGetTempSizeAsync(
+      batch_size, chunk_size, nvcompBatchedLZ4DecompressDefaultOpts, &temp_size, total_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(temp_size);
   }
@@ -181,6 +188,7 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4DecompressAsync(JNIEnv* env,
                                                   temp_ptr,
                                                   temp_size,
                                                   uncompressed_ptrs,
+                                                  nvcompBatchedLZ4DecompressDefaultOpts,
                                                   uncompressed_statuses.data(),
                                                   stream);
     check_nvcomp_status(env, status);
@@ -218,15 +226,16 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4GetDecompressSizeAsync(JNIEnv* en

 // methods for zstd
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdCompressGetTempSize(
-  JNIEnv* env, jclass, jlong j_batch_size, jlong j_max_chunk_size)
+  JNIEnv* env, jclass, jlong j_batch_size, jlong j_max_chunk_size, jlong j_max_total_size)
 {
   try {
     cudf::jni::auto_set_device(env);
     auto batch_size     = static_cast<std::size_t>(j_batch_size);
     auto max_chunk_size = static_cast<std::size_t>(j_max_chunk_size);
+    auto total_size     = static_cast<std::size_t>(j_max_total_size);
     std::size_t temp_size = 0;
-    auto status = nvcompBatchedZstdCompressGetTempSize(
-      batch_size, max_chunk_size, nvcompBatchedZstdDefaultOpts, &temp_size);
+    auto status = nvcompBatchedZstdCompressGetTempSizeAsync(
+      batch_size, max_chunk_size, nvcompBatchedZstdCompressDefaultOpts, &temp_size, total_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(temp_size);
   }
@@ -242,7 +251,7 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdCompressGetMaxOutputChunkSize(
     auto max_chunk_size = static_cast<std::size_t>(j_max_chunk_size);
     std::size_t max_output_size = 0;
     auto status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
-      max_chunk_size, nvcompBatchedZstdDefaultOpts, &max_output_size);
+      max_chunk_size, nvcompBatchedZstdCompressDefaultOpts, &max_output_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(max_output_size);
   }
@@ -273,7 +282,10 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdCompressAsync(JNIEnv* env,
     auto out_ptrs             = reinterpret_cast<void* const*>(j_out_ptrs);
     auto compressed_out_sizes = reinterpret_cast<std::size_t*>(j_compressed_sizes_out_ptr);
     auto stream               = reinterpret_cast<cudaStream_t>(j_stream);
-    auto status = nvcompBatchedZstdCompressAsync(in_ptrs,
+    // FIXME: how should these statuses be used? They are not used in the corresponding
+    // decompressor either.
+    auto comp_statuses = rmm::device_uvector<nvcompStatus_t>(batch_size, stream);
+    auto status = nvcompBatchedZstdCompressAsync(in_ptrs,
                                                  in_sizes,
                                                  chunk_size,
                                                  batch_size,
@@ -281,7 +293,8 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdCompressAsync(JNIEnv* env,
                                                  temp_size,
                                                  out_ptrs,
                                                  compressed_out_sizes,
-                                                 nvcompBatchedZstdDefaultOpts,
+                                                 nvcompBatchedZstdCompressDefaultOpts,
+                                                 comp_statuses.data(),
                                                  stream);
     check_nvcomp_status(env, status);
   }
@@ -289,14 +302,16 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdCompressAsync(JNIEnv* env,
 }

 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdDecompressGetTempSize(
-  JNIEnv* env, jclass, jlong j_batch_size, jlong j_chunk_size)
+  JNIEnv* env, jclass, jlong j_batch_size, jlong j_chunk_size, jlong j_max_total_size)
 {
   try {
     cudf::jni::auto_set_device(env);
     auto batch_size = static_cast<std::size_t>(j_batch_size);
    auto chunk_size = static_cast<std::size_t>(j_chunk_size);
+    auto total_size = static_cast<std::size_t>(j_max_total_size);
     std::size_t temp_size = 0;
-    auto status = nvcompBatchedZstdDecompressGetTempSize(batch_size, chunk_size, &temp_size);
+    auto status = nvcompBatchedZstdDecompressGetTempSizeAsync(
+      batch_size, chunk_size, nvcompBatchedZstdDecompressDefaultOpts, &temp_size, total_size);
     check_nvcomp_status(env, status);
     return static_cast<jlong>(temp_size);
   }
@@ -335,6 +350,7 @@ Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedZstdDecompressAsync(JNIEnv* env,
                                                   temp_ptr,
                                                   temp_size,
                                                   uncompressed_ptrs,
+                                                  nvcompBatchedZstdDecompressDefaultOpts,
                                                   uncompressed_statuses.data(),
                                                   stream);
     check_nvcomp_status(env, status);

From f91e146a4442e44c4661830d08db229ca60c2fd4 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 20 Aug 2025 00:34:34 -0400
Subject: [PATCH 173/366] Suppress NVRTC warning from stdint.h (#19712)

Fixes a warning that is emitted repeatedly by the jitify compile with NVRTC
when including the C++ stdlib stdint.h or cstdint:

```
cuda/std/cstdint(105): warning #47-D: incompatible redefinition of macro "INTPTR_MIN" (declared at line 16 of stdint.h)
 # define INTPTR_MIN INT64_MIN
 ^
cuda/std/cstdint(106): warning #47-D: incompatible redefinition of macro "INTPTR_MAX" (declared at line 18 of stdint.h)
 # define INTPTR_MAX INT64_MAX
 ^
cuda/std/cstdint(107): warning #47-D: incompatible redefinition of macro "UINTPTR_MAX" (declared at line 20 of stdint.h)
 # define UINTPTR_MAX UINT64_MAX
 ^
cuda/std/cstdint(109): warning #47-D: incompatible redefinition of macro "INTMAX_MIN" (declared at line 17 of stdint.h)
 # define INTMAX_MIN INT64_MIN
 ^
cuda/std/cstdint(110): warning #47-D: incompatible redefinition of macro "INTMAX_MAX" (declared at line 19 of stdint.h)
 # define INTMAX_MAX INT64_MAX
 ^
cuda/std/cstdint(111): warning #47-D: incompatible redefinition of macro "UINTMAX_MAX" (declared at line 21 of stdint.h)
 # define UINTMAX_MAX UINT64_MAX
 ^
cuda/std/cstdint(113): warning #47-D: incompatible redefinition of macro "PTRDIFF_MIN" (declared at line 22 of stdint.h)
 # define PTRDIFF_MIN INT64_MIN
 ^
cuda/std/cstdint(114): warning #47-D: incompatible redefinition of macro "PTRDIFF_MAX" (declared at line 23 of stdint.h)
 # define PTRDIFF_MAX INT64_MAX
 ^
D22-125 [1265+2+10=1276] Custom command to JIT-compile files.
```

This suppresses the warning by adding the compile flag `--diag-suppress=47`
to `JitifyPreprocessKernels.cmake`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/19712
---
 cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
index 7c27920ee28..1e363a3e55b 100644
--- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
+++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
@@ -48,7 +48,7 @@ function(jit_preprocess_files)
       $ ${ARG_FILE} -o ${ARG_OUTPUT_DIR} -i -std=c++20 -remove-unused-globals
       -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -DCUDF_RUNTIME_JIT -I${CUDF_SOURCE_DIR}/include
       -I${CUDF_SOURCE_DIR}/src ${includes}
-      --no-preinclude-workarounds --no-replace-pragma-once
+      --no-preinclude-workarounds --no-replace-pragma-once --diag-suppress=47
       COMMENT "Custom command to JIT-compile files."
     )
   endforeach()

From 965b4a10416645d12ac8927bd8e35b05d5973c35 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 20 Aug 2025 01:34:28 -0500
Subject: [PATCH 174/366] Optionally print shuffle stats in pdsh benchmarks
 (#19719)

This adds an option to the pdsh benchmarks to control whether the rapidsmpf
Shuffler statistics are printed upon completion. Previously this was tied to
the `rapidsmpf-oom-protection` flag, but these are two different things.

Authors:
  - Tom Augspurger (https://github.com/TomAugspurger)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Richard (Rick) Zamora (https://github.com/rjzamora)

URL: https://github.com/rapidsai/cudf/pull/19719
---
 .../cudf_polars/experimental/benchmarks/utils.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
index c85fbf8319e..8074488024c 100644
--- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
+++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
@@ -413,7 +413,8 @@ def initialize_dask_cluster(run_config: RunConfig, args: argparse.Namespace): #
             options=Options(
                 {
                     "dask_spill_device": str(run_config.spill_device),
-                    "dask_statistics": str(args.rapidsmpf_oom_protection),
+                    "dask_statistics": str(args.rapidsmpf_dask_statistics),
+                    "oom_protection": str(args.rapidsmpf_oom_protection),
                 }
             ),
         )
@@ -625,6 +626,12 @@ def parse_args(
         default=False,
         help="Use rapidsmpf CUDA managed memory-based OOM protection.",
     )
+    parser.add_argument(
+        "--rapidsmpf-dask-statistics",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="Print rapidsmpf shuffle statistics on each Dask worker upon completion.",
+    )
     parser.add_argument(
         "--rapidsmpf-spill",
         action=argparse.BooleanOptionalAction,

From da2a13d77744cb77d0ebcbc102498d7fd2dda579 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 20 Aug 2025 10:22:55 -0500
Subject: [PATCH 175/366] Remove unreachable code in rapidsmpf shuffle (#19704)

https://github.com/rapidsai/rapidsmpf/pull/417 changed rapidsmpf's worker
initialization so that workers are never in a partially initialized state,
so we can safely remove the code blocks guarding against that.

Authors:
  - Tom Augspurger (https://github.com/TomAugspurger)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

URL: https://github.com/rapidsai/cudf/pull/19704
---
 python/cudf_polars/cudf_polars/experimental/shuffle.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py
index 9a4e63e4b76..3571ee67e7a 100644
--- a/python/cudf_polars/cudf_polars/experimental/shuffle.py
+++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py
@@ -67,11 +67,6 @@ def insert_partition(

     context = get_worker_context()

-    if context.br is None:  # pragma: no cover
-        raise ValueError(
-            "rapidsmpf insert_partition called on an uninitialized worker."
- ) - on = options["on"] assert not other, f"Unexpected arguments: {other}" columns_to_hash = tuple(df.column_names.index(val) for val in on) @@ -104,10 +99,6 @@ def extract_partition( from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() - if context.br is None: # pragma: no cover - raise ValueError( - "rapidsmpf extract_partition called on an uninitialized worker." - ) shuffler.wait_on(partition_id) column_names = options["column_names"] From b33b7946aa150d51a67af799af94548cdbcb6a86 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 20 Aug 2025 09:06:40 -0700 Subject: [PATCH 176/366] Add streams to all scalar factories (#19729) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/19729 --- .../libcudf/scalar/scalar_factories.pxd | 24 +- .../pylibcudf/nvtext/byte_pair_encode.pyx | 8 +- python/pylibcudf/pylibcudf/nvtext/replace.pyx | 15 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 17 +- python/pylibcudf/pylibcudf/scalar.pxd | 5 +- python/pylibcudf/pylibcudf/scalar.pyi | 21 +- python/pylibcudf/pylibcudf/scalar.pyx | 213 +++++++++++------- .../pylibcudf/strings/capitalize.pyx | 8 +- .../pylibcudf/pylibcudf/strings/combine.pyx | 9 +- .../pylibcudf/pylibcudf/strings/contains.pyx | 9 +- .../strings/convert/convert_lists.pyx | 8 +- .../pylibcudf/pylibcudf/strings/replace.pyx | 8 +- .../pylibcudf/strings/replace_re.pyx | 8 +- python/pylibcudf/pylibcudf/strings/slice.pyx | 9 +- .../pylibcudf/strings/split/partition.pyx | 12 +- python/pylibcudf/pylibcudf/strings/strip.pyx | 8 +- 16 files changed, 263 insertions(+), 119 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index 70c9067288e..f29ed15b4dd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -8,30 +8,40 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.types cimport int128 as int128_t +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] make_string_scalar( - const string & _string + const string & _string, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_width_scalar[T]( - T value + T value, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_point_scalar[T]( int128_t value, scale_type scale, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_numeric_scalar( - data_type type_ + data_type type_, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_timestamp_scalar( - data_type type_ + data_type type_, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_empty_scalar_like( - const column_view & + const column_view &, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_duration_scalar( - data_type type_ + data_type type_, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_default_constructed_scalar( - data_type type_ + data_type type_, + cuda_stream_view stream ) except +libcudf_exception_handler 
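For context, a minimal sketch of how these stream-aware factories surface in
the Python API. This is not part of the patch: it assumes the post-patch
`Scalar.from_py` signature shown in this diff and rmm's `DEFAULT_STREAM`; any
rmm `Stream` may be passed, and omitting `stream` falls back to a default
stream:

```
# A minimal sketch (not part of this patch) of constructing scalars on an
# explicit CUDA stream with the stream-aware factories added by this PR.
import pylibcudf as plc
from rmm.pylibrmm.stream import DEFAULT_STREAM

# Build an INT64 scalar and a STRING scalar on an explicit stream.
int_scalar = plc.Scalar.from_py(
    42, plc.DataType(plc.types.TypeId.INT64), stream=DEFAULT_STREAM
)
str_scalar = plc.Scalar.from_py("hello", stream=DEFAULT_STREAM)

# Round-trip back to Python objects.
assert int_scalar.to_py() == 42
assert str_scalar.to_py() == "hello"
```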
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx index 7565b21084f..ed962f76dc3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -15,6 +15,8 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( make_string_scalar as cpp_make_string_scalar, ) from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["BPEMergePairs", "byte_pair_encoding"] @@ -55,10 +57,12 @@ cpdef Column byte_pair_encoding( An encoded column of strings. """ cdef unique_ptr[column] c_result + cdef Stream stream if separator is None: + stream = _get_stream(None) separator = Scalar.from_libcudf( - cpp_make_string_scalar(" ".encode()) + cpp_make_string_scalar(" ".encode(), stream.view()) ) with nogil: diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx index a27592fb434..9464ef7324f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -15,6 +15,8 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( ) from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["filter_tokens", "replace_tokens"] @@ -47,9 +49,11 @@ cpdef Column replace_tokens( New strings column with replaced strings """ cdef unique_ptr[column] c_result + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: c_result = cpp_replace_tokens( @@ -90,13 +94,16 @@ cpdef Column filter_tokens( New strings column of filtered strings """ cdef unique_ptr[column] c_result + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) if replacement is None: + stream = _get_stream(None) replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index 43d426489b4..1d49c828df7 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -19,6 +19,9 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( make_string_scalar as cpp_make_string_scalar, ) from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = [ "TokenizeVocabulary", @@ -63,10 +66,12 @@ cpdef Column tokenize_scalar(Column input, Scalar delimiter=None): New strings columns of tokens """ cdef unique_ptr[column] c_result + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: @@ -126,10 +131,12 @@ cpdef Column count_tokens_scalar(Column input, Scalar delimiter=None): New column of token counts """ cdef unique_ptr[column] c_result + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: @@ -218,10 +225,12 @@ cpdef Column detokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result + cdef Stream stream if separator is None: + stream = _get_stream(None) separator = Scalar.from_libcudf( - cpp_make_string_scalar(" ".encode()) + cpp_make_string_scalar(" ".encode(), stream.view()) ) with nogil: diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd index a273647c98d..34b8c060377 100644 --- a/python/pylibcudf/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -1,10 +1,11 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType @@ -25,7 +26,7 @@ cdef class Scalar: cpdef bool is_valid(self) @staticmethod - cdef Scalar empty_like(Column column) + cdef Scalar empty_like(Column column, Stream stream=*) @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*) diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi index 09c2e1c584c..be84726ef18 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyi +++ b/python/pylibcudf/pylibcudf/scalar.pyi @@ -2,6 +2,8 @@ from typing import Any +from rmm.pylibrmm.stream import Stream + from pylibcudf.column import Column from pylibcudf.types import DataType @@ -12,11 +14,22 @@ class Scalar: def type(self) -> DataType: ... def is_valid(self) -> bool: ... @staticmethod - def empty_like(column: Column) -> Scalar: ... + def empty_like(column: Column, stream: Stream | None = None) -> Scalar: ... @staticmethod - def from_arrow(pa_val: Any, dtype: DataType | None = None) -> Scalar: ... + def from_arrow( + pa_val: Any, + dtype: DataType | None = None, + stream: Stream | None = None, + ) -> Scalar: ... @classmethod - def from_py(cls, py_val: Any, dtype: DataType | None = None) -> Scalar: ... + def from_py( + cls, + py_val: Any, + dtype: DataType | None = None, + stream: Stream | None = None, + ) -> Scalar: ... @classmethod - def from_numpy(cls, np_val: NpGeneric) -> Scalar: ... + def from_numpy( + cls, np_val: NpGeneric, stream: Stream | None = None + ) -> Scalar: ... 
def to_py(self) -> None | int | float | str | bool: ... diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 0d533c960a4..57ea17d2921 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -52,10 +52,12 @@ from pylibcudf.libcudf.wrappers.timestamps cimport ( ) from rmm.pylibrmm.memory_resource cimport get_current_device_resource +from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .traits cimport is_floating_point from .types cimport DataType +from .utils cimport _get_stream from functools import singledispatch try: @@ -149,7 +151,11 @@ cdef class Scalar: return self.get().is_valid() @staticmethod - def from_arrow(pa_val, dtype: DataType | None = None) -> Scalar: + def from_arrow( + pa_val, + dtype: DataType | None = None, + stream: Stream | None = None + ) -> Scalar: """ Convert a pyarrow scalar to a pylibcudf.Scalar. @@ -160,28 +166,35 @@ cdef class Scalar: dtype: DataType | None The datatype to cast the value to. If None, the type is inferred from the pyarrow scalar. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- Scalar New pylibcudf.Scalar """ - return _from_arrow(pa_val, dtype) + return _from_arrow(pa_val, dtype, stream) @staticmethod - cdef Scalar empty_like(Column column): + cdef Scalar empty_like(Column column, Stream stream=None): """Construct a null scalar with the same type as column. Parameters ---------- column Column to take type from + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- New empty (null) scalar of the given type. """ - return Scalar.from_libcudf(move(make_empty_scalar_like(column.view()))) + stream = _get_stream(stream) + return Scalar.from_libcudf( + move(make_empty_scalar_like(column.view(), stream.view())) + ) @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=None): @@ -197,7 +210,12 @@ cdef class Scalar: return s @classmethod - def from_py(cls, py_val, dtype: DataType | None = None): + def from_py( + cls, + py_val, + dtype: DataType | None = None, + stream: Stream | None = None + ): """ Convert a Python standard library object to a Scalar. @@ -208,16 +226,18 @@ cdef class Scalar: dtype: DataType | None The datatype to cast the value to. If None, the type is inferred from `py_val`. + stream : Stream | None + CUDA stream on which to perform the operation. Returns ------- Scalar New pylibcudf.Scalar """ - return _from_py(py_val, dtype) + return _from_py(py_val, dtype, stream) @classmethod - def from_numpy(cls, np_val): + def from_numpy(cls, np_val, stream: Stream | None = None): """ Convert a NumPy scalar to a Scalar. @@ -225,13 +245,15 @@ cdef class Scalar: ---------- np_val: numpy.generic Value to convert to a pylibcudf.Scalar + stream : Stream | None + CUDA stream on which to perform the operation. 
Returns ------- Scalar New pylibcudf.Scalar """ - return _from_numpy(np_val) + return _from_numpy(np_val, stream) def to_py(self): """ @@ -292,31 +314,35 @@ cdef Scalar _new_scalar(unique_ptr[scalar] c_obj, DataType dtype): @singledispatch -def _from_py(py_val, dtype: DataType | None): +def _from_py(py_val, dtype: DataType | None, stream: Stream | None): raise TypeError(f"{type(py_val).__name__} cannot be converted to pylibcudf.Scalar") @_from_py.register(type(None)) -def _(py_val, dtype: DataType | None): +def _(py_val, dtype: DataType | None, stream: Stream | None): cdef DataType c_dtype if dtype is None: raise ValueError("Must specify a dtype for a None value.") else: c_dtype = dtype - cdef unique_ptr[scalar] c_obj = make_default_constructed_scalar(c_dtype.c_obj) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_default_constructed_scalar( + c_dtype.c_obj, + stream.view() + ) return _new_scalar(move(c_obj), dtype) @_from_py.register(dict) @_from_py.register(list) -def _(py_val, dtype: DataType | None): +def _(py_val, dtype: DataType | None, stream: Stream | None): raise NotImplementedError( f"Conversion from {type(py_val).__name__} is currently not supported." ) @_from_py.register(float) -def _(py_val: float, dtype: DataType | None): +def _(py_val: float, dtype: DataType | None, stream: Stream | None): cdef unique_ptr[scalar] c_obj cdef DataType c_dtype if dtype is None: @@ -324,15 +350,16 @@ def _(py_val: float, dtype: DataType | None): else: c_dtype = dtype + stream = _get_stream(stream) cdef type_id tid = c_dtype.id() if tid == type_id.FLOAT32: if abs(py_val) > numeric_limits[float].max(): raise OverflowError(f"{py_val} out of range for FLOAT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.FLOAT64: - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) else: typ = c_dtype.id() @@ -342,7 +369,7 @@ def _(py_val: float, dtype: DataType | None): @_from_py.register(int) -def _(py_val: int, dtype: DataType | None): +def _(py_val: int, dtype: DataType | None, stream: Stream | None): cdef unique_ptr[scalar] c_obj cdef DataType c_dtype cdef duration_ns c_duration_ns @@ -353,9 +380,10 @@ def _(py_val: int, dtype: DataType | None): if dtype is None: c_dtype = dtype = DataType(type_id.INT64) elif is_floating_point(dtype): - return _from_py(float(py_val), dtype) + return _from_py(float(py_val), dtype, stream) else: c_dtype = dtype + stream = _get_stream(stream) cdef type_id tid = c_dtype.id() if tid == type_id.INT8: @@ -363,7 +391,7 @@ def _(py_val: int, dtype: DataType | None): numeric_limits[int8_t].min() <= py_val <= numeric_limits[int8_t].max() ): raise OverflowError(f"{py_val} out of range for INT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.INT16: @@ -371,7 +399,7 @@ def _(py_val: int, dtype: DataType | None): numeric_limits[int16_t].min() <= py_val <= numeric_limits[int16_t].max() ): raise OverflowError(f"{py_val} out of range for INT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.INT32: @@ -379,7 +407,7 @@ def _(py_val: int, dtype: DataType | None): numeric_limits[int32_t].min() <= py_val <= numeric_limits[int32_t].max() ): raise 
OverflowError(f"{py_val} out of range for INT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.INT64: @@ -387,7 +415,7 @@ def _(py_val: int, dtype: DataType | None): numeric_limits[int64_t].min() <= py_val <= numeric_limits[int64_t].max() ): raise OverflowError(f"{py_val} out of range for INT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.UINT8: @@ -395,7 +423,7 @@ def _(py_val: int, dtype: DataType | None): raise ValueError("Cannot assign negative value to UINT8 scalar") if py_val > numeric_limits[uint8_t].max(): raise OverflowError(f"{py_val} out of range for UINT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.UINT16: @@ -403,7 +431,7 @@ def _(py_val: int, dtype: DataType | None): raise ValueError("Cannot assign negative value to UINT16 scalar") if py_val > numeric_limits[uint16_t].max(): raise OverflowError(f"{py_val} out of range for UINT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.UINT32: @@ -411,7 +439,7 @@ def _(py_val: int, dtype: DataType | None): raise ValueError("Cannot assign negative value to UINT32 scalar") if py_val > numeric_limits[uint32_t].max(): raise OverflowError(f"{py_val} out of range for UINT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.UINT64: @@ -419,7 +447,7 @@ def _(py_val: int, dtype: DataType | None): raise ValueError("Cannot assign negative value to UINT64 scalar") if py_val > numeric_limits[uint64_t].max(): raise OverflowError(f"{py_val} out of range for UINT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj) + c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view()) (c_obj.get()).set_value(py_val) elif tid == type_id.DURATION_NANOSECONDS: @@ -427,7 +455,7 @@ def _(py_val: int, dtype: DataType | None): raise OverflowError( f"{py_val} nanoseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_ns = duration_ns(py_val) (c_obj.get()).set_value(c_duration_ns) @@ -436,7 +464,7 @@ def _(py_val: int, dtype: DataType | None): raise OverflowError( f"{py_val} microseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_us = duration_us(py_val) (c_obj.get()).set_value(c_duration_us) @@ -445,7 +473,7 @@ def _(py_val: int, dtype: DataType | None): raise OverflowError( f"{py_val} milliseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_ms = duration_ms(py_val) (c_obj.get()).set_value(c_duration_ms) @@ -454,7 +482,7 @@ def _(py_val: int, dtype: DataType | None): raise OverflowError( f"{py_val} seconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_s = duration_s(py_val) (c_obj.get()).set_value(c_duration_s) @@ -463,7 +491,7 @@ def _(py_val: int, dtype: DataType | None): raise OverflowError( f"{py_val} days out of range for INT32 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_D = duration_D(py_val) (c_obj.get()).set_value(c_duration_D) @@ -475,8 +503,9 @@ def _(py_val: int, dtype: DataType | None): @_from_py.register(py_bool) -def _(py_val: py_bool, dtype: DataType | None): +def _(py_val: py_bool, dtype: DataType | None, stream: Stream | None): if dtype is None: + stream = _get_stream(stream) dtype = DataType(type_id.BOOL8) elif dtype.id() != type_id.BOOL8: tid = (dtype).id() @@ -484,26 +513,32 @@ def _(py_val: py_bool, dtype: DataType | None): f"Cannot convert bool to Scalar with dtype {tid.name}" ) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar((dtype).c_obj) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar( + (dtype).c_obj, + stream.view() + ) (c_obj.get()).set_value(py_val) return _new_scalar(move(c_obj), dtype) @_from_py.register(str) -def _(py_val: str, dtype: DataType | None): +def _(py_val: str, dtype: DataType | None, stream: Stream | None): if dtype is None: + stream = _get_stream(stream) dtype = DataType(type_id.STRING) elif dtype.id() != type_id.STRING: tid = (dtype).id() raise TypeError( f"Cannot convert str to Scalar with dtype {tid.name}" ) - cdef unique_ptr[scalar] c_obj = make_string_scalar(py_val.encode()) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_string_scalar(py_val.encode(), stream.view()) return _new_scalar(move(c_obj), dtype) @_from_py.register(datetime.timedelta) -def _(py_val: datetime.timedelta, dtype: DataType | None): +def _(py_val: datetime.timedelta, dtype: DataType | None, stream: Stream | None): cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -511,8 +546,10 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): cdef duration_s c_duration_s cdef duration_D c_duration_D if dtype is None: + stream = _get_stream(stream) dtype = DataType(type_id.DURATION_MICROSECONDS) + stream = _get_stream(stream) cdef DataType c_dtype = dtype cdef type_id tid = c_dtype.id() total_seconds = py_val.total_seconds() @@ -522,7 +559,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): raise OverflowError( f"{total_nanoseconds} nanoseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_ns = duration_ns(total_nanoseconds) (c_obj.get()).set_value(c_duration_ns) elif tid == type_id.DURATION_MICROSECONDS: @@ -531,7 +568,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): raise OverflowError( f"{total_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_us = duration_us(total_microseconds) (c_obj.get()).set_value(c_duration_us) elif tid == type_id.DURATION_MILLISECONDS: @@ -540,7 +577,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): raise OverflowError( f"{total_milliseconds} milliseconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_ms = duration_ms(total_milliseconds) (c_obj.get()).set_value(c_duration_ms) elif tid == type_id.DURATION_SECONDS: @@ -549,7 +586,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): raise OverflowError( f"{total_seconds} seconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_s = duration_s(total_seconds) (c_obj.get()).set_value(c_duration_s) elif tid == type_id.DURATION_DAYS: @@ -558,7 +595,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): raise OverflowError( f"{total_days} days out of range for INT32 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj) + c_obj = make_duration_scalar(c_dtype.c_obj, stream.view()) c_duration_D = duration_D(total_days) (c_obj.get()).set_value(c_duration_D) else: @@ -568,7 +605,7 @@ def _(py_val: datetime.timedelta, dtype: DataType | None): @_from_py.register(datetime.date) -def _(py_val: datetime.date, dtype: DataType | None): +def _(py_val: datetime.date, dtype: DataType | None, stream: Stream | None): cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -581,8 +618,10 @@ def _(py_val: datetime.date, dtype: DataType | None): cdef timestamp_ns c_timestamp_ns cdef timestamp_D c_timestamp_D if dtype is None: + stream = _get_stream(stream) dtype = DataType(type_id.TIMESTAMP_MICROSECONDS) + stream = _get_stream(stream) cdef DataType c_dtype = dtype cdef type_id tid = c_dtype.id() if isinstance(py_val, datetime.datetime): @@ -595,7 +634,7 @@ def _(py_val: datetime.date, dtype: DataType | None): raise OverflowError( f"{epoch_nanoseconds} nanoseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj) + c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view()) c_duration_ns = duration_ns(epoch_nanoseconds) c_timestamp_ns = timestamp_ns(c_duration_ns) (c_obj.get()).set_value(c_timestamp_ns) @@ -605,7 +644,7 @@ def _(py_val: datetime.date, dtype: DataType | None): raise OverflowError( f"{epoch_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj) + c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view()) c_duration_us = duration_us(epoch_microseconds) c_timestamp_us = timestamp_us(c_duration_us) (c_obj.get()).set_value(c_timestamp_us) @@ -615,7 +654,7 @@ def _(py_val: datetime.date, dtype: DataType | None): raise OverflowError( f"{epoch_milliseconds} milliseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj) + c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view()) c_duration_ms = duration_ms(epoch_milliseconds) c_timestamp_ms = timestamp_ms(c_duration_ms) (c_obj.get()).set_value(c_timestamp_ms) @@ -625,7 +664,7 @@ def _(py_val: datetime.date, dtype: DataType | None): raise OverflowError( f"{epoch_seconds} seconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj) + c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view()) c_duration_s = duration_s(epoch_seconds) c_timestamp_s = timestamp_s(c_duration_s) (c_obj.get()).set_value(c_timestamp_s) @@ -635,7 +674,7 @@ def _(py_val: datetime.date, dtype: DataType | None): raise OverflowError( f"{epoch_days} days out of range for INT32 limit." 
) - c_obj = make_timestamp_scalar(c_dtype.c_obj) + c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view()) c_duration_D = duration_D(epoch_days) c_timestamp_D = timestamp_D(c_duration_D) (c_obj.get()).set_value(c_timestamp_D) @@ -646,7 +685,7 @@ def _(py_val: datetime.date, dtype: DataType | None): @_from_py.register(decimal.Decimal) -def _(py_val: decimal.Decimal, dtype: DataType | None): +def _(py_val: decimal.Decimal, dtype: DataType | None, stream: Stream | None): scale = -py_val.as_tuple().exponent as_int = int(py_val.scaleb(scale)) @@ -657,15 +696,17 @@ def _(py_val: decimal.Decimal, dtype: DataType | None): if dtype.id() != type_id.DECIMAL128: raise TypeError("Expected dtype to be DECIMAL128") + stream = _get_stream(stream) cdef unique_ptr[scalar] c_obj = make_fixed_point_scalar[decimal128]( val, - scale_type(scale) + scale_type(scale), + stream.view() ) return _new_scalar(move(c_obj), dtype) @singledispatch -def _from_numpy(np_val): +def _from_numpy(np_val, stream: Stream | None): if np_error is not None: raise np_error raise TypeError(f"{type(np_val).__name__} cannot be converted to pylibcudf.Scalar") @@ -674,109 +715,129 @@ def _from_numpy(np_val): if np is not None: @_from_numpy.register(np.datetime64) @_from_numpy.register(np.timedelta64) - def _(np_val): + def _(np_val, stream: Stream | None): raise NotImplementedError( f"{type(np_val).__name__} is currently not supported." ) @_from_numpy.register(np.bool_) - def _(np_val): + def _(np_val, stream: Stream | None): cdef DataType dtype = DataType(type_id.BOOL8) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) cdef cbool c_val = np_val (c_obj.get()).set_value(c_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.str_) - def _(np_val): + def _(np_val, stream: Stream | None): cdef DataType dtype = DataType(type_id.STRING) - cdef unique_ptr[scalar] c_obj = make_string_scalar(np_val.item().encode()) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_string_scalar( + np_val.item().encode(), + stream.view() + ) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int8) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.INT8) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + stream = _get_stream(stream) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int16) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.INT16) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int32) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.INT32) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int64) - def _(np_val): + def _(np_val, stream: Stream | None): + 
stream = _get_stream(stream) dtype = DataType(type_id.INT64) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint8) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.UINT8) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint16) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.UINT16) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint32) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.UINT32) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint64) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.UINT64) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float32) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.FLOAT32) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float64) - def _(np_val): + def _(np_val, stream: Stream | None): + stream = _get_stream(stream) dtype = DataType(type_id.FLOAT64) - cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj) + cdef unique_ptr[scalar] c_obj = make_numeric_scalar(dtype.c_obj, stream.view()) (c_obj.get()).set_value(np_val) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr -def _from_arrow(obj: pa.Scalar, dtype: DataType | None = None) -> Scalar: +def _from_arrow( + obj: pa.Scalar, + dtype: DataType | None = None, + stream: Stream | None = None +) -> Scalar: if pa_err is not None: raise RuntimeError( "pyarrow was not found on your system. 
Please " @@ -790,4 +851,4 @@ def _from_arrow(obj: pa.Scalar, dtype: DataType | None = None) -> Scalar: pa_array = pa.array([None], type=obj.type) else: pa_array = pa.array([obj]) - return Column.from_arrow(pa_array, dtype=dtype).to_scalar() + return Column.from_arrow(pa_array, dtype=dtype, stream=stream).to_scalar() diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx index a54480b8e4a..5297696145b 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -11,6 +11,8 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( from pylibcudf.libcudf.strings cimport capitalize as cpp_capitalize from pylibcudf.scalar cimport Scalar from pylibcudf.strings.char_types cimport string_character_types +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference @@ -39,10 +41,12 @@ cpdef Column capitalize( Column of strings capitalized from the input column """ cdef unique_ptr[column] c_result + cdef Stream stream if delimiters is None: + stream = _get_stream(None) delimiters = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef const string_scalar* cpp_delimiters = ( diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx index da78c81c0c0..87650c3bcbd 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyx +++ b/python/pylibcudf/pylibcudf/strings/combine.pyx @@ -10,6 +10,8 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( from pylibcudf.libcudf.strings cimport combine as cpp_combine from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference from pylibcudf.libcudf.strings.combine import \ @@ -62,10 +64,12 @@ cpdef Column concatenate( cdef unique_ptr[column] c_result cdef const string_scalar* c_col_narep cdef const string_scalar* c_separator + cdef Stream stream if narep is None: + stream = _get_stream(None) narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef const string_scalar* c_narep = ( narep.c_obj.get() @@ -73,8 +77,9 @@ cpdef Column concatenate( if ColumnOrScalar is Column: if col_narep is None: + stream = _get_stream(None) col_narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) c_col_narep = ( col_narep.c_obj.get() diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx index 7b4c53ed853..7773520d7b3 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyx +++ b/python/pylibcudf/pylibcudf/strings/contains.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from cython.operator import dereference @@ -11,6 +11,9 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( ) from pylibcudf.libcudf.strings cimport contains as cpp_contains from pylibcudf.strings.regex_program cimport RegexProgram +from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["contains_re", "count_re", "like", "matches_re"] @@ -137,10 +140,12 @@ cpdef Column like(Column input, ColumnOrScalar pattern, Scalar escape_character= New column of boolean results for each string """ cdef unique_ptr[column] result + cdef Stream stream if escape_character is None: + stream = _get_stream(None) escape_character = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef const string_scalar* c_escape_character = ( diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx index 518f72f6644..e2abe69e519 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -14,6 +14,8 @@ from pylibcudf.libcudf.strings.convert cimport ( ) from pylibcudf.scalar cimport Scalar from pylibcudf.types cimport type_id +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference @@ -48,10 +50,12 @@ cpdef Column format_list_column( New strings column """ cdef unique_ptr[column] c_result + cdef Stream stream if na_rep is None: + stream = _get_stream(None) na_rep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef const string_scalar* c_na_rep = ( diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index 3ba6c1b5530..f02dd1c2cdb 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -15,6 +15,8 @@ from pylibcudf.libcudf.strings.replace cimport ( ) from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["replace", "replace_multiple", "replace_slice"] @@ -144,10 +146,12 @@ cpdef Column replace_slice( New string column """ cdef unique_ptr[column] c_result + cdef Stream stream if repl is None: + stream = _get_stream(None) repl = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef const string_scalar* scalar_str = (repl.c_obj.get()) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx index bdabc779ddf..fd4df37dbac 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from cython.operator cimport dereference from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -15,6 +15,8 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_flags cimport regex_flags from pylibcudf.strings.regex_program cimport RegexProgram +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["replace_re", "replace_with_backrefs"] @@ -58,11 +60,13 @@ cpdef Column replace_re( """ cdef unique_ptr[column] c_result cdef vector[string] c_patterns + cdef Stream stream if Patterns is RegexProgram and Replacement is Scalar: if replacement is None: + stream = _get_stream(None) replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: c_result = move( diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx index bf09d3963ff..eb6ae8505eb 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -81,16 +81,19 @@ cpdef Column slice_strings( elif ColumnOrScalar is Scalar: if start is None: + stream = _get_stream(None) start = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0) + cpp_make_fixed_width_scalar(0, stream.view()) ) if stop is None: + stream = _get_stream(None) stop = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0) + cpp_make_fixed_width_scalar(0, stream.view()) ) if step is None: + stream = _get_stream(None) step = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(1) + cpp_make_fixed_width_scalar(1, stream.view()) ) cpp_start = start.c_obj.get() diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx index 75537ea46d3..36337e82102 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.column cimport Column @@ -10,6 +10,8 @@ from pylibcudf.libcudf.strings.split cimport partition as cpp_partition from pylibcudf.libcudf.table.table cimport table from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference @@ -39,10 +41,12 @@ cpdef Table partition(Column input, Scalar delimiter=None): cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: @@ -77,10 +81,12 @@ cpdef Table rpartition(Column input, Scalar delimiter=None): cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) + cdef Stream stream if delimiter is None: + stream = _get_stream(None) delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) with nogil: diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx b/python/pylibcudf/pylibcudf/strings/strip.pyx index 805d959891b..054bed6cd3c 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyx +++ b/python/pylibcudf/pylibcudf/strings/strip.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
+# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -12,6 +12,8 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( from pylibcudf.libcudf.strings cimport strip as cpp_strip from pylibcudf.scalar cimport Scalar from pylibcudf.strings.side_type cimport side_type +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = ["strip"] @@ -41,10 +43,12 @@ cpdef Column strip( pylibcudf.Column New strings column. """ + cdef Stream stream if to_strip is None: + stream = _get_stream(None) to_strip = Scalar.from_libcudf( - cpp_make_string_scalar("".encode()) + cpp_make_string_scalar("".encode(), stream.view()) ) cdef unique_ptr[column] c_result
From c937657d384eced46fb8c75a57aaaca711c3cb4d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 20 Aug 2025 13:50:48 -0400 Subject: [PATCH 177/366] Split up rolling.cuh into separate headers (#19682) Splits `cpp/src/rolling/detail/rolling.cuh` into multiple headers. Moves the jit/udf code into `rolling_udf.cuh`, moves the device operators into `rolling_operators.cuh`, and adjusts the dependent source files. Also cleans up some unnecessary includes. No function or behavior has changed; this only moves internal code around for easier maintenance. Attempts to address https://github.com/rapidsai/cudf/issues/18568 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19682 --- cpp/src/rolling/detail/rolling.cuh | 726 +----------------- .../rolling/detail/rolling_fixed_window.cu | 3 +- cpp/src/rolling/detail/rolling_operators.cuh | 682 ++++++++++++++++ cpp/src/rolling/detail/rolling_udf.cuh | 118 +++ cpp/src/rolling/detail/rolling_utils.cu | 2 +- .../rolling/detail/rolling_variable_window.cu | 5 +- cpp/src/rolling/grouped_rolling.cu | 13 +- 7 files changed, 806 insertions(+), 743 deletions(-) create mode 100644 cpp/src/rolling/detail/rolling_operators.cuh create mode 100644 cpp/src/rolling/detail/rolling_udf.cuh
diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index c32d831a4f1..e3cff58fac3 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -16,15 +16,12 @@ #pragma once -#include "jit/cache.hpp" -#include "jit/parser.hpp" -#include "jit/util.hpp" #include "lead_lag_nested.cuh" #include "nth_element.cuh" #include "reductions/nested_type_minmax_util.cuh" #include "rolling.hpp" #include "rolling_collect_list.cuh" -#include "rolling_jit.hpp" +#include "rolling_operators.cuh" #include #include @@ -32,20 +29,16 @@ #include #include #include -#include #include #include -#include #include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -54,418 +47,12 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - -#include - #include namespace cudf { namespace detail { -/** - * @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window.
- */ -template -struct DeviceRolling { - size_type min_periods; - - // what operations do we support - template - static constexpr bool is_supported() - { - return cudf::detail::is_valid_aggregation() && has_corresponding_operator() && - // MIN/MAX only supports fixed width types - (((O == aggregation::MIN || O == aggregation::MAX) && cudf::is_fixed_width()) || - (O == aggregation::SUM) || (O == aggregation::MEAN)); - } - - // operations we do support - template - explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) - : min_periods(_min_periods) - { - } - - // operations we don't support - template - explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) - : min_periods(_min_periods) - { - CUDF_FAIL("Invalid aggregation/type pair"); - } - - // perform the windowing operation - template - bool __device__ operator()(column_device_view const& input, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) const - { - using AggOp = typename corresponding_operator::type; - AggOp agg_op; - - cudf::size_type count = 0; - OutputType val = AggOp::template identity(); - - for (size_type j = start_index; j < end_index; j++) { - if (!has_nulls || input.is_valid(j)) { - OutputType element = input.element>(j); - val = agg_op(element, val); - count++; - } - } - - bool output_is_valid = (count >= min_periods); - - if (output_is_valid) { - // store the output value, one per thread, but only if the - // output is valid. min_periods is required to be >= 1, and so - // here, count must be nonzero. We need to avoid storing if - // count is zero since this could cause UB in some aggregations, - // which may cause the compiler to deduce nonsense about the loop - // that increments count. - cudf::detail::rolling_store_output_functor{}( - output.element(current_index), val, count); - } - - return output_is_valid; - } -}; - -/** - * @brief The base struct used for checking if the combination of input type and aggregation op is - * supported. - */ -template -struct DeviceRollingArgMinMaxBase { - size_type min_periods; - explicit DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} - - static constexpr bool is_supported() - { - // Right now only support ARGMIN/ARGMAX of strings and structs. - auto const type_supported = - std::is_same_v || std::is_same_v; - auto const op_supported = op == aggregation::Kind::ARGMIN || op == aggregation::Kind::ARGMAX; - - return type_supported && op_supported; - } -}; - -/** - * @brief Operator for applying an ARGMAX/ARGMIN rolling aggregation on a single window for string. - */ -template -struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase { - explicit DeviceRollingArgMinMaxString(size_type _min_periods) - : DeviceRollingArgMinMaxBase(_min_periods) - { - } - using DeviceRollingArgMinMaxBase::min_periods; - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) - { - auto constexpr default_output = (op == aggregation::ARGMIN) ? 
ARGMIN_SENTINEL : ARGMAX_SENTINEL; - - using InputType = cudf::string_view; - using AggOp = typename corresponding_operator::type; - AggOp agg_op; - - cudf::size_type count = 0; - InputType val = AggOp::template identity(); - OutputType val_index = default_output; - - for (size_type j = start_index; j < end_index; j++) { - if (!has_nulls || input.is_valid(j)) { - InputType element = input.element(j); - val = agg_op(element, val); - if (val == element) { val_index = j; } - count++; - } - } - - bool output_is_valid = (count >= min_periods); - // Use the sentinel value (i.e., -1) for the output will help identify null elements while - // gathering for Min and Max. - output.element(current_index) = output_is_valid ? val_index : default_output; - - // The gather mask shouldn't contain null values, so - // always return zero - return true; - } -}; - -/** - * @brief Operator for applying an ARGMAX/ARGMIN rolling aggregation on a single window for struct. - */ -template -struct DeviceRollingArgMinMaxStruct : DeviceRollingArgMinMaxBase { - DeviceRollingArgMinMaxStruct(size_type _min_periods, Comparator const& _comp) - : DeviceRollingArgMinMaxBase(_min_periods), comp(_comp) - { - } - using DeviceRollingArgMinMaxBase::min_periods; - Comparator comp; - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) - { - auto constexpr default_output = (op == aggregation::ARGMIN) ? ARGMIN_SENTINEL : ARGMAX_SENTINEL; - - auto const valid_count = - has_nulls ? thrust::count_if(thrust::seq, - thrust::make_counting_iterator(start_index), - thrust::make_counting_iterator(end_index), - [&input](size_type idx) { return input.is_valid_nocheck(idx); }) - : end_index - start_index; - - // Use the sentinel value (i.e., -1) for the output will help identify null elements while - // gathering for Min and Max. - output.element(current_index) = - (valid_count >= min_periods) ? thrust::reduce(thrust::seq, - thrust::make_counting_iterator(start_index), - thrust::make_counting_iterator(end_index), - size_type{start_index}, - comp) - : default_output; - - // The gather mask shouldn't contain null values, so always return true. - return true; - } -}; - -/** - * @brief Operator for applying a COUNT_VALID rolling aggregation on a single window. - */ -template -struct DeviceRollingCountValid { - size_type min_periods; - - // what operations do we support - template - static constexpr bool is_supported() - { - return true; - } - - DeviceRollingCountValid(size_type _min_periods) : min_periods(_min_periods) {} - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) - { - bool output_is_valid = ((end_index - start_index) >= min_periods); - - if (output_is_valid) { - cudf::size_type count = 0; - - if (!has_nulls) { - count = end_index - start_index; - } else { - count = thrust::count_if(thrust::seq, - thrust::make_counting_iterator(start_index), - thrust::make_counting_iterator(end_index), - [&input](auto i) { return input.is_valid_nocheck(i); }); - } - output.element(current_index) = count; - } - - return output_is_valid; - } -}; - -/** - * @brief Operator for applying a COUNT_ALL rolling aggregation on a single window. 
- */ -template -struct DeviceRollingCountAll { - size_type min_periods; - - // what operations do we support - template - static constexpr bool is_supported() - { - return true; - } - - DeviceRollingCountAll(size_type _min_periods) : min_periods(_min_periods) {} - - template - bool __device__ operator()(column_device_view const&, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) - { - cudf::size_type count = end_index - start_index; - - bool output_is_valid = count >= min_periods; - output.element(current_index) = count; - - return output_is_valid; - } -}; - -/** - * @brief Operator for applying a VAR rolling aggregation on a single window. - */ -template -struct DeviceRollingVariance { - size_type const min_periods; - size_type const ddof; - - // what operations do we support - template - static constexpr bool is_supported() - { - return is_fixed_width() and not is_chrono(); - } - - DeviceRollingVariance(size_type _min_periods, size_type _ddof) - : min_periods(_min_periods), ddof{_ddof} - { - } - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) const - { - using DeviceInputType = device_storage_type_t; - - // valid counts in the window - cudf::size_type const count = - has_nulls ? thrust::count_if(thrust::seq, - thrust::make_counting_iterator(start_index), - thrust::make_counting_iterator(end_index), - [&input](auto i) { return input.is_valid_nocheck(i); }) - : end_index - start_index; - - // Result will be null if any of the following conditions are met: - // - All inputs are null - // - Number of valid inputs is less than `min_periods` - bool output_is_valid = count > 0 and (count >= min_periods); - - if (output_is_valid) { - if (count >= ddof) { - // Welford algorithm - // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - OutputType m{0}, m2{0}; - size_type running_count{0}; - - for (size_type i = start_index; i < end_index; i++) { - if (has_nulls and input.is_null_nocheck(i)) { continue; } - - OutputType const x = static_cast(input.element(i)); - - running_count++; - OutputType const tmp1 = x - m; - m += tmp1 / running_count; - OutputType const tmp2 = x - m; - m2 += tmp1 * tmp2; - } - if constexpr (is_fixed_point()) { - // For fixed_point types, the previous computed value used unscaled rep-value, - // the final result should be multiplied by the square of decimal `scale`. - OutputType scaleby = exp10(static_cast(input.type().scale())); - scaleby *= scaleby; - output.element(current_index) = m2 / (count - ddof) * scaleby; - } else { - output.element(current_index) = m2 / (count - ddof); - } - } else { - output.element(current_index) = - cuda::std::numeric_limits::signaling_NaN(); - } - } - - return output_is_valid; - } -}; - -/** - * @brief Operator for applying a ROW_NUMBER rolling aggregation on a single window. 
- */ -template -struct DeviceRollingRowNumber { - size_type min_periods; - - // what operations do we support - template - static constexpr bool is_supported() - { - return true; - } - - DeviceRollingRowNumber(size_type _min_periods) : min_periods(_min_periods) {} - - template - bool __device__ operator()(column_device_view const&, - column_device_view const&, - mutable_column_device_view& output, - size_type start_index, - size_type end_index, - size_type current_index) - { - bool output_is_valid = end_index - start_index >= min_periods; - output.element(current_index) = current_index - start_index + 1; - - return output_is_valid; - } -}; - -struct agg_specific_empty_output { - template - std::unique_ptr operator()(column_view const& input, rolling_aggregation const&) const - { - using target_type = cudf::detail::target_type_t; - - if constexpr (std::is_same_v, void>) { - CUDF_FAIL("Unsupported combination of column-type and aggregation."); - } - - if constexpr (cudf::is_fixed_width()) { - return cudf::make_empty_column(type_to_id()); - } - - if constexpr (op == aggregation::COLLECT_LIST) { - return cudf::make_lists_column( - 0, make_empty_column(type_to_id()), empty_like(input), 0, {}); - } - - return empty_like(input); - } -}; - static std::unique_ptr empty_output_for_rolling_aggregation(column_view const& input, rolling_aggregation const& agg) { @@ -485,241 +72,6 @@ static std::unique_ptr empty_output_for_rolling_aggregation(column_view input.type(), agg.kind, agg_specific_empty_output{}, input, agg); } -/** - * @brief Operator for applying a LEAD rolling aggregation on a single window. - */ -template -struct DeviceRollingLead { - size_type row_offset; - - // what operations do we support - template - static constexpr bool is_supported() - { - return cudf::is_fixed_width(); - } - - template - DeviceRollingLead(size_type _row_offset) - requires(is_supported()) - : row_offset(_row_offset) - { - } - - template - DeviceRollingLead(size_type _row_offset) - requires(!is_supported()) - : row_offset(_row_offset) - { - CUDF_FAIL("Invalid aggregation/type pair"); - } - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const& default_outputs, - mutable_column_device_view& output, - size_type, - size_type end_index, - size_type current_index) - { - // Offsets have already been normalized. - - // Check if row is invalid. - if (row_offset > (end_index - current_index - 1)) { - // Invalid row marked. Use default value, if available. - if (default_outputs.size() == 0 || default_outputs.is_null(current_index)) { return false; } - - output.element(current_index) = - default_outputs.element(current_index); - return true; - } - - // Not an invalid row. - auto index = current_index + row_offset; - auto is_null = input.is_null(index); - if (!is_null) { - output.element(current_index) = - input.element>(index); - } - return !is_null; - } -}; - -/** - * @brief Operator for applying a LAG rolling aggregation on a single window. 
- */ -template -struct DeviceRollingLag { - size_type row_offset; - - // what operations do we support - template - static constexpr bool is_supported() - { - return cudf::is_fixed_width(); - } - - template - DeviceRollingLag(size_type _row_offset) - requires(is_supported()) - : row_offset(_row_offset) - { - } - - template - DeviceRollingLag(size_type _row_offset) - requires(!is_supported()) - : row_offset(_row_offset) - { - CUDF_FAIL("Invalid aggregation/type pair"); - } - - template - bool __device__ operator()(column_device_view const& input, - column_device_view const& default_outputs, - mutable_column_device_view& output, - size_type start_index, - size_type, - size_type current_index) - { - // Offsets have already been normalized. - - // Check if row is invalid. - if (row_offset > (current_index - start_index)) { - // Invalid row marked. Use default value, if available. - if (default_outputs.size() == 0 || default_outputs.is_null(current_index)) { return false; } - - output.element(current_index) = - default_outputs.element(current_index); - return true; - } - - // Not an invalid row. - auto index = current_index - row_offset; - auto is_null = input.is_null(index); - if (!is_null) { - output.element(current_index) = - input.element>(index); - } - return !is_null; - } -}; - -/** - * @brief Maps an `InputType and `aggregation::Kind` value to its corresponding - * rolling window operator. - * - * @tparam InputType The input type to map to its corresponding operator - * @tparam k The `aggregation::Kind` value to map to its corresponding operator - */ -template -struct corresponding_rolling_operator { - using type = DeviceRolling; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingArgMinMaxBase; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingArgMinMaxBase; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingCountValid; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingCountAll; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingRowNumber; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingVariance; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingLead; -}; - -template -struct corresponding_rolling_operator { - using type = DeviceRollingLag; -}; - -/** - * @brief Functor for creating a device rolling operator based on input type and aggregation type. 
- */ -template -struct create_rolling_operator { - auto operator()(size_type min_periods, rolling_aggregation const&) - { - return typename corresponding_rolling_operator::type(min_periods); - } -}; - -template -struct create_rolling_operator { - auto operator()(size_type min_periods, rolling_aggregation const& agg) - { - return DeviceRollingVariance{ - min_periods, dynamic_cast(agg)._ddof}; - } -}; - -template -struct create_rolling_operator { - auto operator()(size_type, rolling_aggregation const& agg) - { - return DeviceRollingLead{ - dynamic_cast(agg).row_offset}; - } -}; - -template -struct create_rolling_operator { - auto operator()(size_type, rolling_aggregation const& agg) - { - return DeviceRollingLag{ - dynamic_cast(agg).row_offset}; - } -}; - -template -struct create_rolling_operator< - InputType, - k, - typename std::enable_if_t && - (k == aggregation::Kind::ARGMIN || k == aggregation::Kind::ARGMAX)>> { - auto operator()(size_type min_periods, rolling_aggregation const&) - { - return DeviceRollingArgMinMaxString{min_periods}; - } -}; - -template -struct create_rolling_operator< - InputType, - k, - typename std::enable_if_t && - (k == aggregation::Kind::ARGMIN || k == aggregation::Kind::ARGMAX)>> { - template - auto operator()(size_type min_periods, Comparator const& comp) - { - return DeviceRollingArgMinMaxStruct{min_periods, comp}; - } -}; - /** * @brief Rolling window specific implementation of simple_aggregations_collector. * @@ -1229,82 +581,6 @@ struct dispatch_rolling { } }; -// Applies a user-defined rolling window function to the values in a column. -template -std::unique_ptr rolling_window_udf(column_view const& input, - PrecedingWindowIterator preceding_window, - std::string const& preceding_window_str, - FollowingWindowIterator following_window, - std::string const& following_window_str, - size_type min_periods, - rolling_aggregation const& agg, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - static_assert(warp_size == cudf::detail::size_in_bits(), - "bitmask_type size does not match CUDA warp size"); - - if (input.has_nulls()) { - CUDF_FAIL("Currently the UDF version of rolling window does NOT support inputs with nulls."); - } - - min_periods = std::max(min_periods, 0); - - auto& udf_agg = dynamic_cast(agg); - - std::string hash = "prog_rolling." 
+ std::to_string(std::hash{}(udf_agg._source)); - - std::string cuda_source; - switch (udf_agg.kind) { - case aggregation::Kind::PTX: - cuda_source += - cudf::jit::parse_single_function_ptx(udf_agg._source, - udf_agg._function_name, - {{0, cudf::type_to_name(udf_agg._output_type) + " *"}, - {5, "void const *"}}); // args 0 and 5 are pointers - break; - case aggregation::Kind::CUDA: - cuda_source += cudf::jit::parse_single_function_cuda(udf_agg._source, udf_agg._function_name); - break; - default: CUDF_FAIL("Unsupported UDF type."); - } - - std::unique_ptr output = make_numeric_column( - udf_agg._output_type, input.size(), cudf::mask_state::UNINITIALIZED, stream, mr); - - auto output_view = output->mutable_view(); - cudf::detail::device_scalar device_valid_count{0, stream}; - - std::string kernel_name = - jitify2::reflection::Template("cudf::rolling::jit::gpu_rolling_new") // - .instantiate(cudf::type_to_name(input.type()), // list of template arguments - cudf::type_to_name(output->type()), - udf_agg._operator_name, - preceding_window_str.c_str(), - following_window_str.c_str()); - - cudf::jit::get_program_cache(*rolling_jit_kernel_cu_jit) - .get_kernel( - kernel_name, {}, {{"rolling/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, nullptr, stream.value()) // - ->launch(input.size(), - cudf::jit::get_data_ptr(input), - input.null_mask(), - cudf::jit::get_data_ptr(output_view), - output_view.null_mask(), - device_valid_count.data(), - preceding_window, - following_window, - min_periods); - - output->set_null_count(output->size() - device_valid_count.value(stream)); - - // check the stream for debugging - CUDF_CHECK_CUDA(stream.value()); - - return output; -} - /** * @copydoc cudf::rolling_window(column_view const& input, * PrecedingWindowIterator preceding_window_begin, diff --git a/cpp/src/rolling/detail/rolling_fixed_window.cu b/cpp/src/rolling/detail/rolling_fixed_window.cu index 7526c858899..806d67bb7bb 100644 --- a/cpp/src/rolling/detail/rolling_fixed_window.cu +++ b/cpp/src/rolling/detail/rolling_fixed_window.cu @@ -15,6 +15,7 @@ */ #include "rolling.cuh" +#include "rolling_udf.cuh" #include "rolling_utils.cuh" #include @@ -23,8 +24,6 @@ #include #include -#include - namespace cudf::detail { // Applies a fixed-size rolling window function to the values in a column. diff --git a/cpp/src/rolling/detail/rolling_operators.cuh b/cpp/src/rolling/detail/rolling_operators.cuh new file mode 100644 index 00000000000..dea196ab676 --- /dev/null +++ b/cpp/src/rolling/detail/rolling_operators.cuh @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "rolling.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cudf { + +namespace detail { + +/** + * @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window. + */ +template +struct DeviceRolling { + size_type min_periods; + + // what operations do we support + template + static constexpr bool is_supported() + { + return cudf::detail::is_valid_aggregation() && has_corresponding_operator() && + // MIN/MAX only supports fixed width types + (((O == aggregation::MIN || O == aggregation::MAX) && cudf::is_fixed_width()) || + (O == aggregation::SUM) || (O == aggregation::MEAN)); + } + + // operations we do support + template + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + : min_periods(_min_periods) + { + } + + // operations we don't support + template + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + : min_periods(_min_periods) + { + CUDF_FAIL("Invalid aggregation/type pair"); + } + + // perform the windowing operation + template + bool __device__ operator()(column_device_view const& input, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) const + { + using AggOp = typename corresponding_operator::type; + AggOp agg_op; + + cudf::size_type count = 0; + OutputType val = AggOp::template identity(); + + for (size_type j = start_index; j < end_index; j++) { + if (!has_nulls || input.is_valid(j)) { + OutputType element = input.element>(j); + val = agg_op(element, val); + count++; + } + } + + bool output_is_valid = (count >= min_periods); + + if (output_is_valid) { + // store the output value, one per thread, but only if the + // output is valid. min_periods is required to be >= 1, and so + // here, count must be nonzero. We need to avoid storing if + // count is zero since this could cause UB in some aggregations, + // which may cause the compiler to deduce nonsense about the loop + // that increments count. + cudf::detail::rolling_store_output_functor{}( + output.element(current_index), val, count); + } + + return output_is_valid; + } +}; + +/** + * @brief The base struct used for checking if the combination of input type and aggregation op is + * supported. + */ +template +struct DeviceRollingArgMinMaxBase { + size_type min_periods; + explicit DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} + + static constexpr bool is_supported() + { + // Right now only support ARGMIN/ARGMAX of strings and structs. + auto const type_supported = + std::is_same_v || std::is_same_v; + auto const op_supported = op == aggregation::Kind::ARGMIN || op == aggregation::Kind::ARGMAX; + + return type_supported && op_supported; + } +}; + +/** + * @brief Operator for applying an ARGMAX/ARGMIN rolling aggregation on a single window for string. 
+ */ +template +struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase { + explicit DeviceRollingArgMinMaxString(size_type _min_periods) + : DeviceRollingArgMinMaxBase(_min_periods) + { + } + using DeviceRollingArgMinMaxBase::min_periods; + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) + { + auto constexpr default_output = (op == aggregation::ARGMIN) ? ARGMIN_SENTINEL : ARGMAX_SENTINEL; + + using InputType = cudf::string_view; + using AggOp = typename corresponding_operator::type; + AggOp agg_op; + + cudf::size_type count = 0; + InputType val = AggOp::template identity(); + OutputType val_index = default_output; + + for (size_type j = start_index; j < end_index; j++) { + if (!has_nulls || input.is_valid(j)) { + InputType element = input.element(j); + val = agg_op(element, val); + if (val == element) { val_index = j; } + count++; + } + } + + bool output_is_valid = (count >= min_periods); + // Use the sentinel value (i.e., -1) for the output will help identify null elements while + // gathering for Min and Max. + output.element(current_index) = output_is_valid ? val_index : default_output; + + // The gather mask shouldn't contain null values, so + // always return zero + return true; + } +}; + +/** + * @brief Operator for applying an ARGMAX/ARGMIN rolling aggregation on a single window for struct. + */ +template +struct DeviceRollingArgMinMaxStruct : DeviceRollingArgMinMaxBase { + DeviceRollingArgMinMaxStruct(size_type _min_periods, Comparator const& _comp) + : DeviceRollingArgMinMaxBase(_min_periods), comp(_comp) + { + } + using DeviceRollingArgMinMaxBase::min_periods; + Comparator comp; + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) + { + auto constexpr default_output = (op == aggregation::ARGMIN) ? ARGMIN_SENTINEL : ARGMAX_SENTINEL; + + auto const valid_count = + has_nulls ? thrust::count_if(thrust::seq, + thrust::make_counting_iterator(start_index), + thrust::make_counting_iterator(end_index), + [&input](size_type idx) { return input.is_valid_nocheck(idx); }) + : end_index - start_index; + + // Use the sentinel value (i.e., -1) for the output will help identify null elements while + // gathering for Min and Max. + output.element(current_index) = + (valid_count >= min_periods) ? thrust::reduce(thrust::seq, + thrust::make_counting_iterator(start_index), + thrust::make_counting_iterator(end_index), + size_type{start_index}, + comp) + : default_output; + + // The gather mask shouldn't contain null values, so always return true. + return true; + } +}; + +/** + * @brief Operator for applying a COUNT_VALID rolling aggregation on a single window. 
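The ARGMIN/ARGMAX operators above reduce each window to a row index and write -1 (`ARGMIN_SENTINEL`/`ARGMAX_SENTINEL`) when the window holds fewer than `min_periods` valid rows; the sentinel marks rows to null out during the later gather. A plain-Python rendering of the string ARGMIN case (a sketch only; on ties the device operator keeps the most recent index, since it re-records the index whenever the running minimum equals the current element, while this sketch keeps the first):

    ARGMIN_SENTINEL = -1

    def window_argmin(column, start, end, min_periods):
        best = None                  # running minimum value
        best_idx = ARGMIN_SENTINEL   # index of the running minimum
        valid = 0                    # non-null rows seen in the window
        for j in range(start, end):
            v = column[j]
            if v is None:            # nulls are skipped, as in the device loop
                continue
            valid += 1
            if best is None or v < best:
                best, best_idx = v, j
        return best_idx if valid >= min_periods else ARGMIN_SENTINEL

    # window_argmin(["b", None, "a", "c"], 0, 4, min_periods=2) -> 2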
+ */ +template +struct DeviceRollingCountValid { + size_type min_periods; + + // what operations do we support + template + static constexpr bool is_supported() + { + return true; + } + + DeviceRollingCountValid(size_type _min_periods) : min_periods(_min_periods) {} + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) + { + bool output_is_valid = ((end_index - start_index) >= min_periods); + + if (output_is_valid) { + cudf::size_type count = 0; + + if (!has_nulls) { + count = end_index - start_index; + } else { + count = thrust::count_if(thrust::seq, + thrust::make_counting_iterator(start_index), + thrust::make_counting_iterator(end_index), + [&input](auto i) { return input.is_valid_nocheck(i); }); + } + output.element(current_index) = count; + } + + return output_is_valid; + } +}; + +/** + * @brief Operator for applying a COUNT_ALL rolling aggregation on a single window. + */ +template +struct DeviceRollingCountAll { + size_type min_periods; + + // what operations do we support + template + static constexpr bool is_supported() + { + return true; + } + + DeviceRollingCountAll(size_type _min_periods) : min_periods(_min_periods) {} + + template + bool __device__ operator()(column_device_view const&, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) + { + cudf::size_type count = end_index - start_index; + + bool output_is_valid = count >= min_periods; + output.element(current_index) = count; + + return output_is_valid; + } +}; + +/** + * @brief Operator for applying a VAR rolling aggregation on a single window. + */ +template +struct DeviceRollingVariance { + size_type const min_periods; + size_type const ddof; + + // what operations do we support + template + static constexpr bool is_supported() + { + return is_fixed_width() and not is_chrono(); + } + + DeviceRollingVariance(size_type _min_periods, size_type _ddof) + : min_periods(_min_periods), ddof{_ddof} + { + } + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) const + { + using DeviceInputType = device_storage_type_t; + + // valid counts in the window + cudf::size_type const count = + has_nulls ? 
thrust::count_if(thrust::seq, + thrust::make_counting_iterator(start_index), + thrust::make_counting_iterator(end_index), + [&input](auto i) { return input.is_valid_nocheck(i); }) + : end_index - start_index; + + // Result will be null if any of the following conditions are met: + // - All inputs are null + // - Number of valid inputs is less than `min_periods` + bool output_is_valid = count > 0 and (count >= min_periods); + + if (output_is_valid) { + if (count >= ddof) { + // Welford algorithm + // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + OutputType m{0}, m2{0}; + size_type running_count{0}; + + for (size_type i = start_index; i < end_index; i++) { + if (has_nulls and input.is_null_nocheck(i)) { continue; } + + OutputType const x = static_cast(input.element(i)); + + running_count++; + OutputType const tmp1 = x - m; + m += tmp1 / running_count; + OutputType const tmp2 = x - m; + m2 += tmp1 * tmp2; + } + if constexpr (is_fixed_point()) { + // For fixed_point types, the previous computed value used unscaled rep-value, + // the final result should be multiplied by the square of decimal `scale`. + OutputType scaleby = exp10(static_cast(input.type().scale())); + scaleby *= scaleby; + output.element(current_index) = m2 / (count - ddof) * scaleby; + } else { + output.element(current_index) = m2 / (count - ddof); + } + } else { + output.element(current_index) = + cuda::std::numeric_limits::signaling_NaN(); + } + } + + return output_is_valid; + } +}; + +/** + * @brief Operator for applying a ROW_NUMBER rolling aggregation on a single window. + */ +template +struct DeviceRollingRowNumber { + size_type min_periods; + + // what operations do we support + template + static constexpr bool is_supported() + { + return true; + } + + DeviceRollingRowNumber(size_type _min_periods) : min_periods(_min_periods) {} + + template + bool __device__ operator()(column_device_view const&, + column_device_view const&, + mutable_column_device_view& output, + size_type start_index, + size_type end_index, + size_type current_index) + { + bool output_is_valid = end_index - start_index >= min_periods; + output.element(current_index) = current_index - start_index + 1; + + return output_is_valid; + } +}; + +struct agg_specific_empty_output { + template + std::unique_ptr operator()(column_view const& input, rolling_aggregation const&) const + { + using target_type = cudf::detail::target_type_t; + + if constexpr (std::is_same_v, void>) { + CUDF_FAIL("Unsupported combination of column-type and aggregation."); + } + + if constexpr (cudf::is_fixed_width()) { + return cudf::make_empty_column(type_to_id()); + } + + if constexpr (op == aggregation::COLLECT_LIST) { + return cudf::make_lists_column( + 0, make_empty_column(type_to_id()), empty_like(input), 0, {}); + } + + return empty_like(input); + } +}; + +/** + * @brief Operator for applying a LEAD rolling aggregation on a single window. 
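`DeviceRollingVariance` above computes each window's variance in one pass with Welford's algorithm, carrying a running mean `m` and a running sum of squared deviations `m2`. A minimal Python sketch of the same update, skipping nulls the way the device loop does (it differs from the device code in two details: it returns NaN explicitly whenever `count <= ddof`, and it ignores the fixed-point rescaling branch):

    def window_variance(window, ddof=1, min_periods=1):
        count, m, m2 = 0, 0.0, 0.0
        for x in window:
            if x is None:           # null rows do not contribute
                continue
            count += 1
            d1 = x - m
            m += d1 / count         # update the running mean
            m2 += d1 * (x - m)      # update the sum of squared deviations
        if count < min_periods or count <= ddof:
            return float("nan")     # the device code nulls or NaNs the row here
        return m2 / (count - ddof)

    # window_variance([1.0, 2.0, None, 4.0]) -> 2.333...  (ddof=1 variance of [1, 2, 4])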
+ */ +template +struct DeviceRollingLead { + size_type row_offset; + + // what operations do we support + template + static constexpr bool is_supported() + { + return cudf::is_fixed_width(); + } + + template + DeviceRollingLead(size_type _row_offset) + requires(is_supported()) + : row_offset(_row_offset) + { + } + + template + DeviceRollingLead(size_type _row_offset) + requires(!is_supported()) + : row_offset(_row_offset) + { + CUDF_FAIL("Invalid aggregation/type pair"); + } + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const& default_outputs, + mutable_column_device_view& output, + size_type, + size_type end_index, + size_type current_index) + { + // Offsets have already been normalized. + + // Check if row is invalid. + if (row_offset > (end_index - current_index - 1)) { + // Invalid row marked. Use default value, if available. + if (default_outputs.size() == 0 || default_outputs.is_null(current_index)) { return false; } + + output.element(current_index) = + default_outputs.element(current_index); + return true; + } + + // Not an invalid row. + auto index = current_index + row_offset; + auto is_null = input.is_null(index); + if (!is_null) { + output.element(current_index) = + input.element>(index); + } + return !is_null; + } +}; + +/** + * @brief Operator for applying a LAG rolling aggregation on a single window. + */ +template +struct DeviceRollingLag { + size_type row_offset; + + // what operations do we support + template + static constexpr bool is_supported() + { + return cudf::is_fixed_width(); + } + + template + DeviceRollingLag(size_type _row_offset) + requires(is_supported()) + : row_offset(_row_offset) + { + } + + template + DeviceRollingLag(size_type _row_offset) + requires(!is_supported()) + : row_offset(_row_offset) + { + CUDF_FAIL("Invalid aggregation/type pair"); + } + + template + bool __device__ operator()(column_device_view const& input, + column_device_view const& default_outputs, + mutable_column_device_view& output, + size_type start_index, + size_type, + size_type current_index) + { + // Offsets have already been normalized. + + // Check if row is invalid. + if (row_offset > (current_index - start_index)) { + // Invalid row marked. Use default value, if available. + if (default_outputs.size() == 0 || default_outputs.is_null(current_index)) { return false; } + + output.element(current_index) = + default_outputs.element(current_index); + return true; + } + + // Not an invalid row. + auto index = current_index - row_offset; + auto is_null = input.is_null(index); + if (!is_null) { + output.element(current_index) = + input.element>(index); + } + return !is_null; + } +}; + +/** + * @brief Maps an `InputType and `aggregation::Kind` value to its corresponding + * rolling window operator. 
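`DeviceRollingLead` and `DeviceRollingLag` above shift each row's value forward or backward by `row_offset`, falling back to the `default_outputs` column (or null) where the shifted index leaves the window. Stripped of grouping, window bounds, and validity plumbing, the row-wise effect is simply (illustrative sketch):

    def lead(values, offset, default=None):
        n = len(values)
        return [values[i + offset] if i + offset < n else default for i in range(n)]

    def lag(values, offset, default=None):
        return [values[i - offset] if i >= offset else default for i in range(len(values))]

    # lead([10, 20, 30], 1) -> [20, 30, None]
    # lag([10, 20, 30], 1)  -> [None, 10, 20]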
+ *
+ * @tparam InputType The input type to map to its corresponding operator
+ * @tparam k The `aggregation::Kind` value to map to its corresponding operator
+ */
+template <typename InputType, aggregation::Kind k>
+struct corresponding_rolling_operator {
+  using type = DeviceRolling<InputType, k>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::ARGMIN> {
+  using type = DeviceRollingArgMinMaxBase<InputType, aggregation::Kind::ARGMIN>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::ARGMAX> {
+  using type = DeviceRollingArgMinMaxBase<InputType, aggregation::Kind::ARGMAX>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::COUNT_VALID> {
+  using type = DeviceRollingCountValid<InputType>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::COUNT_ALL> {
+  using type = DeviceRollingCountAll<InputType>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::ROW_NUMBER> {
+  using type = DeviceRollingRowNumber<InputType>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::VARIANCE> {
+  using type = DeviceRollingVariance<InputType>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::LEAD> {
+  using type = DeviceRollingLead<InputType>;
+};
+
+template <typename InputType>
+struct corresponding_rolling_operator<InputType, aggregation::Kind::LAG> {
+  using type = DeviceRollingLag<InputType>;
+};
+
+/**
+ * @brief Functor for creating a device rolling operator based on input type and aggregation type.
+ */
+template <typename InputType, aggregation::Kind k, typename Enable = void>
+struct create_rolling_operator {
+  auto operator()(size_type min_periods, rolling_aggregation const&)
+  {
+    return typename corresponding_rolling_operator<InputType, k>::type(min_periods);
+  }
+};
+
+template <typename InputType>
+struct create_rolling_operator<InputType, aggregation::Kind::VARIANCE> {
+  auto operator()(size_type min_periods, rolling_aggregation const& agg)
+  {
+    return DeviceRollingVariance<InputType>{
+      min_periods, dynamic_cast<cudf::detail::var_aggregation const&>(agg)._ddof};
+  }
+};
+
+template <typename InputType>
+struct create_rolling_operator<InputType, aggregation::Kind::LEAD> {
+  auto operator()(size_type, rolling_aggregation const& agg)
+  {
+    return DeviceRollingLead<InputType>{
+      dynamic_cast<cudf::detail::lead_lag_aggregation const&>(agg).row_offset};
+  }
+};
+
+template <typename InputType>
+struct create_rolling_operator<InputType, aggregation::Kind::LAG> {
+  auto operator()(size_type, rolling_aggregation const& agg)
+  {
+    return DeviceRollingLag<InputType>{
+      dynamic_cast<cudf::detail::lead_lag_aggregation const&>(agg).row_offset};
+  }
+};
+
+template <typename InputType, aggregation::Kind k>
+struct create_rolling_operator<
+  InputType,
+  k,
+  typename std::enable_if_t<std::is_same_v<InputType, cudf::string_view> &&
+                            (k == aggregation::Kind::ARGMIN || k == aggregation::Kind::ARGMAX)>> {
+  auto operator()(size_type min_periods, rolling_aggregation const&)
+  {
+    return DeviceRollingArgMinMaxString<k>{min_periods};
+  }
+};
+
+template <typename InputType, aggregation::Kind k>
+struct create_rolling_operator<
+  InputType,
+  k,
+  typename std::enable_if_t<std::is_same_v<InputType, cudf::struct_view> &&
+                            (k == aggregation::Kind::ARGMIN || k == aggregation::Kind::ARGMAX)>> {
+  template <typename Comparator>
+  auto operator()(size_type min_periods, Comparator const& comp)
+  {
+    return DeviceRollingArgMinMaxStruct<k, Comparator>{min_periods, comp};
+  }
+};
+
+}  // namespace detail
+
+}  // namespace cudf
diff --git a/cpp/src/rolling/detail/rolling_udf.cuh b/cpp/src/rolling/detail/rolling_udf.cuh
new file mode 100644
index 00000000000..f5c3f3a4b70
--- /dev/null
+++ b/cpp/src/rolling/detail/rolling_udf.cuh
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "jit/cache.hpp"
+#include "jit/parser.hpp"
+#include "jit/util.hpp"
+#include "rolling.hpp"
+#include "rolling_jit.hpp"
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/detail/device_scalar.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/utilities/bit.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <rmm/resource_ref.hpp>
+
+#include <jit_preprocessed_files/rolling/jit/kernel.cu.jit.hpp>
+
+namespace cudf {
+namespace detail {
+
+// Applies a user-defined rolling window function to the values in a column.
+template <typename PrecedingWindowIterator, typename FollowingWindowIterator>
+std::unique_ptr<column> rolling_window_udf(column_view const& input,
+                                           PrecedingWindowIterator preceding_window,
+                                           std::string const& preceding_window_str,
+                                           FollowingWindowIterator following_window,
+                                           std::string const& following_window_str,
+                                           size_type min_periods,
+                                           rolling_aggregation const& agg,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr)
+{
+  static_assert(warp_size == cudf::detail::size_in_bits<cudf::bitmask_type>(),
+                "bitmask_type size does not match CUDA warp size");
+
+  if (input.has_nulls()) {
+    CUDF_FAIL("Currently the UDF version of rolling window does NOT support inputs with nulls.");
+  }
+
+  min_periods = std::max(min_periods, 0);
+
+  auto& udf_agg = dynamic_cast<udf_aggregation const&>(agg);
+
+  std::string hash = "prog_rolling." + std::to_string(std::hash<std::string>{}(udf_agg._source));
+
+  std::string cuda_source;
+  switch (udf_agg.kind) {
+    case aggregation::Kind::PTX:
+      cuda_source +=
+        cudf::jit::parse_single_function_ptx(udf_agg._source,
+                                             udf_agg._function_name,
+                                             {{0, cudf::type_to_name(udf_agg._output_type) + " *"},
+                                              {5, "void const *"}});  // args 0 and 5 are pointers
+      break;
+    case aggregation::Kind::CUDA:
+      cuda_source += cudf::jit::parse_single_function_cuda(udf_agg._source, udf_agg._function_name);
+      break;
+    default: CUDF_FAIL("Unsupported UDF type.");
+  }
+
+  std::unique_ptr<column> output = make_numeric_column(
+    udf_agg._output_type, input.size(), cudf::mask_state::UNINITIALIZED, stream, mr);
+
+  auto output_view = output->mutable_view();
+  cudf::detail::device_scalar<size_type> device_valid_count{0, stream};
+
+  std::string kernel_name =
+    jitify2::reflection::Template("cudf::rolling::jit::gpu_rolling_new")  //
+      .instantiate(cudf::type_to_name(input.type()),  // list of template arguments
+                   cudf::type_to_name(output->type()),
+                   udf_agg._operator_name,
+                   preceding_window_str.c_str(),
+                   following_window_str.c_str());
+
+  cudf::jit::get_program_cache(*rolling_jit_kernel_cu_jit)
+    .get_kernel(
+      kernel_name, {}, {{"rolling/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."})  //
+    ->configure_1d_max_occupancy(0, 0, nullptr, stream.value())                           //
+    ->launch(input.size(),
+             cudf::jit::get_data_ptr(input),
+             input.null_mask(),
+             cudf::jit::get_data_ptr(output_view),
+             output_view.null_mask(),
+             device_valid_count.data(),
+             preceding_window,
+             following_window,
+             min_periods);
+
+  output->set_null_count(output->size() - device_valid_count.value(stream));
+
+  // check the stream for debugging
+  CUDF_CHECK_CUDA(stream.value());
+
+  return output;
+}
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/src/rolling/detail/rolling_utils.cu b/cpp/src/rolling/detail/rolling_utils.cu
index 00d7b8648d4..114f174f160 100644
--- a/cpp/src/rolling/detail/rolling_utils.cu
+++ b/cpp/src/rolling/detail/rolling_utils.cu
@@ -14,8 +14,8 @@
 * limitations under the License.
*/ -#include "rolling.cuh" #include "rolling.hpp" +#include "rolling_operators.cuh" #include #include diff --git a/cpp/src/rolling/detail/rolling_variable_window.cu b/cpp/src/rolling/detail/rolling_variable_window.cu index a345c0d0c28..f7729168fcf 100644 --- a/cpp/src/rolling/detail/rolling_variable_window.cu +++ b/cpp/src/rolling/detail/rolling_variable_window.cu @@ -15,16 +15,13 @@ */ #include "rolling.cuh" +#include "rolling_udf.cuh" #include #include #include #include -#include -#include -#include - namespace cudf::detail { // Applies a variable-size rolling window function to the values in a column. diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index f3e85d4d3be..1dd118d85a5 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -17,19 +17,15 @@ #include "detail/optimized_unbounded_window.hpp" #include "detail/range_window_bounds.hpp" #include "detail/rolling.cuh" -#include "detail/rolling_jit.hpp" +#include "detail/rolling_udf.cuh" #include "detail/rolling_utils.cuh" -#include "rolling/detail/rolling.hpp" +#include #include #include -#include -#include -#include #include #include #include -#include #include #include #include @@ -37,11 +33,6 @@ #include -#include - -#include -#include - namespace cudf { namespace detail { From 9f7e84de927d150da582238499457cbcb19b6b70 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 20 Aug 2025 12:05:24 -0700 Subject: [PATCH 178/366] Move test_udf_masked_ops/test_dropna to new cudf classic testing directory (#19730) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/19730 --- python/cudf/cudf/core/indexed_frame.py | 2 +- .../cudf/pandas/scripts/conftest-patch.py | 2 +- python/cudf/cudf/testing/_utils.py | 7 - python/cudf/cudf/tests/conftest.py | 55 + .../tests/dataframe/methods/test_apply.py | 772 ++++++++++++ .../tests/dataframe/methods/test_dropna.py | 157 ++- .../indexes/index/methods/test_dropna.py | 27 + .../tests/indexes/index/test_attributes.py | 39 +- .../indexes/multiindex/methods/test_dropna.py | 41 + .../indexes/rangeindex/test_attributes.py | 10 +- .../cudf/tests/series/methods/test_apply.py | 281 +++++ .../cudf/tests/series/methods/test_dropna.py | 81 ++ python/cudf/cudf/tests/test_contains.py | 41 - python/cudf/cudf/tests/test_dropna.py | 299 ----- .../cudf/tests/test_extension_compilation.py | 10 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 1069 ----------------- 16 files changed, 1465 insertions(+), 1428 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_apply.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_dropna.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_dropna.py create mode 100644 python/cudf/cudf/tests/series/methods/test_apply.py create mode 100644 python/cudf/cudf/tests/series/methods/test_dropna.py delete mode 100644 python/cudf/cudf/tests/test_dropna.py delete mode 100644 python/cudf/cudf/tests/test_udf_masked_ops.py diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index f7ef0f0db12..e9c8b71fbe3 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4225,7 +4225,7 @@ def dropna( name toy born 0 Alfred Batmobile 
1940-04-25 """ - if axis == 0: + if axis in [0, "index"]: result = self._drop_na_rows(how=how, subset=subset, thresh=thresh) if ignore_index: result.index = RangeIndex(len(result)) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 9a1051ec158..b0419f844fd 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -5157,6 +5157,7 @@ def pytest_unconfigure(config): "tests/frame/methods/test_dropna.py::TestDataFrameMissingData::test_dropna", "tests/frame/methods/test_dropna.py::TestDataFrameMissingData::test_dropna_multiple_axes", "tests/frame/methods/test_dropna.py::TestDataFrameMissingData::test_no_nans_in_frame[axis=0]", + "tests/frame/methods/test_dropna.py::TestDataFrameMissingData::test_no_nans_in_frame[axis='index']", "tests/frame/methods/test_dtypes.py::TestDataFrameDataTypes::test_dtypes_timedeltas", "tests/frame/methods/test_equals.py::TestEquals::test_equals_different_blocks", "tests/frame/methods/test_explode.py::test_duplicate_index[input_dict0-input_index0-expected_dict0-expected_index0]", @@ -13144,7 +13145,6 @@ def pytest_unconfigure(config): "tests/series/methods/test_update.py::TestUpdate::test_update_dtypes[other9-int64-expected9-FutureWarning]", "tests/series/methods/test_value_counts.py::TestSeriesValueCounts::test_value_counts_categorical_with_nan", "tests/series/test_api.py::TestSeriesMisc::test_attrs", - "tests/series/test_api.py::TestSeriesMisc::test_axis_alias", "tests/series/test_api.py::TestSeriesMisc::test_index_tab_completion[index0]", "tests/series/test_api.py::TestSeriesMisc::test_index_tab_completion[index10]", "tests/series/test_api.py::TestSeriesMisc::test_index_tab_completion[index11]", diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 4c662808b9c..2987f94d1c9 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -1,7 +1,6 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations -import itertools import string from collections import abc from contextlib import contextmanager @@ -358,12 +357,6 @@ def assert_asserters_equal( cudf_asserter(cudf_left, cudf_right, *args, **kwargs) -parametrize_numeric_dtypes_pairwise = pytest.mark.parametrize( - "left_dtype,right_dtype", - list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)), -) - - @contextmanager def expect_warning_if(condition, warning=FutureWarning, *args, **kwargs): """Catch a warning using pytest.warns if the expect_warning is True. diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 772abdfba32..b9c21a67c43 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -1,6 +1,7 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. 
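+# The operator lists below feed parametrized fixtures, so a single test body
+# fans out into one test case per operator. A minimal sketch of the pattern
+# (names here are illustrative, not part of this module):
+#
+#     @pytest.fixture(params=[operator.add, operator.sub])
+#     def arithmetic_op(request):
+#         return request.param
+#
+#     def test_op(arithmetic_op):
+#         assert arithmetic_op(2, 1) in (3, 1)  # runs once per fixture param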
import itertools +import math import operator import os import pathlib @@ -210,6 +211,44 @@ def set_decomp_env_vars(monkeypatch, request): operator.gt, operator.ge, ] +bitwise_ops = [ + operator.and_, + operator.or_, + operator.xor, +] +unary_ops = [ + math.acos, + math.acosh, + math.asin, + math.asinh, + math.atan, + math.atanh, + math.ceil, + math.cos, + math.degrees, + math.erf, + math.erfc, + math.exp, + math.expm1, + math.fabs, + math.floor, + math.gamma, + math.lgamma, + math.log, + math.log10, + math.log1p, + math.log2, + math.radians, + math.sin, + math.sinh, + math.sqrt, + math.tan, + math.tanh, + operator.pos, + operator.neg, + operator.not_, + operator.invert, +] @pytest.fixture(params=arithmetic_ops) @@ -238,6 +277,16 @@ def comparison_op_method(comparison_op): return comparison_op.__name__ +@pytest.fixture(params=bitwise_ops) +def bitwise_op(request): + return request.param + + +@pytest.fixture(params=unary_ops) +def unary_op(request): + return request.param + + @pytest.fixture(params=arithmetic_ops + comparison_ops) def binary_op(request): return request.param @@ -576,3 +625,9 @@ def categorical_ordered(request): def interval_closed(request): """Param for `closed` argument for interval types""" return request.param + + +@pytest.fixture(params=["all", "any"]) +def dropna_how(request): + """Param for `how` argument""" + return request.param diff --git a/python/cudf/cudf/tests/dataframe/methods/test_apply.py b/python/cudf/cudf/tests/dataframe/methods/test_apply.py new file mode 100644 index 00000000000..b49f8d9dc25 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_apply.py @@ -0,0 +1,772 @@ +# Copyright (c) 2021-2025, NVIDIA CORPORATION. +import decimal +import math +import operator + +import numpy as np +import pytest +from numba import cuda +from numba.core.typing import signature as nb_signature +from numba.core.typing.templates import AbstractTemplate +from numba.cuda.cudadecl import registry as cuda_decl_registry +from numba.cuda.cudaimpl import lower as cuda_lower + +import cudf +from cudf.core.missing import NA +from cudf.core.udf._ops import ( + comparison_ops, +) +from cudf.core.udf.strings_lowering import ( + cast_string_view_to_managed_udf_string, +) +from cudf.core.udf.strings_typing import ( + StringView, + managed_udf_string, + string_view, +) +from cudf.testing import assert_eq + + +def sv_to_managed_udf_str(sv): + """ + Cast a string_view object to a managed_udf_string object + + This placeholder function never runs in python + It exists only for numba to have something to replace + with the typing and lowering code below + + This is similar conceptually to needing a translation + engine to emit an expression in target language "B" when + there is no equivalent in the source language "A" to + translate from. 
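+    Concretely, language "A" here is the Python source that numba types,
+    and language "B" is the lowered device code; the typing template and
+    lowering registered below perform that translation.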
This function effectively defines the + expression in language "A" and the associated typing + and lowering describe the translation process, despite + the expression having no meaning in language "A" + """ + pass + + +@cuda_decl_registry.register_global(sv_to_managed_udf_str) +class StringViewToUDFStringDecl(AbstractTemplate): + def generic(args, kws): + if isinstance(args[0], StringView) and len(args) == 1: + return nb_signature(managed_udf_string, string_view) + + +@cuda_lower(sv_to_managed_udf_str, string_view) +def sv_to_udf_str_testing_lowering(context, builder, sig, args): + return cast_string_view_to_managed_udf_string( + context, builder, sig.args[0], sig.return_type, args[0] + ) + + +def run_masked_udf_test(func, data, args=(), nullable=True, **kwargs): + gdf = data + pdf = data.to_pandas(nullable=nullable) + + expect = pdf.apply(func, args=args, axis=1) + obtain = gdf.apply(func, args=args, axis=1) + assert_eq(expect, obtain, **kwargs) + + +@pytest.fixture +def str_udf_data(): + return cudf.DataFrame( + { + "str_col": [ + "abc", + "ABC", + "AbC", + "123", + "123aBc", + "123@.!", + "", + "rapids ai", + "gpu", + "True", + "False", + "1.234", + ".123a", + "0.013", + "1.0", + "01", + "20010101", + "cudf", + "cuda", + "gpu", + "This Is A Title", + "This is Not a Title", + "Neither is This a Title", + "NoT a TiTlE", + "123 Title Works", + ] + } + ) + + +@pytest.fixture(params=["a", "2", "gpu", "", " "]) +def substr(request): + return request.param + + +def test_string_udf_len(str_udf_data): + def func(row): + return len(row["str_col"]) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_startswith(str_udf_data, substr): + def func(row): + return row["str_col"].startswith(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_endswith(str_udf_data, substr): + def func(row): + return row["str_col"].endswith(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_find(str_udf_data, substr): + def func(row): + return row["str_col"].find(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_rfind(str_udf_data, substr): + def func(row): + return row["str_col"].rfind(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_contains(str_udf_data, substr): + def func(row): + return substr in row["str_col"] + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("other", ["cudf", "123", "", " "]) +@pytest.mark.parametrize("cmpop", comparison_ops) +def test_string_udf_cmpops(str_udf_data, other, cmpop): + def func(row): + return cmpop(row["str_col"], other) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isalnum(str_udf_data): + def func(row): + return row["str_col"].isalnum() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isalpha(str_udf_data): + def func(row): + return row["str_col"].isalpha() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isdigit(str_udf_data): + def func(row): + return row["str_col"].isdigit() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isdecimal(str_udf_data): + def func(row): + return row["str_col"].isdecimal() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isupper(str_udf_data): + def func(row): + return row["str_col"].isupper() + + 
run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_islower(str_udf_data): + def func(row): + return row["str_col"].islower() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_isspace(str_udf_data): + def func(row): + return row["str_col"].isspace() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_istitle(str_udf_data): + def func(row): + return row["str_col"].istitle() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_count(str_udf_data, substr): + def func(row): + return row["str_col"].count(substr) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.xfail(reason="Identity function not supported.") +def test_string_udf_return_string(str_udf_data): + def func(row): + return row["str_col"] + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) +def test_string_udf_strip(str_udf_data, strip_char): + def func(row): + return row["str_col"].strip(strip_char) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) +def test_string_udf_lstrip(str_udf_data, strip_char): + def func(row): + return row["str_col"].lstrip(strip_char) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) +def test_string_udf_rstrip(str_udf_data, strip_char): + def func(row): + return row["str_col"].rstrip(strip_char) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_upper(str_udf_data): + def func(row): + return row["str_col"].upper() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_string_udf_lower(str_udf_data): + def func(row): + return row["str_col"].lower() + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"]) +def test_string_udf_concat(str_udf_data, concat_char): + def func(row): + return row["str_col"] + concat_char + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +@pytest.mark.parametrize("to_replace", ["a", "1", "", "@"]) +@pytest.mark.parametrize("replacement", ["a", "1", "", "@"]) +def test_string_udf_replace(str_udf_data, to_replace, replacement): + def func(row): + return row["str_col"].replace(to_replace, replacement) + + run_masked_udf_test(func, str_udf_data, check_dtype=False) + + +def test_arith_masked_vs_masked(arithmetic_op): + # This test should test all the typing + # and lowering for arithmetic ops between + # two columns + def func(row): + x = row["a"] + y = row["b"] + return arithmetic_op(x, y) + + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_bitwise_masked_vs_masked(bitwise_op): + # This test should test all the typing + # and lowering for bitwise ops between + # two columns + def func(row): + x = row["a"] + y = row["b"] + return bitwise_op(x, y) + + gdf = cudf.DataFrame( + { + "a": [1, 0, 1, 0, 0b1011, 42, None], + "b": [1, 1, 0, 0, 0b1100, -42, 5], + } + ) + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("op", [operator.add, operator.sub]) +def test_arith_masked_vs_masked_datelike( + op, datetime_types_as_str, temporal_types_as_str +): + # Datetime version of 
the above + # does not test all dtype combinations for now + if temporal_types_as_str.startswith("datetime") and op is operator.add: + # don't try adding datetimes to datetimes. + pytest.skip("Adding datetime to datetime is not valid") + + def func(row): + x = row["a"] + y = row["b"] + return op(x, y) + + gdf = cudf.DataFrame( + { + "a": ["2011-01-01", cudf.NA, "2011-03-01", cudf.NA], + "b": [4, 5, cudf.NA, cudf.NA], + } + ) + gdf["a"] = gdf["a"].astype(datetime_types_as_str) + gdf["b"] = gdf["b"].astype(temporal_types_as_str) + + pdf = gdf.to_pandas() + expect = op(pdf["a"], pdf["b"]) + obtain = gdf.apply(func, axis=1) + assert_eq(expect, obtain, check_dtype=False) + # TODO: After the following pandas issue is + # fixed, uncomment the following line and delete + # through `to_pandas()` statement. + # https://github.com/pandas-dev/pandas/issues/52411 + + # run_masked_udf_test(func, gdf, nullable=False, check_dtype=False) + + +def test_compare_masked_vs_masked(comparison_op): + # this test should test all the + # typing and lowering for comparisons + # between columns + + def func(row): + x = row["a"] + y = row["b"] + return comparison_op(x, y) + + # we should get: + # [?, ?, , , ] + gdf = cudf.DataFrame( + {"a": [1, 0, None, 1, None], "b": [0, 1, 0, None, None]} + ) + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("constant", [1, 1.5, True, False]) +def test_arith_masked_vs_constant(arithmetic_op, constant): + if constant is False and arithmetic_op in { + operator.mod, + operator.pow, + operator.truediv, + operator.floordiv, + operator.imod, + operator.ipow, + operator.itruediv, + operator.ifloordiv, + }: + # The following tests cases yield undefined behavior: + # - truediv(x, False) because its dividing by zero + # - floordiv(x, False) because its dividing by zero + # - mod(x, False) because its mod by zero, + # - pow(x, False) because we have an NA in the series and pandas + # insists that (NA**0 == 1) where we do not + pytest.skip( + f"{constant=} yields undefined behavior for {arithmetic_op=}" + ) + + def func(row): + x = row["data"] + return arithmetic_op(x, constant) + + gdf = cudf.DataFrame({"data": [1, 2, cudf.NA]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("constant", [1, 1.5, True, False]) +@pytest.mark.parametrize("data", [[2, 3, cudf.NA], [1, cudf.NA, 1]]) +def test_arith_masked_vs_constant_reflected( + request, arithmetic_op, constant, data +): + def func(row): + x = row["data"] + return arithmetic_op(constant, x) + + # Just a single column -> result will be all NA + gdf = cudf.DataFrame({"data": data}) + + # cudf differs from pandas for 1**NA + request.applymarker( + pytest.mark.xfail( + condition=( + constant == 1 + and arithmetic_op in {operator.pow, operator.ipow} + ), + reason="https://github.com/rapidsai/cudf/issues/7478", + ) + ) + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("data", [[1, cudf.NA, 3], [2, 3, cudf.NA]]) +def test_arith_masked_vs_null(request, arithmetic_op, data): + def func(row): + x = row["data"] + return arithmetic_op(x, NA) + + gdf = cudf.DataFrame({"data": data}) + + # In pandas, 1**NA == 1. 
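+    # For reference (an illustration of the divergence noted in the xfail
+    # below, not executed here): pandas evaluates 1 ** pd.NA to 1, while
+    # cudf evaluates 1 ** cudf.NA to <NA>, so pow/ipow rows containing 1
+    # diverge between the two libraries.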
+ request.applymarker( + pytest.mark.xfail( + condition=( + (gdf["data"] == 1).any() + and arithmetic_op in {operator.pow, operator.ipow} + ), + reason="https://github.com/rapidsai/cudf/issues/7478", + ) + ) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_arith_masked_vs_null_reflected(arithmetic_op): + def func(row): + x = row["data"] + return arithmetic_op(NA, x) + + gdf = cudf.DataFrame({"data": [1, None, 3]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_unary_masked(unary_op): + # This test should test all the typing + # and lowering for unary ops + + def func(row): + x = row["a"] + return unary_op(x) if x is not NA else NA + + if "log" in unary_op.__name__: + gdf = cudf.DataFrame({"a": [0.1, 1.0, None, 3.5, 1e8]}) + elif unary_op.__name__ in {"asin", "acos"}: + gdf = cudf.DataFrame({"a": [0.0, 0.5, None, 1.0]}) + elif unary_op.__name__ in {"atanh"}: + gdf = cudf.DataFrame({"a": [0.0, -0.5, None, 0.8]}) + elif unary_op.__name__ in {"acosh", "sqrt", "lgamma"}: + gdf = cudf.DataFrame({"a": [1.0, 2.0, None, 11.0]}) + elif unary_op.__name__ in {"gamma"}: + gdf = cudf.DataFrame({"a": [0.1, 2, None, 4]}) + elif unary_op.__name__ in {"invert"}: + gdf = cudf.DataFrame({"a": [-100, 128, None, 0]}, dtype="int64") + else: + gdf = cudf.DataFrame({"a": [-125.60, 395.2, 0.0, None]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_masked_is_null_conditional(): + def func(row): + x = row["a"] + y = row["b"] + if x is NA: + return y + else: + return x + y + + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_apply_contains(): + def func(row): + x = row["a"] + return x in [1, 2] + + gdf = cudf.DataFrame({"a": [1, 3]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) +def test_apply_mixed_dtypes(numeric_types_as_str, numeric_types_as_str2, op): + """ + Test that operations can be performed between columns + of different dtypes and return a column with the correct + values and nulls + """ + + # First perform the op on two dummy data on host, if numpy can + # safely type cast, we should expect it to work in udf too. 
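+    # For example, op(np.int8(0), np.float64(42)) promotes cleanly under
+    # numpy's casting rules, while operator.and_(np.float64(0),
+    # np.float64(42)) raises TypeError, so that dtype/op combination is
+    # skipped below.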
+ try: + op( + np.dtype(numeric_types_as_str).type(0), + np.dtype(numeric_types_as_str2).type(42), + ) + except TypeError: + pytest.skip("Operation is unsupported for corresponding dtype.") + + def func(row): + x = row["a"] + y = row["b"] + return op(x, y) + + gdf = cudf.DataFrame({"a": [1.5, None, 3, None], "b": [4, 5, None, None]}) + gdf["a"] = gdf["a"].astype(numeric_types_as_str) + gdf["b"] = gdf["b"].astype(numeric_types_as_str2) + + run_masked_udf_test(func, gdf, check_dtype=False) + + +@pytest.mark.parametrize("val", [5, 5.5]) +def test_apply_return_literal(val): + """ + Test unification codepath for scalars and MaskedType + makes sure that numba knows how to cast a scalar value + to a MaskedType + """ + + def func(row): + x = row["a"] + y = row["b"] + if x is not NA and x < 2: + return val + else: + return x + y + + gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) + + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_apply_return_null(): + """ + Tests casting / unification of Masked and NA + """ + + def func(row): + x = row["a"] + if x is NA: + return NA + else: + return x + + gdf = cudf.DataFrame({"a": [1, None, 3]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_apply_return_either_null_or_literal(): + def func(row): + x = row["a"] + if x > 5: + return 2 + else: + return NA + + gdf = cudf.DataFrame({"a": [1, 3, 6]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_apply_return_literal_only(): + def func(x): + return 5 + + gdf = cudf.DataFrame({"a": [1, None, 3]}) + run_masked_udf_test(func, gdf, check_dtype=False) + + +def test_apply_everything(): + def func(row): + w = row["a"] + x = row["b"] + y = row["c"] + z = row["d"] + if x is NA: + return w + y - z + elif ((z > y) is not NA) and z > y: + return x + elif ((x + y) is not NA) and x + y == 0: + return z / x + elif x + y is NA: + return 2.5 + elif w > 100: + return ( + math.sin(x) + + math.sqrt(y) + - (-z) + + math.lgamma(x) * math.fabs(-0.8) / math.radians(3.14) + ) + else: + return y > 2 + + gdf = cudf.DataFrame( + { + "a": [1, 3, 6, 0, None, 5, None, 101], + "b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0, 1.0], + "c": [2, 3, 6, 0, None, 5, None, 6], + "d": [4, None, 6, 0, None, 5, None, 7.5], + } + ) + run_masked_udf_test(func, gdf, check_dtype=False) + + +### + + +### + + +def test_masked_udf_lambda_support(binary_op): + func = lambda row: binary_op(row["a"], row["b"]) # noqa: E731 + + data = cudf.DataFrame( + {"a": [1, cudf.NA, 3, cudf.NA], "b": [1, 2, cudf.NA, cudf.NA]} + ) + + run_masked_udf_test(func, data, check_dtype=False) + + +def test_masked_udf_nested_function_support(binary_op): + """ + Nested functions need to be explicitly jitted by the user + for numba to recognize them. Unfortunately the object + representing the jitted function can not itself be used in + pandas udfs. 
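+
+    The workaround exercised below is to compile the helper as a device
+    function first, e.g. ``inner_gpu = cuda.jit(device=True)(inner)``, and
+    call that from the row UDF passed to ``apply``.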
+ """ + + def inner(x, y): + return binary_op(x, y) + + def outer(row): + x = row["a"] + y = row["b"] + return inner(x, y) + + gdf = cudf.DataFrame( + {"a": [1, cudf.NA, 3, cudf.NA], "b": [1, 2, cudf.NA, cudf.NA]} + ) + + with pytest.raises(ValueError): + gdf.apply(outer, axis=1) + + pdf = gdf.to_pandas(nullable=True) + inner_gpu = cuda.jit(device=True)(inner) + + def outer_gpu(row): + x = row["a"] + y = row["b"] + return inner_gpu(x, y) + + got = gdf.apply(outer_gpu, axis=1) + expect = pdf.apply(outer, axis=1) + assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, + {"a": [1, 2, 3], "c": [4, 5, 6], "b": [7, 8, 9]}, + {"a": [1, 2, 3], "b": [4, 5, 6], "c": ["a", "b", "c"]}, + ], +) +def test_masked_udf_subset_selection(data): + def func(row): + return row["a"] + row["b"] + + data = cudf.DataFrame(data) + run_masked_udf_test(func, data) + + +@pytest.mark.parametrize( + "unsupported_col", + [ + lambda: cudf.Series( + [ + decimal.Decimal("1.0"), + decimal.Decimal("2.0"), + decimal.Decimal("3.0"), + ], + dtype=cudf.Decimal64Dtype(2, 1), + ), + lambda: cudf.Series([1, 2, 3], dtype="category"), + lambda: cudf.interval_range(start=0, end=3), + lambda: [[1, 2], [3, 4], [5, 6]], + lambda: [{"a": 1}, {"a": 2}, {"a": 3}], + ], +) +def test_masked_udf_unsupported_dtype(unsupported_col): + data = cudf.DataFrame({"unsupported_col": unsupported_col()}) + + def func(row): + return row["unsupported_col"] + + # check that we fail when an unsupported type is used within a function + with pytest.raises(ValueError): + data.apply(func, axis=1) + + # also check that a DF containing unsupported dtypes can still run a + # function that does NOT involve any of the unsupported dtype columns + data["supported_col"] = 1 + + def other_func(row): + return row["supported_col"] + + expect = cudf.Series(np.ones(len(data))) + got = data.apply(other_func, axis=1) + + assert_eq(expect, got, check_dtype=False) + + +# tests for `DataFrame.apply(f, args=(x,y,z))` +# testing the whole space of possibilities is intractable +# these test the most rudimentary guaranteed functionality +@pytest.mark.parametrize( + "data", + [ + {"a": [1, cudf.NA, 3]}, + {"a": [0.5, 2.0, cudf.NA, cudf.NA, 5.0]}, + {"a": [True, False, cudf.NA]}, + ], +) +def test_masked_udf_scalar_args_binops(data, binary_op): + data = cudf.DataFrame(data) + + def func(row, c): + return binary_op(row["a"], c) + + run_masked_udf_test(func, data, args=(1,), check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, cudf.NA, 3]}, + {"a": [0.5, 2.0, cudf.NA, cudf.NA, 5.0]}, + {"a": [True, False, cudf.NA]}, + ], +) +def test_masked_udf_scalar_args_binops_multiple(data, binary_op): + data = cudf.DataFrame(data) + + def func(row, c, k): + x = binary_op(row["a"], c) + y = binary_op(x, k) + return y + + run_masked_udf_test(func, data, args=(1, 2), check_dtype=False) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_dropna.py b/python/cudf/cudf/tests/dataframe/methods/test_dropna.py index ec27503a0ef..f3c2f5a236a 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_dropna.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_dropna.py @@ -1,8 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
- import numpy as np import pandas as pd +import pyarrow as pa +import pytest import cudf from cudf.testing import assert_eq @@ -32,3 +33,157 @@ def test_datetime_dataframe(): assert_eq(ps.dropna(), gs.dropna()) assert_eq(ps.isnull(), gs.isnull()) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, None]}, + {"a": [1, 2, None], "b": [3, 4, 5]}, + {"a": [1, 2, None], "b": [3, 4, None]}, + {"a": [None, 1, 2], "b": [1, 2, None]}, + {"a": [None, 1, None], "b": [None, 2, None]}, + {"a": [None, None, 1], "b": [1, 2, None]}, + {"a": ["d", "e", "f"], "b": ["a", None, "c"]}, + ], +) +def test_dropna_dataframe(data, dropna_how, axis, inplace): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + expected = pdf.dropna(axis=axis, how=dropna_how, inplace=inplace) + actual = gdf.dropna(axis=axis, how=dropna_how, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + { + "a": pa.array([None, None, None], type=pa.float64()), + "b": [1, 2, None], + }, + { + "a": pa.array([np.nan, np.nan, np.nan]), + "b": [1, 2, None], + }, + {"a": pa.array([None, None, None], type=pa.string())}, + ], +) +def test_dropna_with_all_nulls(dropna_how, data, axis): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + assert_eq( + pdf.dropna(axis=axis, how=dropna_how), + gdf.dropna(axis=axis, how=dropna_how), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "data,subset", + [ + ({"a": [1, None], "b": [1, 2]}, ["a"]), + ({"a": [1, None], "b": [1, 2]}, ["b"]), + ({"a": [1, None], "b": [1, 2]}, []), + ({"a": [1, 2], "b": [1, 2]}, ["b"]), + ({"a": [1, 2, None], "b": [1, None, 2]}, ["a"]), + ({"a": [1, 2, None], "b": [1, None, 2]}, ["b"]), + ({"a": [1, 2, None], "b": [1, None, 2]}, ["a", "b"]), + ], +) +def test_dropna_subset_rows(data, subset): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + assert_eq(pdf.dropna(subset=subset), gdf.dropna(subset=subset)) + + +@pytest.mark.parametrize( + "data, subset", + [ + ({"a": [1, None], "b": [1, 2]}, [0]), + ({"a": [1, None], "b": [1, 2]}, [1]), + ({"a": [1, None], "b": [1, 2]}, []), + ({"a": [1, 2], "b": [1, 2]}, [0]), + ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [0]), + ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [1]), + ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [0, 1]), + ], +) +def test_dropna_subset_cols(data, subset): + pdf = pd.DataFrame(data) + gdf = cudf.from_pandas(pdf) + + assert_eq( + pdf.dropna(axis=1, subset=subset), gdf.dropna(axis=1, subset=subset) + ) + + +# TODO: can't test with subset=[] below since Pandas +# returns empty DF when both subset=[] and thresh are specified. 
+@pytest.mark.parametrize("thresh", [0, 1, 2]) +@pytest.mark.parametrize("subset", [None, ["a"], ["b"], ["a", "b"]]) +def test_dropna_thresh(thresh, subset): + pdf = pd.DataFrame({"a": [1, 2, None, None], "b": [1, 2, 3, None]}) + gdf = cudf.from_pandas(pdf) + + assert_eq( + pdf.dropna(axis=0, thresh=thresh, subset=subset), + gdf.dropna(axis=0, thresh=thresh, subset=subset), + ) + + +@pytest.mark.parametrize("thresh", [0, 1, 2]) +@pytest.mark.parametrize("subset", [None, [0], [1], [0, 1]]) +def test_dropna_thresh_cols(thresh, subset, inplace): + pdf = pd.DataFrame( + {"a": [1, 2], "b": [3, 4], "c": [5, None], "d": [np.nan, np.nan]} + ) + gdf = cudf.from_pandas(pdf) + + expected = pdf.dropna( + axis=1, thresh=thresh, subset=subset, inplace=inplace + ) + actual = gdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace) + + if inplace: + expected = pdf + actual = gdf + + assert_eq( + expected, + actual, + ) + + +@pytest.mark.parametrize( + "data", + [ + { + "key": [1, 2, 10], + "val": pa.array([np.nan, 3.0, 1.0]), + "abc": [np.nan, None, 1], + }, + { + "key": [None, 2, 1], + "val": pa.array([3.0, None, 0.1]), + "abc": [None, 1, None], + }, + ], +) +def test_dropna_dataframe_np_nan(data, axis): + gdf = cudf.DataFrame(data) + pd_data = { + key: value.to_pandas() if isinstance(value, cudf.Series) else value + for key, value in data.items() + } + pdf = pd.DataFrame(pd_data) + + assert_eq(pdf.dropna(axis=axis), gdf.dropna(axis=axis), check_dtype=False) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py b/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py new file mode 100644 index 00000000000..b572e9e156d --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data, dtype", + [ + ([1, float("nan"), 2], "float64"), + (["x", None, "y"], "str"), + (["x", None, "y"], "category"), + (["2020-01-20", pd.NaT, "2020-03-15"], "datetime64[ns]"), + (["1s", pd.NaT, "3d"], "timedelta64[ns]"), + ], +) +def test_dropna_index(data, dtype): + pi = pd.Index(data, dtype=dtype) + gi = cudf.from_pandas(pi) + + expect = pi.dropna() + got = gi.dropna() + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/indexes/index/test_attributes.py b/python/cudf/cudf/tests/indexes/index/test_attributes.py index 2e80dfb272e..ee4a1654a10 100644 --- a/python/cudf/cudf/tests/indexes/index/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/index/test_attributes.py @@ -1,10 +1,45 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+import datetime import numpy as np import pandas as pd import pytest -from cudf import Index +import cudf + + +@pytest.mark.parametrize( + "values, item, expected", + [ + [[1, 2, 3], 2, True], + [[1, 2, 3], 4, False], + [[1, 2, 3], "a", False], + [["a", "b", "c"], "a", True], + [["a", "b", "c"], "ab", False], + [["a", "b", "c"], 6, False], + [pd.Categorical(["a", "b", "c"]), "a", True], + [pd.Categorical(["a", "b", "c"]), "ab", False], + [pd.Categorical(["a", "b", "c"]), 6, False], + [pd.date_range("20010101", periods=5, freq="D"), 20000101, False], + [ + pd.date_range("20010101", periods=5, freq="D"), + datetime.datetime(2000, 1, 1), + False, + ], + [ + pd.date_range("20010101", periods=5, freq="D"), + datetime.datetime(2001, 1, 1), + True, + ], + ], +) +@pytest.mark.parametrize( + "box", + [cudf.Index, lambda x: cudf.Series(index=x)], + ids=["index", "series"], +) +def test_contains(values, item, expected, box): + assert (item in box(values)) is expected @pytest.mark.parametrize( @@ -26,7 +61,7 @@ ], ) def test_index_is_unique_monotonic(testlist): - index = Index(testlist) + index = cudf.Index(testlist) index_pd = pd.Index(testlist) assert index.is_unique == index_pd.is_unique diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_dropna.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_dropna.py new file mode 100644 index 00000000000..049dc847e18 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_dropna.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_dropna_multiindex(dropna_how): + pi = pd.MultiIndex.from_arrays([[1, None, 2], [None, None, 2]]) + gi = cudf.from_pandas(pi) + + expect = pi.dropna(dropna_how) + got = gi.dropna(dropna_how) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + [ + [pd.Timestamp("2020-01-01"), pd.NaT, pd.Timestamp("2020-02-01")], + [pd.NaT, pd.NaT, pd.Timestamp("2020-03-01")], + ], + [ + [pd.Timestamp("2020-01-01"), pd.NaT, pd.Timestamp("2020-02-01")], + [np.nan, np.nan, 1.0], + ], + [[1.0, np.nan, 2.0], [np.nan, np.nan, 1.0]], + ], +) +def test_dropna_multiindex_2(data, dropna_how): + pi = pd.MultiIndex.from_arrays(data) + gi = cudf.from_pandas(pi) + + expect = pi.dropna(dropna_how) + got = gi.dropna(dropna_how) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py index a9de5d39622..60ee8b432e6 100644 --- a/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py @@ -3,14 +3,20 @@ import pandas as pd import pytest -from cudf.core.index import RangeIndex +import cudf + + +def test_rangeindex_contains(): + ridx = cudf.RangeIndex(start=0, stop=10, name="Index") + assert 9 in ridx + assert 10 not in ridx @pytest.mark.parametrize( "start, stop, step", [(10, 20, 1), (0, -10, -1), (5, 5, 1)] ) def test_range_index_is_unique_monotonic(start, stop, step): - index = RangeIndex(start=start, stop=stop, step=step) + index = cudf.RangeIndex(start=start, stop=stop, step=step) index_pd = pd.RangeIndex(start=start, stop=stop, step=step) assert index.is_unique == index_pd.is_unique diff --git a/python/cudf/cudf/tests/series/methods/test_apply.py b/python/cudf/cudf/tests/series/methods/test_apply.py new file mode 100644 index 00000000000..8bdb46e02a2 --- /dev/null +++ 
b/python/cudf/cudf/tests/series/methods/test_apply.py @@ -0,0 +1,281 @@ +# Copyright (c) 2021-2025, NVIDIA CORPORATION. +import operator + +import numpy as np +import pytest + +import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core.udf.utils import precompiled +from cudf.testing import assert_eq + + +def run_masked_udf_series(func, data, args=(), **kwargs): + gsr = data + psr = data.to_pandas(nullable=True) + + expect = psr.apply(func, args=args) + obtain = gsr.apply(func, args=args) + assert_eq(expect, obtain, **kwargs) + + +@pytest.mark.parametrize( + "data", + [ + np.array( + [0, 1, -1, 0, np.iinfo("int64").min, np.iinfo("int64").max], + dtype="int64", + ), + np.array([0, 0, 1, np.iinfo("uint64").max], dtype="uint64"), + np.array( + [ + 0, + 0.0, + -1.0, + 1.5, + -1.5, + np.finfo("float64").min, + np.finfo("float64").max, + np.nan, + np.inf, + -np.inf, + ], + dtype="float64", + ), + [False, True, False, cudf.NA], + ], +) +def test_masked_udf_abs(data): + data = cudf.Series(data) + data[0] = cudf.NA + + def func(x): + return abs(x) + + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.parametrize( + "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]] +) +@pytest.mark.parametrize("operator", [float, int, bool]) +def test_masked_udf_casting(operator, data): + data = cudf.Series(data) + + def func(x): + return operator(x) + + run_masked_udf_series(func, data, check_dtype=False) + + +def test_masked_udf_caching(): + # Make sure similar functions that differ + # by simple things like constants actually + # recompile + + data = cudf.Series([1, 2, 3]) + + expect = data**2 + got = data.apply(lambda x: x**2) + assert_eq(expect, got, check_dtype=False) + + # update the constant value being used and make sure + # it does not result in a cache hit + + expect = data**3 + got = data.apply(lambda x: x**3) + assert_eq(expect, got, check_dtype=False) + + # make sure we get a hit when reapplying + def f(x): + return x + 1 + + precompiled.clear() + assert precompiled.currsize == 0 + data.apply(f) + + assert precompiled.currsize == 1 + data.apply(f) + + assert precompiled.currsize == 1 + + # validate that changing the type of a scalar arg + # results in a miss + precompiled.clear() + + def f(x, c): + return x + c + + data.apply(f, args=(1,)) + assert precompiled.currsize == 1 + + data.apply(f, args=(1.5,)) + assert precompiled.currsize == 2 + + +@pytest.mark.parametrize( + "data", + [ + [1, cudf.NA, 3], + [0.5, 2.0, cudf.NA, cudf.NA, 5.0], + [True, False, cudf.NA], + ], +) +def test_masked_udf_scalar_args_binops_multiple_series( + request, data, binary_op +): + data = cudf.Series(data) + request.applymarker( + pytest.mark.xfail( + binary_op + in [ + operator.eq, + operator.ne, + operator.lt, + operator.le, + operator.gt, + operator.ge, + ] + and PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION + and data.dtype.kind != "b", + reason="https://github.com/pandas-dev/pandas/issues/57390", + ) + ) + + def func(data, c, k): + x = binary_op(data, c) + y = binary_op(x, k) + return y + + run_masked_udf_series(func, data, args=(1, 2), check_dtype=False) + + +@pytest.mark.parametrize( + "data", + [ + [1, cudf.NA, 3], + [0.5, 2.0, cudf.NA, cudf.NA, 5.0], + [True, False, cudf.NA], + ], +) +def test_mask_udf_scalar_args_binops_series(data): + data = cudf.Series(data) + + def func(x, c): + return x + c + + run_masked_udf_series(func, data, args=(1,), check_dtype=False) + + +@pytest.mark.parametrize( + "data,name", + [([1, 2, 3], None), ([1, 
cudf.NA, 3], None), ([1, 2, 3], "test_name")], +) +def test_series_apply_basic(data, name): + data = cudf.Series(data, name=name) + + def func(x): + return x + 1 + + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.xfail( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", +) +def test_series_apply_null_conditional(): + def func(x): + if x is cudf.NA: + return 42 + else: + return x - 1 + + data = cudf.Series([1, cudf.NA, 3]) + + run_masked_udf_series(func, data) + + +def test_series_arith_masked_vs_masked(arithmetic_op): + def func(x): + return arithmetic_op(x, x) + + data = cudf.Series([1, cudf.NA, 3]) + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.xfail( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", +) +def test_series_compare_masked_vs_masked(comparison_op): + """ + In the series case, only one other MaskedType to compare with + - itself + """ + + def func(x): + return comparison_op(x, x) + + data = cudf.Series([1, cudf.NA, 3]) + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.parametrize("constant", [1, 1.5, cudf.NA]) +def test_series_arith_masked_vs_constant(request, arithmetic_op, constant): + def func(x): + return arithmetic_op(x, constant) + + # Just a single column -> result will be all NA + data = cudf.Series([1, 2, cudf.NA]) + # in pandas, 1**NA == 1. In cudf, 1**NA == NA. + request.applymarker( + pytest.mark.xfail( + condition=( + constant is cudf.NA + and arithmetic_op in {operator.pow, operator.ipow} + ), + reason="https://github.com/rapidsai/cudf/issues/7478", + ) + ) + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.parametrize("constant", [1, 1.5, cudf.NA]) +def test_series_arith_masked_vs_constant_reflected( + request, arithmetic_op, constant +): + def func(x): + return arithmetic_op(constant, x) + + # Just a single column -> result will be all NA + data = cudf.Series([1, 2, cudf.NA]) + # Using in {1} since bool(NA == 1) raises a TypeError since NA is + # neither truthy nor falsy + # in pandas, 1**NA == 1. In cudf, 1**NA == NA. + request.applymarker( + pytest.mark.xfail( + condition=( + constant in {1} + and arithmetic_op in {operator.pow, operator.ipow} + ), + reason="https://github.com/rapidsai/cudf/issues/7478", + ) + ) + run_masked_udf_series(func, data, check_dtype=False) + + +@pytest.mark.xfail( + PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, + reason="https://github.com/pandas-dev/pandas/issues/57390", +) +def test_series_masked_is_null_conditional(): + def func(x): + if x is cudf.NA: + return 42 + else: + return x + + data = cudf.Series([1, cudf.NA, 3, cudf.NA]) + + run_masked_udf_series(func, data, check_dtype=False) diff --git a/python/cudf/cudf/tests/series/methods/test_dropna.py b/python/cudf/cudf/tests/series/methods/test_dropna.py new file mode 100644 index 00000000000..dafcbf5bbfe --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_dropna.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [], + [1.0, 2, None, 4], + ["one", "two", "three", "four"], + pd.Series(["a", "b", "c", "d"], dtype="category"), + pd.Series(pd.date_range("2010-01-01", "2010-01-04")), + ], +) +@pytest.mark.parametrize("nulls", ["one", "some", "all", "none"]) +def test_dropna_series(data, nulls, inplace): + psr = pd.Series(data) + rng = np.random.default_rng(seed=0) + if len(data) > 0: + if nulls == "one": + p = rng.integers(0, 4) + psr[p] = None + elif nulls == "some": + p1, p2 = rng.integers(0, 4, (2,)) + psr[p1] = None + psr[p2] = None + elif nulls == "all": + psr[:] = None + + gsr = cudf.from_pandas(psr) + + check_dtype = True + if gsr.null_count == len(gsr): + check_dtype = False + + expected = psr.dropna() + actual = gsr.dropna() + + if inplace: + expected = psr + actual = gsr + + assert_eq(expected, actual, check_dtype=check_dtype) + + +def test_dropna_nan_as_null(): + sr = cudf.Series([1.0, 2.0, np.nan, None], nan_as_null=False) + assert_eq(sr.dropna(), sr[:2]) + sr = sr.nans_to_nulls() + assert_eq(sr.dropna(), sr[:2]) + + df = cudf.DataFrame( + { + "a": cudf.Series([1.0, 2.0, np.nan, None], nan_as_null=False), + "b": cudf.Series([1, 2, 3, 4]), + } + ) + + got = df.dropna() + expected = df[:2] + assert_eq(expected, got) + + df = df.nans_to_nulls() + got = df.dropna() + expected = df[:2] + assert_eq(expected, got) + + +def test_ignore_index(): + pser = pd.Series([1, 2, np.nan], index=[2, 4, 1]) + gser = cudf.from_pandas(pser) + + result = pser.dropna(ignore_index=True) + expected = gser.dropna(ignore_index=True) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index 2c6bc0e8a00..7db558335fe 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -1,55 +1,14 @@ # Copyright (c) 2019-2025, NVIDIA CORPORATION. 
-import datetime import numpy as np -import pandas as pd import pytest import cudf from cudf import Series -from cudf.core.index import Index, RangeIndex from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES -@pytest.mark.parametrize( - "values, item, expected", - [ - [[1, 2, 3], 2, True], - [[1, 2, 3], 4, False], - [[1, 2, 3], "a", False], - [["a", "b", "c"], "a", True], - [["a", "b", "c"], "ab", False], - [["a", "b", "c"], 6, False], - [pd.Categorical(["a", "b", "c"]), "a", True], - [pd.Categorical(["a", "b", "c"]), "ab", False], - [pd.Categorical(["a", "b", "c"]), 6, False], - [pd.date_range("20010101", periods=5, freq="D"), 20000101, False], - [ - pd.date_range("20010101", periods=5, freq="D"), - datetime.datetime(2000, 1, 1), - False, - ], - [ - pd.date_range("20010101", periods=5, freq="D"), - datetime.datetime(2001, 1, 1), - True, - ], - ], -) -@pytest.mark.parametrize( - "box", [Index, lambda x: Series(index=x)], ids=["index", "series"] -) -def test_contains(values, item, expected, box): - assert (item in box(values)) is expected - - -def test_rangeindex_contains(): - ridx = RangeIndex(start=0, stop=10, name="Index") - assert 9 in ridx - assert 10 not in ridx - - @pytest.mark.parametrize("dtype", NUMERIC_TYPES) def test_lists_contains(dtype): dtype = cudf.dtype(dtype) diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py deleted file mode 100644 index 1f927d03e95..00000000000 --- a/python/cudf/cudf/tests/test_dropna.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import cudf -from cudf.testing import assert_eq - - -@pytest.mark.parametrize( - "data", - [ - [], - [1.0, 2, None, 4], - ["one", "two", "three", "four"], - pd.Series(["a", "b", "c", "d"], dtype="category"), - pd.Series(pd.date_range("2010-01-01", "2010-01-04")), - ], -) -@pytest.mark.parametrize("nulls", ["one", "some", "all", "none"]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dropna_series(data, nulls, inplace): - psr = pd.Series(data) - rng = np.random.default_rng(seed=0) - if len(data) > 0: - if nulls == "one": - p = rng.integers(0, 4) - psr[p] = None - elif nulls == "some": - p1, p2 = rng.integers(0, 4, (2,)) - psr[p1] = None - psr[p2] = None - elif nulls == "all": - psr[:] = None - - gsr = cudf.from_pandas(psr) - - check_dtype = True - if gsr.null_count == len(gsr): - check_dtype = False - - expected = psr.dropna() - actual = gsr.dropna() - - if inplace: - expected = psr - actual = gsr - - assert_eq(expected, actual, check_dtype=check_dtype) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, None]}, - {"a": [1, 2, None], "b": [3, 4, 5]}, - {"a": [1, 2, None], "b": [3, 4, None]}, - {"a": [None, 1, 2], "b": [1, 2, None]}, - {"a": [None, 1, None], "b": [None, 2, None]}, - {"a": [None, None, 1], "b": [1, 2, None]}, - {"a": ["d", "e", "f"], "b": ["a", None, "c"]}, - ], -) -@pytest.mark.parametrize("how", ["all", "any"]) -@pytest.mark.parametrize("axis", [0, 1]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dropna_dataframe(data, how, axis, inplace): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - expected = pdf.dropna(axis=axis, how=how, inplace=inplace) - actual = gdf.dropna(axis=axis, how=how, inplace=inplace) - - if inplace: - expected = pdf - actual = gdf - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("how", ["all", "any"]) -@pytest.mark.parametrize( - "data", - [ - { - 
"a": pa.array([None, None, None], type=pa.float64()), - "b": [1, 2, None], - }, - { - "a": pa.array([np.nan, np.nan, np.nan]), - "b": [1, 2, None], - }, - {"a": pa.array([None, None, None], type=pa.string())}, - ], -) -@pytest.mark.parametrize("axis", [0, 1]) -def test_dropna_with_all_nulls(how, data, axis): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - assert_eq( - pdf.dropna(axis=axis, how=how), - gdf.dropna(axis=axis, how=how), - check_dtype=False, - ) - - -def test_dropna_nan_as_null(): - sr = cudf.Series([1.0, 2.0, np.nan, None], nan_as_null=False) - assert_eq(sr.dropna(), sr[:2]) - sr = sr.nans_to_nulls() - assert_eq(sr.dropna(), sr[:2]) - - df = cudf.DataFrame( - { - "a": cudf.Series([1.0, 2.0, np.nan, None], nan_as_null=False), - "b": cudf.Series([1, 2, 3, 4]), - } - ) - - got = df.dropna() - expected = df[:2] - assert_eq(expected, got) - - df = df.nans_to_nulls() - got = df.dropna() - expected = df[:2] - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data,subset", - [ - ({"a": [1, None], "b": [1, 2]}, ["a"]), - ({"a": [1, None], "b": [1, 2]}, ["b"]), - ({"a": [1, None], "b": [1, 2]}, []), - ({"a": [1, 2], "b": [1, 2]}, ["b"]), - ({"a": [1, 2, None], "b": [1, None, 2]}, ["a"]), - ({"a": [1, 2, None], "b": [1, None, 2]}, ["b"]), - ({"a": [1, 2, None], "b": [1, None, 2]}, ["a", "b"]), - ], -) -def test_dropna_subset_rows(data, subset): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - assert_eq(pdf.dropna(subset=subset), gdf.dropna(subset=subset)) - - -@pytest.mark.parametrize( - "data, subset", - [ - ({"a": [1, None], "b": [1, 2]}, [0]), - ({"a": [1, None], "b": [1, 2]}, [1]), - ({"a": [1, None], "b": [1, 2]}, []), - ({"a": [1, 2], "b": [1, 2]}, [0]), - ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [0]), - ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [1]), - ({"a": [1, 2], "b": [None, 2], "c": [3, None]}, [0, 1]), - ], -) -def test_dropna_subset_cols(data, subset): - pdf = pd.DataFrame(data) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.dropna(axis=1, subset=subset), gdf.dropna(axis=1, subset=subset) - ) - - -# TODO: can't test with subset=[] below since Pandas -# returns empty DF when both subset=[] and thresh are specified. 
-@pytest.mark.parametrize("thresh", [0, 1, 2]) -@pytest.mark.parametrize("subset", [None, ["a"], ["b"], ["a", "b"]]) -def test_dropna_thresh(thresh, subset): - pdf = pd.DataFrame({"a": [1, 2, None, None], "b": [1, 2, 3, None]}) - gdf = cudf.from_pandas(pdf) - - assert_eq( - pdf.dropna(axis=0, thresh=thresh, subset=subset), - gdf.dropna(axis=0, thresh=thresh, subset=subset), - ) - - -@pytest.mark.parametrize("thresh", [0, 1, 2]) -@pytest.mark.parametrize("subset", [None, [0], [1], [0, 1]]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_dropna_thresh_cols(thresh, subset, inplace): - pdf = pd.DataFrame( - {"a": [1, 2], "b": [3, 4], "c": [5, None], "d": [np.nan, np.nan]} - ) - gdf = cudf.from_pandas(pdf) - - expected = pdf.dropna( - axis=1, thresh=thresh, subset=subset, inplace=inplace - ) - actual = gdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace) - - if inplace: - expected = pdf - actual = gdf - - assert_eq( - expected, - actual, - ) - - -@pytest.mark.parametrize( - "data", - [ - { - "key": [1, 2, 10], - "val": pa.array([np.nan, 3.0, 1.0]), - "abc": [np.nan, None, 1], - }, - { - "key": [None, 2, 1], - "val": pa.array([3.0, None, 0.1]), - "abc": [None, 1, None], - }, - ], -) -@pytest.mark.parametrize("axis", [0, 1]) -def test_dropna_dataframe_np_nan(data, axis): - gdf = cudf.DataFrame(data) - pd_data = { - key: value.to_pandas() if isinstance(value, cudf.Series) else value - for key, value in data.items() - } - pdf = pd.DataFrame(pd_data) - - assert_eq(pdf.dropna(axis=axis), gdf.dropna(axis=axis), check_dtype=False) - - -@pytest.mark.parametrize( - "data, dtype", - [ - ([1, float("nan"), 2], "float64"), - (["x", None, "y"], "str"), - (["x", None, "y"], "category"), - (["2020-01-20", pd.NaT, "2020-03-15"], "datetime64[ns]"), - (["1s", pd.NaT, "3d"], "timedelta64[ns]"), - ], -) -def test_dropna_index(data, dtype): - pi = pd.Index(data, dtype=dtype) - gi = cudf.from_pandas(pi) - - expect = pi.dropna() - got = gi.dropna() - - assert_eq(expect, got) - - -@pytest.mark.parametrize("how", ["all", "any"]) -def test_dropna_multiindex(how): - pi = pd.MultiIndex.from_arrays([[1, None, 2], [None, None, 2]]) - gi = cudf.from_pandas(pi) - - expect = pi.dropna(how) - got = gi.dropna(how) - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - [ - [pd.Timestamp("2020-01-01"), pd.NaT, pd.Timestamp("2020-02-01")], - [pd.NaT, pd.NaT, pd.Timestamp("2020-03-01")], - ], - [ - [pd.Timestamp("2020-01-01"), pd.NaT, pd.Timestamp("2020-02-01")], - [np.nan, np.nan, 1.0], - ], - [[1.0, np.nan, 2.0], [np.nan, np.nan, 1.0]], - ], -) -@pytest.mark.parametrize("how", ["all", "any"]) -def test_dropna_multiindex_2(data, how): - pi = pd.MultiIndex.from_arrays(data) - gi = cudf.from_pandas(pi) - - expect = pi.dropna(how) - got = gi.dropna(how) - - assert_eq(expect, got) - - -def test_ignore_index(): - pser = pd.Series([1, 2, np.nan], index=[2, 4, 1]) - gser = cudf.from_pandas(pser) - - result = pser.dropna(ignore_index=True) - expected = gser.dropna(ignore_index=True) - assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 8f08272ce66..78ffc662e11 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -12,7 +12,6 @@ from cudf import NA from cudf.core.udf.api import Masked from cudf.core.udf.masked_typing import MaskedType -from cudf.testing._utils import parametrize_numeric_dtypes_pairwise from cudf.utils._numba 
import _CUDFNumbaConfig arith_ops = ( @@ -166,20 +165,21 @@ def func(x): @pytest.mark.parametrize("op", ops) -@parametrize_numeric_dtypes_pairwise @pytest.mark.parametrize( "masked", ((False, True), (True, False), (True, True)), ids=("um", "mu", "mm"), ) -def test_compile_arith_masked_ops(op, left_dtype, right_dtype, masked): +def test_compile_arith_masked_ops( + op, numeric_types_as_str, numeric_types_as_str2, masked +): def func(x, y): return op(x, y) cc = (7, 5) - ty1 = from_dtype(np.dtype(left_dtype)) - ty2 = from_dtype(np.dtype(right_dtype)) + ty1 = from_dtype(np.dtype(numeric_types_as_str)) + ty2 = from_dtype(np.dtype(numeric_types_as_str2)) if masked[0]: ty1 = MaskedType(ty1) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py deleted file mode 100644 index 958a3657abb..00000000000 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ /dev/null @@ -1,1069 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. -import math -import operator - -import numpy as np -import pytest -from numba import cuda -from numba.core.typing import signature as nb_signature -from numba.core.typing.templates import AbstractTemplate -from numba.cuda.cudadecl import registry as cuda_decl_registry -from numba.cuda.cudaimpl import lower as cuda_lower - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.core.missing import NA -from cudf.core.udf._ops import ( - arith_ops, - bitwise_ops, - comparison_ops, - unary_ops, -) -from cudf.core.udf.api import Masked -from cudf.core.udf.strings_lowering import ( - cast_string_view_to_managed_udf_string, -) -from cudf.core.udf.strings_typing import ( - StringView, - managed_udf_string, - string_view, -) -from cudf.core.udf.utils import precompiled -from cudf.testing import assert_eq -from cudf.testing._utils import ( - _decimal_series, - parametrize_numeric_dtypes_pairwise, -) - - -def sv_to_managed_udf_str(sv): - """ - Cast a string_view object to a managed_udf_string object - - This placeholder function never runs in python - It exists only for numba to have something to replace - with the typing and lowering code below - - This is similar conceptually to needing a translation - engine to emit an expression in target language "B" when - there is no equivalent in the source language "A" to - translate from. 
This function effectively defines the - expression in language "A" and the associated typing - and lowering describe the translation process, despite - the expression having no meaning in language "A" - """ - pass - - -@cuda_decl_registry.register_global(sv_to_managed_udf_str) -class StringViewToUDFStringDecl(AbstractTemplate): - def generic(self, args, kws): - if isinstance(args[0], StringView) and len(args) == 1: - return nb_signature(managed_udf_string, string_view) - - -@cuda_lower(sv_to_managed_udf_str, string_view) -def sv_to_udf_str_testing_lowering(context, builder, sig, args): - return cast_string_view_to_managed_udf_string( - context, builder, sig.args[0], sig.return_type, args[0] - ) - - -@pytest.fixture(scope="module") -def str_udf_data(): - return cudf.DataFrame( - { - "str_col": [ - "abc", - "ABC", - "AbC", - "123", - "123aBc", - "123@.!", - "", - "rapids ai", - "gpu", - "True", - "False", - "1.234", - ".123a", - "0.013", - "1.0", - "01", - "20010101", - "cudf", - "cuda", - "gpu", - "This Is A Title", - "This is Not a Title", - "Neither is This a Title", - "NoT a TiTlE", - "123 Title Works", - ] - } - ) - - -@pytest.fixture(params=["a", "cu", "2", "gpu", "", " "]) -def substr(request): - return request.param - - -def run_masked_udf_test(func, data, args=(), nullable=True, **kwargs): - gdf = data - pdf = data.to_pandas(nullable=nullable) - - expect = pdf.apply(func, args=args, axis=1) - obtain = gdf.apply(func, args=args, axis=1) - assert_eq(expect, obtain, **kwargs) - - -def run_masked_string_udf_test(func, data, args=(), **kwargs): - gdf = data - pdf = data.to_pandas(nullable=True) - - def row_wrapper(row): - st = row["str_col"] - return func(st) - - expect = pdf.apply(row_wrapper, args=args, axis=1) - - func = cuda.jit(device=True)(func) - obtain = gdf.apply(row_wrapper, args=args, axis=1) - assert_eq(expect, obtain, **kwargs) - - # strings that come directly from input columns are backed by - # MaskedType(string_view) types. But new strings that are returned - # from functions or operators are backed by MaskedType(udf_string) - # types. We need to make sure all of our methods work on both kind - # of MaskedType. 
This function promotes the former to the latter - # prior to running the input function - def udf_string_wrapper(row): - masked_udf_str = Masked( - sv_to_managed_udf_str(row["str_col"].value), row["str_col"].valid - ) - return func(masked_udf_str) - - obtain = gdf.apply(udf_string_wrapper, args=args, axis=1) - assert_eq(expect, obtain, **kwargs) - - -def run_masked_udf_series(func, data, args=(), **kwargs): - gsr = data - psr = data.to_pandas(nullable=True) - - expect = psr.apply(func, args=args) - obtain = gsr.apply(func, args=args) - assert_eq(expect, obtain, **kwargs) - - -@pytest.mark.parametrize("op", arith_ops) -def test_arith_masked_vs_masked(op): - # This test should test all the typing - # and lowering for arithmetic ops between - # two columns - def func(row): - x = row["a"] - y = row["b"] - return op(x, y) - - gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", bitwise_ops) -def test_bitwise_masked_vs_masked(op): - # This test should test all the typing - # and lowering for bitwise ops between - # two columns - def func(row): - x = row["a"] - y = row["b"] - return op(x, y) - - gdf = cudf.DataFrame( - { - "a": [1, 0, 1, 0, 0b1011, 42, None], - "b": [1, 1, 0, 0, 0b1100, -42, 5], - } - ) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize( - "dtype_l", - ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], -) -@pytest.mark.parametrize( - "dtype_r", - [ - "timedelta64[ns]", - "timedelta64[us]", - "timedelta64[ms]", - "timedelta64[s]", - "datetime64[ns]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[s]", - ], -) -@pytest.mark.parametrize("op", [operator.add, operator.sub]) -def test_arith_masked_vs_masked_datelike(op, dtype_l, dtype_r): - # Datetime version of the above - # does not test all dtype combinations for now - if "datetime" in dtype_l and "datetime" in dtype_r and op is operator.add: - # don't try adding datetimes to datetimes. - pytest.skip("Adding datetime to datetime is not valid") - - def func(row): - x = row["a"] - y = row["b"] - return op(x, y) - - gdf = cudf.DataFrame( - { - "a": ["2011-01-01", cudf.NA, "2011-03-01", cudf.NA], - "b": [4, 5, cudf.NA, cudf.NA], - } - ) - gdf["a"] = gdf["a"].astype(dtype_l) - gdf["b"] = gdf["b"].astype(dtype_r) - - pdf = gdf.to_pandas() - expect = op(pdf["a"], pdf["b"]) - obtain = gdf.apply(func, axis=1) - assert_eq(expect, obtain, check_dtype=False) - # TODO: After the following pandas issue is - # fixed, uncomment the following line and delete - # through `to_pandas()` statement. 
- # https://github.com/pandas-dev/pandas/issues/52411 - - # run_masked_udf_test(func, gdf, nullable=False, check_dtype=False) - - -@pytest.mark.parametrize("op", comparison_ops) -def test_compare_masked_vs_masked(op): - # this test should test all the - # typing and lowering for comparisons - # between columns - - def func(row): - x = row["a"] - y = row["b"] - return op(x, y) - - # we should get: - # [?, ?, , , ] - gdf = cudf.DataFrame( - {"a": [1, 0, None, 1, None], "b": [0, 1, 0, None, None]} - ) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -@pytest.mark.parametrize("constant", [1, 1.5, True, False]) -@pytest.mark.parametrize("data", [[1, 2, cudf.NA]]) -def test_arith_masked_vs_constant(op, constant, data): - def func(row): - x = row["data"] - return op(x, constant) - - gdf = cudf.DataFrame({"data": data}) - - if constant is False and op in { - operator.mod, - operator.pow, - operator.truediv, - operator.floordiv, - operator.imod, - operator.ipow, - operator.itruediv, - operator.ifloordiv, - }: - # The following tests cases yield undefined behavior: - # - truediv(x, False) because its dividing by zero - # - floordiv(x, False) because its dividing by zero - # - mod(x, False) because its mod by zero, - # - pow(x, False) because we have an NA in the series and pandas - # insists that (NA**0 == 1) where we do not - pytest.skip() - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -@pytest.mark.parametrize("constant", [1, 1.5, True, False]) -@pytest.mark.parametrize("data", [[2, 3, cudf.NA], [1, cudf.NA, 1]]) -def test_arith_masked_vs_constant_reflected(request, op, constant, data): - def func(row): - x = row["data"] - return op(constant, x) - - # Just a single column -> result will be all NA - gdf = cudf.DataFrame({"data": data}) - - # cudf differs from pandas for 1**NA - request.applymarker( - pytest.mark.xfail( - condition=(constant == 1 and op in {operator.pow, operator.ipow}), - reason="https://github.com/rapidsai/cudf/issues/7478", - ) - ) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -@pytest.mark.parametrize("data", [[1, cudf.NA, 3], [2, 3, cudf.NA]]) -def test_arith_masked_vs_null(request, op, data): - def func(row): - x = row["data"] - return op(x, NA) - - gdf = cudf.DataFrame({"data": data}) - - # In pandas, 1**NA == 1. 
- request.applymarker( - pytest.mark.xfail( - condition=( - (gdf["data"] == 1).any() - and op in {operator.pow, operator.ipow} - ), - reason="https://github.com/rapidsai/cudf/issues/7478", - ) - ) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -def test_arith_masked_vs_null_reflected(op): - def func(row): - x = row["data"] - return op(NA, x) - - gdf = cudf.DataFrame({"data": [1, None, 3]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("op", unary_ops) -def test_unary_masked(op): - # This test should test all the typing - # and lowering for unary ops - - def func(row): - x = row["a"] - return op(x) if x is not NA else NA - - if "log" in op.__name__: - gdf = cudf.DataFrame({"a": [0.1, 1.0, None, 3.5, 1e8]}) - elif op.__name__ in {"asin", "acos"}: - gdf = cudf.DataFrame({"a": [0.0, 0.5, None, 1.0]}) - elif op.__name__ in {"atanh"}: - gdf = cudf.DataFrame({"a": [0.0, -0.5, None, 0.8]}) - elif op.__name__ in {"acosh", "sqrt", "lgamma"}: - gdf = cudf.DataFrame({"a": [1.0, 2.0, None, 11.0]}) - elif op.__name__ in {"gamma"}: - gdf = cudf.DataFrame({"a": [0.1, 2, None, 4]}) - elif op.__name__ in {"invert"}: - gdf = cudf.DataFrame({"a": [-100, 128, None, 0]}, dtype="int64") - else: - gdf = cudf.DataFrame({"a": [-125.60, 395.2, 0.0, None]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_masked_is_null_conditional(): - def func(row): - x = row["a"] - y = row["b"] - if x is NA: - return y - else: - return x + y - - gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_apply_contains(): - def func(row): - x = row["a"] - return x in [1, 2] - - gdf = cudf.DataFrame({"a": [1, 3]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -@parametrize_numeric_dtypes_pairwise -@pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) -def test_apply_mixed_dtypes(left_dtype, right_dtype, op): - """ - Test that operations can be performed between columns - of different dtypes and return a column with the correct - values and nulls - """ - - # First perform the op on two dummy data on host, if numpy can - # safely type cast, we should expect it to work in udf too. 
- try: - op(np.dtype(left_dtype).type(0), np.dtype(right_dtype).type(42)) - except TypeError: - pytest.skip("Operation is unsupported for corresponding dtype.") - - def func(row): - x = row["a"] - y = row["b"] - return op(x, y) - - gdf = cudf.DataFrame({"a": [1.5, None, 3, None], "b": [4, 5, None, None]}) - gdf["a"] = gdf["a"].astype(left_dtype) - gdf["b"] = gdf["b"].astype(right_dtype) - - run_masked_udf_test(func, gdf, check_dtype=False) - - -@pytest.mark.parametrize("val", [5, 5.5]) -def test_apply_return_literal(val): - """ - Test unification codepath for scalars and MaskedType - makes sure that numba knows how to cast a scalar value - to a MaskedType - """ - - def func(row): - x = row["a"] - y = row["b"] - if x is not NA and x < 2: - return val - else: - return x + y - - gdf = cudf.DataFrame({"a": [1, None, 3, None], "b": [4, 5, None, None]}) - - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_apply_return_null(): - """ - Tests casting / unification of Masked and NA - """ - - def func(row): - x = row["a"] - if x is NA: - return NA - else: - return x - - gdf = cudf.DataFrame({"a": [1, None, 3]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_apply_return_either_null_or_literal(): - def func(row): - x = row["a"] - if x > 5: - return 2 - else: - return NA - - gdf = cudf.DataFrame({"a": [1, 3, 6]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_apply_return_literal_only(): - def func(x): - return 5 - - gdf = cudf.DataFrame({"a": [1, None, 3]}) - run_masked_udf_test(func, gdf, check_dtype=False) - - -def test_apply_everything(): - def func(row): - w = row["a"] - x = row["b"] - y = row["c"] - z = row["d"] - if x is NA: - return w + y - z - elif ((z > y) is not NA) and z > y: - return x - elif ((x + y) is not NA) and x + y == 0: - return z / x - elif x + y is NA: - return 2.5 - elif w > 100: - return ( - math.sin(x) - + math.sqrt(y) - - (-z) - + math.lgamma(x) * math.fabs(-0.8) / math.radians(3.14) - ) - else: - return y > 2 - - gdf = cudf.DataFrame( - { - "a": [1, 3, 6, 0, None, 5, None, 101], - "b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0, 1.0], - "c": [2, 3, 6, 0, None, 5, None, 6], - "d": [4, None, 6, 0, None, 5, None, 7.5], - } - ) - run_masked_udf_test(func, gdf, check_dtype=False) - - -### - - -@pytest.mark.parametrize( - "data,name", - [([1, 2, 3], None), ([1, cudf.NA, 3], None), ([1, 2, 3], "test_name")], -) -def test_series_apply_basic(data, name): - data = cudf.Series(data, name=name) - - def func(x): - return x + 1 - - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.xfail( - PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/57390", -) -def test_series_apply_null_conditional(): - def func(x): - if x is NA: - return 42 - else: - return x - 1 - - data = cudf.Series([1, cudf.NA, 3]) - - run_masked_udf_series(func, data) - - -### - - -@pytest.mark.parametrize("op", arith_ops) -def test_series_arith_masked_vs_masked(op): - def func(x): - return op(x, x) - - data = cudf.Series([1, cudf.NA, 3]) - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.xfail( - PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/57390", -) -@pytest.mark.parametrize("op", comparison_ops) -def test_series_compare_masked_vs_masked(op): - """ - In the series case, only one other MaskedType to compare with - - itself - """ - - def func(x): - return op(x, x) - - data = cudf.Series([1, cudf.NA, 3]) - 
run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -@pytest.mark.parametrize("constant", [1, 1.5, cudf.NA]) -def test_series_arith_masked_vs_constant(request, op, constant): - def func(x): - return op(x, constant) - - # Just a single column -> result will be all NA - data = cudf.Series([1, 2, cudf.NA]) - # in pandas, 1**NA == 1. In cudf, 1**NA == NA. - request.applymarker( - pytest.mark.xfail( - condition=( - constant is cudf.NA and op in {operator.pow, operator.ipow} - ), - reason="https://github.com/rapidsai/cudf/issues/7478", - ) - ) - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops) -@pytest.mark.parametrize("constant", [1, 1.5, cudf.NA]) -def test_series_arith_masked_vs_constant_reflected(request, op, constant): - def func(x): - return op(constant, x) - - # Just a single column -> result will be all NA - data = cudf.Series([1, 2, cudf.NA]) - # Using in {1} since bool(NA == 1) raises a TypeError since NA is - # neither truthy nor falsy - # in pandas, 1**NA == 1. In cudf, 1**NA == NA. - request.applymarker( - pytest.mark.xfail( - condition=( - constant in {1} and op in {operator.pow, operator.ipow} - ), - reason="https://github.com/rapidsai/cudf/issues/7478", - ) - ) - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.xfail( - PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION, - reason="https://github.com/pandas-dev/pandas/issues/57390", -) -def test_series_masked_is_null_conditional(): - def func(x): - if x is NA: - return 42 - else: - return x - - data = cudf.Series([1, cudf.NA, 3, cudf.NA]) - - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_masked_udf_lambda_support(op): - func = lambda row: op(row["a"], row["b"]) # noqa: E731 - - data = cudf.DataFrame( - {"a": [1, cudf.NA, 3, cudf.NA], "b": [1, 2, cudf.NA, cudf.NA]} - ) - - run_masked_udf_test(func, data, check_dtype=False) - - -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_masked_udf_nested_function_support(op): - """ - Nested functions need to be explicitly jitted by the user - for numba to recognize them. Unfortunately the object - representing the jitted function can not itself be used in - pandas udfs. 
- """ - - def inner(x, y): - return op(x, y) - - def outer(row): - x = row["a"] - y = row["b"] - return inner(x, y) - - gdf = cudf.DataFrame( - {"a": [1, cudf.NA, 3, cudf.NA], "b": [1, 2, cudf.NA, cudf.NA]} - ) - - with pytest.raises(ValueError): - gdf.apply(outer, axis=1) - - pdf = gdf.to_pandas(nullable=True) - inner_gpu = cuda.jit(device=True)(inner) - - def outer_gpu(row): - x = row["a"] - y = row["b"] - return inner_gpu(x, y) - - got = gdf.apply(outer_gpu, axis=1) - expect = pdf.apply(outer, axis=1) - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, - {"a": [1, 2, 3], "c": [4, 5, 6], "b": [7, 8, 9]}, - {"a": [1, 2, 3], "b": [4, 5, 6], "c": ["a", "b", "c"]}, - ], -) -def test_masked_udf_subset_selection(data): - def func(row): - return row["a"] + row["b"] - - data = cudf.DataFrame(data) - run_masked_udf_test(func, data) - - -@pytest.mark.parametrize( - "unsupported_col", - [ - _decimal_series( - ["1.0", "2.0", "3.0"], dtype=cudf.Decimal64Dtype(2, 1) - ), - cudf.Series([1, 2, 3], dtype="category"), - cudf.interval_range(start=0, end=3), - [[1, 2], [3, 4], [5, 6]], - [{"a": 1}, {"a": 2}, {"a": 3}], - ], -) -def test_masked_udf_unsupported_dtype(unsupported_col): - data = cudf.DataFrame() - data["unsupported_col"] = unsupported_col - - def func(row): - return row["unsupported_col"] - - # check that we fail when an unsupported type is used within a function - with pytest.raises(ValueError): - data.apply(func, axis=1) - - # also check that a DF containing unsupported dtypes can still run a - # function that does NOT involve any of the unsupported dtype columns - data["supported_col"] = 1 - - def other_func(row): - return row["supported_col"] - - expect = cudf.Series(np.ones(len(data))) - got = data.apply(other_func, axis=1) - - assert_eq(expect, got, check_dtype=False) - - -# tests for `DataFrame.apply(f, args=(x,y,z))` -# testing the whole space of possibilities is intractable -# these test the most rudimentary guaranteed functionality -@pytest.mark.parametrize( - "data", - [ - {"a": [1, cudf.NA, 3]}, - {"a": [0.5, 2.0, cudf.NA, cudf.NA, 5.0]}, - {"a": [True, False, cudf.NA]}, - ], -) -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_masked_udf_scalar_args_binops(data, op): - data = cudf.DataFrame(data) - - def func(row, c): - return op(row["a"], c) - - run_masked_udf_test(func, data, args=(1,), check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [1, cudf.NA, 3]}, - {"a": [0.5, 2.0, cudf.NA, cudf.NA, 5.0]}, - {"a": [True, False, cudf.NA]}, - ], -) -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_masked_udf_scalar_args_binops_multiple(data, op): - data = cudf.DataFrame(data) - - def func(row, c, k): - x = op(row["a"], c) - y = op(x, k) - return y - - run_masked_udf_test(func, data, args=(1, 2), check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - [1, cudf.NA, 3], - [0.5, 2.0, cudf.NA, cudf.NA, 5.0], - [True, False, cudf.NA], - ], -) -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_mask_udf_scalar_args_binops_series(data, op): - data = cudf.Series(data) - - def func(x, c): - return x + c - - run_masked_udf_series(func, data, args=(1,), check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - [1, cudf.NA, 3], - [0.5, 2.0, cudf.NA, cudf.NA, 5.0], - [True, False, cudf.NA], - ], -) -@pytest.mark.parametrize("op", arith_ops + comparison_ops) -def test_masked_udf_scalar_args_binops_multiple_series(request, 
data, op): - data = cudf.Series(data) - request.applymarker( - pytest.mark.xfail( - op in comparison_ops - and PANDAS_VERSION >= PANDAS_CURRENT_SUPPORTED_VERSION - and data.dtype.kind != "b", - reason="https://github.com/pandas-dev/pandas/issues/57390", - ) - ) - - def func(data, c, k): - x = op(data, c) - y = op(x, k) - return y - - run_masked_udf_series(func, data, args=(1, 2), check_dtype=False) - - -def test_masked_udf_caching(): - # Make sure similar functions that differ - # by simple things like constants actually - # recompile - - data = cudf.Series([1, 2, 3]) - - expect = data**2 - got = data.apply(lambda x: x**2) - assert_eq(expect, got, check_dtype=False) - - # update the constant value being used and make sure - # it does not result in a cache hit - - expect = data**3 - got = data.apply(lambda x: x**3) - assert_eq(expect, got, check_dtype=False) - - # make sure we get a hit when reapplying - def f(x): - return x + 1 - - precompiled.clear() - assert precompiled.currsize == 0 - data.apply(f) - - assert precompiled.currsize == 1 - data.apply(f) - - assert precompiled.currsize == 1 - - # validate that changing the type of a scalar arg - # results in a miss - precompiled.clear() - - def f(x, c): - return x + c - - data.apply(f, args=(1,)) - assert precompiled.currsize == 1 - - data.apply(f, args=(1.5,)) - assert precompiled.currsize == 2 - - -@pytest.mark.parametrize( - "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]] -) -@pytest.mark.parametrize("operator", [float, int, bool]) -def test_masked_udf_casting(operator, data): - data = cudf.Series(data) - - def func(x): - return operator(x) - - run_masked_udf_series(func, data, check_dtype=False) - - -@pytest.mark.parametrize( - "data", - [ - np.array( - [0, 1, -1, 0, np.iinfo("int64").min, np.iinfo("int64").max], - dtype="int64", - ), - np.array([0, 0, 1, np.iinfo("uint64").max], dtype="uint64"), - np.array( - [ - 0, - 0.0, - -1.0, - 1.5, - -1.5, - np.finfo("float64").min, - np.finfo("float64").max, - np.nan, - np.inf, - -np.inf, - ], - dtype="float64", - ), - [False, True, False, cudf.NA], - ], -) -def test_masked_udf_abs(data): - data = cudf.Series(data) - data[0] = cudf.NA - - def func(x): - return abs(x) - - run_masked_udf_series(func, data, check_dtype=False) - - -class TestStringUDFs: - def test_string_udf_len(self, str_udf_data): - def func(row): - return len(row["str_col"]) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_startswith(self, str_udf_data, substr): - def func(row): - return row["str_col"].startswith(substr) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_endswith(self, str_udf_data, substr): - def func(row): - return row["str_col"].endswith(substr) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_find(self, str_udf_data, substr): - def func(row): - return row["str_col"].find(substr) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_rfind(self, str_udf_data, substr): - def func(row): - return row["str_col"].rfind(substr) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_contains(self, str_udf_data, substr): - def func(row): - return substr in row["str_col"] - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize("other", ["cudf", "123", "", " "]) - @pytest.mark.parametrize("cmpop", comparison_ops) - def test_string_udf_cmpops(self, str_udf_data, other, cmpop): - def func(row): - return 
cmpop(row["str_col"], other) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isalnum(self, str_udf_data): - def func(row): - return row["str_col"].isalnum() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isalpha(self, str_udf_data): - def func(row): - return row["str_col"].isalpha() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isdigit(self, str_udf_data): - def func(row): - return row["str_col"].isdigit() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isdecimal(self, str_udf_data): - def func(row): - return row["str_col"].isdecimal() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isupper(self, str_udf_data): - def func(row): - return row["str_col"].isupper() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_islower(self, str_udf_data): - def func(row): - return row["str_col"].islower() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_isspace(self, str_udf_data): - def func(row): - return row["str_col"].isspace() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_istitle(self, str_udf_data): - def func(row): - return row["str_col"].istitle() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_count(self, str_udf_data, substr): - def func(row): - return row["str_col"].count(substr) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.xfail(reason="Identity function not supported.") - def test_string_udf_return_string(self, str_udf_data): - def func(row): - return row["str_col"] - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) - def test_string_udf_strip(self, str_udf_data, strip_char): - def func(row): - return row["str_col"].strip(strip_char) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) - def test_string_udf_lstrip(self, str_udf_data, strip_char): - def func(row): - return row["str_col"].lstrip(strip_char) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"]) - def test_string_udf_rstrip(self, str_udf_data, strip_char): - def func(row): - return row["str_col"].rstrip(strip_char) - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_upper(self, str_udf_data): - def func(row): - return row["str_col"].upper() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - def test_string_udf_lower(self, str_udf_data): - def func(row): - return row["str_col"].lower() - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize( - "concat_char", ["1", "a", "12", " ", "", ".", "@"] - ) - def test_string_udf_concat(self, str_udf_data, concat_char): - def func(row): - return row["str_col"] + concat_char - - run_masked_udf_test(func, str_udf_data, check_dtype=False) - - @pytest.mark.parametrize("to_replace", ["a", "1", "", "@"]) - @pytest.mark.parametrize("replacement", ["a", "1", "", "@"]) - def test_string_udf_replace(self, str_udf_data, to_replace, replacement): - def func(row): - return row["str_col"].replace(to_replace, replacement) - - run_masked_udf_test(func, str_udf_data, 
check_dtype=False) From 87f8b59fdc0d07d921c4f91637467c35aba769dc Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 20 Aug 2025 21:26:22 +0200 Subject: [PATCH 179/366] Fix cudf-polars dependency list docs (#19750) The cudf-polars documentation only requests installing `ucxx`, but `distributed-ucxx` is required (which also brings `ucxx`), and that needs to be fixed. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19750 --- docs/cudf/source/cudf_polars/streaming_execution.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/cudf/source/cudf_polars/streaming_execution.md b/docs/cudf/source/cudf_polars/streaming_execution.md index 5e74af7038d..56a80a9b11a 100644 --- a/docs/cudf/source/cudf_polars/streaming_execution.md +++ b/docs/cudf/source/cudf_polars/streaming_execution.md @@ -77,9 +77,9 @@ Unlike the single GPU executor, this does require a number of additional dependencies. We currently require [Dask](https://www.dask.org/) and [Dask-CUDA](https://docs.rapids.ai/api/dask-cuda/nightly/) to be -installed. In addition, we recommend that -[ucxx](https://github.com/rapidsai/ucxx) and -[rapidsmpf](https://github.com/rapidsai/rapidsmpf) are installed to +installed. In addition, we recommend that Dask Distributed plugin of +[UCXX](https://github.com/rapidsai/ucxx) and +[RapidsMPF](https://github.com/rapidsai/rapidsmpf) are installed to take advantage of any high-performance networking. To quickly install all of these dependencies into a conda environment, @@ -87,7 +87,7 @@ you can run: ``` conda install -c rapidsai -c conda-forge \ - cudf-polars rapidsmpf dask-cuda ucxx + cudf-polars rapidsmpf dask-cuda distributed-ucxx ``` From 528eea27f8c211bc3670eb67bd562d29410948bf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 20 Aug 2025 12:27:15 -0700 Subject: [PATCH 180/366] Skip third-party tests when possible (#19747) I noticed that we were running these in CI on PRs that didn't need to run them. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/19747 --- .github/workflows/pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 061a24e226b..8249fea30a0 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -343,6 +343,7 @@ jobs: needs: conda-python-build secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: build_type: pull-request branch: ${{ inputs.branch }} From a46a97f1bca6a7bdf6ac1de927c5103911f4df19 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:46:54 -0700 Subject: [PATCH 181/366] Improve `M2`, `VARIANCE` and `STD` hash-based groupby aggregations (#19694) Currently, the aggregations `M2`, `VARIANCE` and `STD` are implemented using a two-pass groupby approach: * In the first pass, `SUM` and `COUNT_VALID` groupby aggregations are computed. * In the second pass, the results of these aggregations are used to perform another groupby aggregation. 
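The rework rests on the standard shifted-data identity for the sum of squared deviations, `SUM((x - avg)*(x - avg)) = SUM(x^2) - SUM(x)*SUM(x)/group_size`, as detailed below. A minimal NumPy sketch (illustrative only, not part of this patch) checking the identity per key group:

```
# Illustrative check of the shifted-data identity behind the rework;
# the group keys and values here are made up and not part of the patch.
import numpy as np

rng = np.random.default_rng(0)
keys = rng.integers(0, 4, size=1_000)
vals = rng.normal(size=1_000)

for k in np.unique(keys):
    x = vals[keys == k]
    # M2 computed the old way: a second groupby-style pass over the values.
    two_pass = np.sum((x - x.mean()) ** 2)
    # M2 as a linear transformation of SUM(x^2), SUM(x) and COUNT_VALID.
    one_pass = np.sum(x * x) - np.sum(x) ** 2 / x.size
    assert np.isclose(two_pass, one_pass)
```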
The second groupby operation can be avoided by changing the implementation of these aggregations to use another formula: * Instead of computing `M2` as groupby aggregation `M2 = SUM((x - avg)*(x-avg))` where `x` are values in the same key group, we can compute it using a linear transformation `M2 = SUM(x^2) - SUM(x)*SUM(x)/group_size`. This linear transformation is much faster than a groupby operation, which involves atomic operations across all values within each key group. In addition, such transformation is performed on the output column, which can be much smaller than the input column used for the second step groupby aggregation. * For `VARIANCE` and `STD`, they can also be computed from `M2` with additional linear transformations. This PR reimplements the aggregations above to use the new formula, changing the second step from a groupby operation into a linear transformation. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Yunsong Wang (https://github.com/PointKernel) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19694 --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/CMakeLists.txt | 2 +- .../{group_m2.cpp => group_m2_var_std.cpp} | 51 +++-- cpp/src/groupby/common/m2_var_std.cu | 196 ++++++++++++++++++ cpp/src/groupby/common/m2_var_std.hpp | 46 ++++ .../groupby/hash/flatten_single_pass_aggs.cpp | 3 + .../hash/hash_compound_agg_finalizer.cu | 99 ++++----- .../hash/hash_compound_agg_finalizer.hpp | 5 +- cpp/src/groupby/hash/m2_var_functor.cuh | 177 ---------------- .../cudf/pandas/scripts/conftest-patch.py | 1 - 10 files changed, 319 insertions(+), 262 deletions(-) rename cpp/benchmarks/groupby/{group_m2.cpp => group_m2_var_std.cpp} (57%) create mode 100644 cpp/src/groupby/common/m2_var_std.cu create mode 100644 cpp/src/groupby/common/m2_var_std.hpp delete mode 100644 cpp/src/groupby/hash/m2_var_functor.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1598d7fb51b..f59b9d6bf7e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -424,6 +424,7 @@ add_library( src/filling/fill.cu src/filling/repeat.cu src/filling/sequence.cu + src/groupby/common/m2_var_std.cu src/groupby/groupby.cu src/groupby/hash/compute_aggregations.cu src/groupby/hash/compute_aggregations_null.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5fc041c4e58..c16f0789795 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -268,7 +268,7 @@ ConfigureNVBench( GROUPBY_NVBENCH groupby/group_complex_keys.cpp groupby/group_histogram.cpp - groupby/group_m2.cpp + groupby/group_m2_var_std.cpp groupby/group_max.cpp groupby/group_max_multithreaded.cpp groupby/group_nunique.cpp diff --git a/cpp/benchmarks/groupby/group_m2.cpp b/cpp/benchmarks/groupby/group_m2_var_std.cpp similarity index 57% rename from cpp/benchmarks/groupby/group_m2.cpp rename to cpp/benchmarks/groupby/group_m2_var_std.cpp index be907e9e343..fbe6ed3cf78 100644 --- a/cpp/benchmarks/groupby/group_m2.cpp +++ b/cpp/benchmarks/groupby/group_m2_var_std.cpp @@ -21,11 +21,13 @@ #include -template -void groupby_m2_helper(nvbench::state& state, - cudf::size_type num_rows, - cudf::size_type value_key_ratio, - double null_probability) +namespace { + +template +void run_benchmark(nvbench::state& state, + cudf::size_type num_rows, + cudf::size_type value_key_ratio, + double null_probability) { auto const keys = [&] { data_profile const profile = @@ -51,13 +53,22 @@ void 
groupby_m2_helper(nvbench::state& state, // Vector of 1 request std::vector requests(1); requests.back().values = values->view(); - requests.back().aggregations.push_back(cudf::make_m2_aggregation()); + if constexpr (Agg == cudf::aggregation::Kind::M2) { + requests.back().aggregations.push_back(cudf::make_m2_aggregation()); + } else if constexpr (Agg == cudf::aggregation::Kind::VARIANCE) { + requests.back().aggregations.push_back( + cudf::make_variance_aggregation()); + } else if constexpr (Agg == cudf::aggregation::Kind::STD) { + requests.back().aggregations.push_back(cudf::make_std_aggregation()); + } else { + throw std::runtime_error("Unsupported aggregation kind."); + } auto const mem_stats_logger = cudf::memory_stats_logger(); state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { - auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys->view()})); - auto const result = gb_obj.aggregate(requests); + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys->view()})); + [[maybe_unused]] auto const result = gb_obj.aggregate(requests); }); auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); @@ -66,18 +77,26 @@ void groupby_m2_helper(nvbench::state& state, mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); } -template -void bench_groupby_m2(nvbench::state& state, nvbench::type_list) +} // namespace + +template +void bench_groupby_m2_var_std(nvbench::state& state, + nvbench::type_list>) { auto const value_key_ratio = static_cast(state.get_int64("value_key_ratio")); auto const num_rows = static_cast(state.get_int64("num_rows")); auto const null_probability = state.get_float64("null_probability"); - groupby_m2_helper(state, num_rows, value_key_ratio, null_probability); + run_benchmark(state, num_rows, value_key_ratio, null_probability); } -NVBENCH_BENCH_TYPES(bench_groupby_m2, NVBENCH_TYPE_AXES(nvbench::type_list)) - .set_name("groupby_m2") - .add_int64_axis("value_key_ratio", {10, 30, 100}) - .add_int64_axis("num_rows", {10'000, 1'000'000, 10'000'000}) - .add_float64_axis("null_probability", {0, 0.1, 0.9}); +using Types = nvbench::type_list; +using AggKinds = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(bench_groupby_m2_var_std, NVBENCH_TYPE_AXES(Types, AggKinds)) + .set_name("groupby_m2_var_std") + .add_int64_axis("value_key_ratio", {20, 100}) + .add_int64_axis("num_rows", {100'000, 10'000'000, 100'000'000}) + .add_float64_axis("null_probability", {0, 0.5}); diff --git a/cpp/src/groupby/common/m2_var_std.cu b/cpp/src/groupby/common/m2_var_std.cu new file mode 100644 index 00000000000..5ca471c15bf --- /dev/null +++ b/cpp/src/groupby/common/m2_var_std.cu @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2020-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "m2_var_std.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf::groupby::detail { + +namespace { + +template +__device__ constexpr bool is_m2_supported() +{ + return is_numeric() && !is_fixed_point(); +} + +struct m2_functor { + template + void operator()(Args...) // + requires(!is_m2_supported()) + { + CUDF_FAIL("Invalid source type for M2 aggregation."); + } + + template + void evaluate(Target* target, + SumSqrType const* sum_sqr, + SumType const* sum, + CountType const* count, + size_type size, + rmm::cuda_stream_view stream) const noexcept + { + thrust::tabulate(rmm::exec_policy_nosync(stream), + target, + target + size, + [sum_sqr, sum, count] __device__(size_type const idx) { + auto const group_count = count[idx]; + if (group_count == 0) { return Target{}; } + auto const group_sum_sqr = static_cast(sum_sqr[idx]); + auto const group_sum = static_cast(sum[idx]); + auto const result = group_sum_sqr - group_sum * group_sum / group_count; + return result; + }); + } + + template + void operator()(mutable_column_view const& target, + column_view const& sum_sqr, + column_view const& sum, + column_view const& count, + rmm::cuda_stream_view stream) const noexcept // + requires(is_m2_supported()) + { + using Target = cudf::detail::target_type_t; + using SumSqrType = cudf::detail::target_type_t; + using SumType = cudf::detail::target_type_t; + using CountType = cudf::detail::target_type_t; + + // Separate the implementation into another function, which has fewer instantiations since + // the data types (target/sum/count etc) are mostly the same. + evaluate(target.begin(), + sum_sqr.begin(), + sum.begin(), + count.begin(), + target.size(), + stream); + } +}; + +} // namespace + +std::unique_ptr compute_m2(data_type source_type, + column_view const& sum_sqr, + column_view const& sum, + column_view const& count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output = make_numeric_column(cudf::detail::target_type(source_type, aggregation::M2), + sum.size(), + cudf::detail::copy_bitmask(sum, stream, mr), + sum.null_count(), + stream, + mr); + type_dispatcher(source_type, m2_functor{}, output->mutable_view(), sum_sqr, sum, count, stream); + return output; +} + +namespace { + +// M2, VARIANCE, STD and COUNT_VALID aggregations always have fixed types, thus we hardcode them +// instead of using type dispatcher for faster compilation. +using M2Type = double; +using VarianceType = double; +using StdType = double; +using CountType = int32_t; + +void check_input_types(column_view const& m2, column_view const& count) +{ + CUDF_EXPECTS(m2.type().id() == type_to_id(), + "Data type of M2 aggregation must be FLOAT64.", + std::invalid_argument); + CUDF_EXPECTS(count.type().id() == type_to_id(), + "Data type of COUNT_VALID aggregation must be INT32.", + std::invalid_argument); +} + +template +std::unique_ptr compute_variance_std(TransformFunc&& transform_fn, + size_type size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output = make_numeric_column( + data_type(type_to_id()), size, mask_state::UNALLOCATED, stream, mr); + + // Since we may have new null rows depending on the group count, we need to generate a new null + // mask from scratch. 
+ rmm::device_uvector validity(size, stream); + + auto const out_it = + thrust::make_zip_iterator(output->mutable_view().begin(), validity.begin()); + thrust::tabulate(rmm::exec_policy_nosync(stream), out_it, out_it + size, transform_fn); + + auto [null_mask, null_count] = + cudf::detail::valid_if(validity.begin(), validity.end(), cuda::std::identity{}, stream, mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } + + return output; +} + +} // namespace + +std::unique_ptr compute_variance(column_view const& m2, + column_view const& count, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + check_input_types(m2, count); + + auto const transform_func = + [m2 = m2.begin(), count = count.begin(), ddof] __device__( + size_type const idx) -> cuda::std::pair { + auto const group_count = count[idx]; + auto const df = group_count - ddof; + if (group_count == 0 || df <= 0) { return {VarianceType{}, false}; } + return {m2[idx] / df, true}; + }; + return compute_variance_std(transform_func, m2.size(), stream, mr); +} + +std::unique_ptr compute_std(column_view const& m2, + column_view const& count, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + check_input_types(m2, count); + + auto const transform_func = + [m2 = m2.begin(), count = count.begin(), ddof] __device__( + size_type const idx) -> cuda::std::pair { + auto const group_count = count[idx]; + auto const df = group_count - ddof; + if (group_count == 0 || df <= 0) { return {StdType{}, false}; } + return {cuda::std::sqrt(m2[idx] / df), true}; + }; + return compute_variance_std(transform_func, m2.size(), stream, mr); +} + +} // namespace cudf::groupby::detail diff --git a/cpp/src/groupby/common/m2_var_std.hpp b/cpp/src/groupby/common/m2_var_std.hpp new file mode 100644 index 00000000000..161bfbcb909 --- /dev/null +++ b/cpp/src/groupby/common/m2_var_std.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +namespace cudf::groupby::detail { + +std::unique_ptr compute_m2(data_type source_type, + column_view const& sum_sqr, + column_view const& sum, + column_view const& count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +std::unique_ptr compute_variance(column_view const& m2, + column_view const& count, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +std::unique_ptr compute_std(column_view const& m2, + column_view const& count, + size_type ddof, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace cudf::groupby::detail diff --git a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp index 8e64560d246..6f2402c5149 100644 --- a/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp +++ b/cpp/src/groupby/hash/flatten_single_pass_aggs.cpp @@ -72,6 +72,7 @@ class groupby_simple_aggregations_collector final cudf::detail::m2_aggregation const&) override { std::vector> aggs; + aggs.push_back(make_sum_of_squares_aggregation()); aggs.push_back(make_sum_aggregation()); // COUNT_VALID aggs.push_back(make_count_aggregation()); @@ -83,6 +84,7 @@ class groupby_simple_aggregations_collector final cudf::detail::var_aggregation const&) override { std::vector> aggs; + aggs.push_back(make_sum_of_squares_aggregation()); aggs.push_back(make_sum_aggregation()); // COUNT_VALID aggs.push_back(make_count_aggregation()); @@ -94,6 +96,7 @@ class groupby_simple_aggregations_collector final cudf::detail::std_aggregation const&) override { std::vector> aggs; + aggs.push_back(make_sum_of_squares_aggregation()); aggs.push_back(make_sum_aggregation()); // COUNT_VALID aggs.push_back(make_count_aggregation()); diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu index 8b61254ce38..bd17780671b 100644 --- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu @@ -14,28 +14,25 @@ * limitations under the License. */ +#include "groupby/common/m2_var_std.hpp" #include "hash_compound_agg_finalizer.hpp" #include "helpers.cuh" -#include "m2_var_functor.cuh" -#include #include #include #include #include #include -#include +#include #include #include #include -#include - namespace cudf::groupby::detail::hash { template hash_compound_agg_finalizer::hash_compound_agg_finalizer( - column_view col, + column_view const& col, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, @@ -52,7 +49,7 @@ hash_compound_agg_finalizer::hash_compound_agg_finalizer( stream(stream), mr(mr) { - result_type = + input_type = cudf::is_dictionary(col.type()) ? 
cudf::dictionary_column_view(col).keys().type() : col.type(); } @@ -106,7 +103,7 @@ template void hash_compound_agg_finalizer::visit(cudf::detail::min_aggregation const& agg) { if (dense_results->has_result(col, agg)) return; - if (result_type.id() == type_id::STRING) { + if (input_type.id() == type_id::STRING) { auto transformed_agg = make_argmin_aggregation(); dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); } else { @@ -119,7 +116,7 @@ void hash_compound_agg_finalizer::visit(cudf::detail::max_aggregation c { if (dense_results->has_result(col, agg)) return; - if (result_type.id() == type_id::STRING) { + if (input_type.id() == type_id::STRING) { auto transformed_agg = make_argmax_aggregation(); dense_results->add_result(col, agg, gather_argminmax(*transformed_agg)); } else { @@ -143,7 +140,7 @@ void hash_compound_agg_finalizer::visit(cudf::detail::mean_aggregation cudf::detail::binary_operation(sum_result, count_result, binary_operator::DIV, - cudf::detail::target_type(result_type, aggregation::MEAN), + cudf::detail::target_type(input_type, aggregation::MEAN), stream, mr); dense_results->add_result(col, agg, std::move(result)); @@ -154,31 +151,18 @@ void hash_compound_agg_finalizer::visit(cudf::detail::m2_aggregation co { if (dense_results->has_result(col, agg)) { return; } - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); + auto const sum_sqr_agg = make_sum_of_squares_aggregation(); + auto const sum_agg = make_sum_aggregation(); + auto const count_agg = make_count_aggregation(); + this->visit(*sum_sqr_agg); this->visit(*sum_agg); this->visit(*count_agg); - auto const sum_result = sparse_results->get_result(col, *sum_agg); - auto const count_result = sparse_results->get_result(col, *count_agg); - - auto const d_values_ptr = column_device_view::create(col, stream); - auto const d_sum_ptr = column_device_view::create(sum_result, stream).release(); - auto const d_count_ptr = column_device_view::create(count_result, stream).release(); - - auto output = make_fixed_width_column( - cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto output_view = mutable_column_device_view::create(output->mutable_view(), stream); - auto output_tview = mutable_table_view{{output->mutable_view()}}; - cudf::detail::initialize_with_identity( - output_tview, host_span(&agg.kind, 1), stream); - - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - col.size(), - m2_hash_functor{set, row_bitmask, *output_view, *d_values_ptr, *d_sum_ptr, *d_count_ptr}); - sparse_results->add_result(col, agg, std::move(output)); - dense_results->add_result(col, agg, to_dense_agg_result(agg)); + auto const sum_sqr_result = dense_results->get_result(col, *sum_sqr_agg); + auto const sum_result = dense_results->get_result(col, *sum_agg); + auto const count_result = dense_results->get_result(col, *count_agg); + + auto output = compute_m2(input_type, sum_sqr_result, sum_result, count_result, stream, mr); + dense_results->add_result(col, agg, std::move(output)); } template @@ -186,44 +170,31 @@ void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation c { if (dense_results->has_result(col, agg)) return; - auto sum_agg = make_sum_aggregation(); - auto count_agg = make_count_aggregation(); - this->visit(*sum_agg); + auto const m2_agg = make_m2_aggregation(); + auto const count_agg = make_count_aggregation(); + this->visit(*dynamic_cast(m2_agg.get())); this->visit(*count_agg); - 
column_view sum_result = sparse_results->get_result(col, *sum_agg); - column_view count_result = sparse_results->get_result(col, *count_agg); - - auto values_view = column_device_view::create(col, stream); - auto sum_view = column_device_view::create(sum_result, stream); - auto count_view = column_device_view::create(count_result, stream); - - auto var_result = make_fixed_width_column( - cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); - mutable_table_view var_table_view{{var_result->mutable_view()}}; - cudf::detail::initialize_with_identity( - var_table_view, host_span(&agg.kind, 1), stream); - - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - col.size(), - var_hash_functor{ - set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); - sparse_results->add_result(col, agg, std::move(var_result)); - dense_results->add_result(col, agg, to_dense_agg_result(agg)); + auto const m2_result = dense_results->get_result(col, *m2_agg); + auto const count_result = dense_results->get_result(col, *count_agg); + + auto output = compute_variance(m2_result, count_result, agg._ddof, stream, mr); + dense_results->add_result(col, agg, std::move(output)); } template void hash_compound_agg_finalizer::visit(cudf::detail::std_aggregation const& agg) { - if (dense_results->has_result(col, agg)) return; - auto var_agg = make_variance_aggregation(agg._ddof); - this->visit(*dynamic_cast(var_agg.get())); - column_view variance = dense_results->get_result(col, *var_agg); + if (dense_results->has_result(col, agg)) { return; } - auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr); - dense_results->add_result(col, agg, std::move(result)); + auto const m2_agg = make_m2_aggregation(); + auto const count_agg = make_count_aggregation(); + this->visit(*dynamic_cast(m2_agg.get())); + this->visit(*count_agg); + auto const m2_result = dense_results->get_result(col, *m2_agg); + auto const count_result = dense_results->get_result(col, *count_agg); + + auto output = compute_std(m2_result, count_result, agg._ddof, stream, mr); + dense_results->add_result(col, agg, std::move(output)); } template class hash_compound_agg_finalizer>; diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp index 63e08a19177..a78a0729a06 100644 --- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp +++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp @@ -21,13 +21,12 @@ #include #include -#include namespace cudf::groupby::detail::hash { template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; - data_type result_type; + data_type input_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span gather_map; @@ -39,7 +38,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final public: using cudf::detail::aggregation_finalizer::visit; - hash_compound_agg_finalizer(column_view col, + hash_compound_agg_finalizer(column_view const& col, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, diff --git a/cpp/src/groupby/hash/m2_var_functor.cuh b/cpp/src/groupby/hash/m2_var_functor.cuh deleted file mode 100644 index c8f72fc4fbc..00000000000 --- 
a/cpp/src/groupby/hash/m2_var_functor.cuh +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2020-2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -namespace cudf::groupby::detail::hash { - -template -__device__ constexpr static bool is_m2_var_supported() -{ - return is_numeric() && !is_fixed_point(); -} - -template -struct m2_hash_functor { - SetType set; - bitmask_type const* __restrict__ row_bitmask; - mutable_column_device_view target; - column_device_view source; - column_device_view sum; - column_device_view count; - m2_hash_functor(SetType set, - bitmask_type const* row_bitmask, - mutable_column_device_view target, - column_device_view source, - column_device_view sum, - column_device_view count) - : set{set}, row_bitmask{row_bitmask}, target{target}, source{source}, sum{sum}, count{count} - { - } - - template - __device__ void operator()(column_device_view const&, size_type, size_type) noexcept - requires(!is_m2_var_supported()) - { - CUDF_UNREACHABLE("Invalid source type for M2 aggregation."); - } - - template - __device__ void operator()(column_device_view const& source, - size_type source_index, - size_type target_index) noexcept - requires(is_m2_var_supported()) - { - using Target = cudf::detail::target_type_t; - using SumType = cudf::detail::target_type_t; - using CountType = cudf::detail::target_type_t; - - if (source.is_null(source_index)) { return; } - auto const group_size = count.element(target_index); - if (group_size == 0) { return; } - - auto const x = static_cast(source.element(source_index)); - auto const mean = static_cast(sum.element(target_index)) / group_size; - auto const diff = x - mean; - auto const result = diff * diff; - cuda::atomic_ref ref{target.element(target_index)}; - ref.fetch_add(result, cuda::std::memory_order_relaxed); - if (target.is_null(target_index)) { target.set_valid(target_index); } - } - - __device__ inline void operator()(size_type source_index) - { - if (row_bitmask == nullptr or bit_is_set(row_bitmask, source_index)) { - auto const target_index = *set.find(source_index); - - auto col = source; - auto source_type = source.type(); - if (source_type.id() == type_id::DICTIONARY32) { - col = source.child(cudf::dictionary_column_view::keys_column_index); - source_type = col.type(); - source_index = static_cast(source.element(source_index)); - } - - type_dispatcher(source_type, *this, col, source_index, target_index); - } - } -}; - -template -struct var_hash_functor { - SetType set; - bitmask_type const* __restrict__ row_bitmask; - mutable_column_device_view target; - column_device_view source; - column_device_view sum; - column_device_view count; - size_type ddof; - var_hash_functor(SetType set, - bitmask_type const* row_bitmask, - mutable_column_device_view target, - column_device_view source, - column_device_view sum, - column_device_view count, - size_type ddof) - : set{set}, - row_bitmask{row_bitmask}, - 
target{target}, - source{source}, - sum{sum}, - count{count}, - ddof{ddof} - { - } - - template - __device__ void operator()(column_device_view const&, size_type, size_type) noexcept - requires(!is_m2_var_supported()) - { - CUDF_UNREACHABLE("Invalid source type for std, var aggregation combination."); - } - - template - __device__ void operator()(column_device_view const& source, - size_type source_index, - size_type target_index) noexcept - requires(is_m2_var_supported()) - { - using Target = cudf::detail::target_type_t; - using SumType = cudf::detail::target_type_t; - using CountType = cudf::detail::target_type_t; - - if (source.is_null(source_index)) return; - CountType group_size = count.element(target_index); - if (group_size == 0 or group_size - ddof <= 0) return; - - auto x = static_cast(source.element(source_index)); - auto mean = static_cast(sum.element(target_index)) / group_size; - Target result = (x - mean) * (x - mean) / (group_size - ddof); - cuda::atomic_ref ref{target.element(target_index)}; - ref.fetch_add(result, cuda::std::memory_order_relaxed); - // STD sqrt is applied in finalize() - - if (target.is_null(target_index)) { target.set_valid(target_index); } - } - - __device__ inline void operator()(size_type source_index) - { - if (row_bitmask == nullptr or cudf::bit_is_set(row_bitmask, source_index)) { - auto const target_index = *set.find(source_index); - - auto col = source; - auto source_type = source.type(); - if (source_type.id() == type_id::DICTIONARY32) { - col = source.child(cudf::dictionary_column_view::keys_column_index); - source_type = col.type(); - source_index = static_cast(source.element(source_index)); - } - - type_dispatcher(source_type, *this, col, source_index, target_index); - } - } -}; - -} // namespace cudf::groupby::detail::hash diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index b0419f844fd..cf12fae4337 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -7586,7 +7586,6 @@ def pytest_unconfigure(config): "tests/groupby/test_raises.py::test_groupby_raises_category[by8-True-fillna-method]", "tests/groupby/test_raises.py::test_groupby_raises_category[by9-False-fillna-method]", "tests/groupby/test_raises.py::test_groupby_raises_category[by9-True-fillna-method]", - "tests/groupby/test_raises.py::test_groupby_raises_timedelta[var]", "tests/groupby/test_reductions.py::test_empty_categorical[True]", "tests/groupby/test_reductions.py::test_first_last_skipna[Float32-False-False-first]", "tests/groupby/test_reductions.py::test_first_last_skipna[Float32-False-False-last]", From ada4351653e0d2acfb0e50f95b05ad9dfb9f926a Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:29:17 -0400 Subject: [PATCH 182/366] Support `over` expression (window mapping) in cudf-polars (#19684) - Closes #16227 - Contributes to https://github.com/rapidsai/cudf/issues/18633 Note this PR implements grouped window aggregations (not really rolling). I think we can keep the name `GroupedRollingWindow` though because polars can support expressions like `rolling(...).over(..)` in the future. 
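For illustration, this is the shape of query that now runs on the GPU (a hypothetical example, not taken from the test suite):

```python
import polars as pl

df = pl.LazyFrame({"g": [1, 1, 2], "x": [1.0, 2.0, 3.0]})
# Aggregate once per group, then broadcast the per-group result back to
# every row of that group: both rows in group 1 get 3.0, group 2 gets 3.0.
q = df.select(pl.col("x").sum().over("g"))
print(q.collect(engine=pl.GPUEngine()))
```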
- Also contributes to #19200 and helps unblock PDS-DS queries 12, 20, 47, 51, 57, 89, 98 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19684 --- .../cudf_polars/dsl/expressions/rolling.py | 172 +++++++++++++++++- .../cudf_polars/cudf_polars/dsl/translate.py | 32 +++- .../cudf_polars/dsl/utils/aggregations.py | 16 +- .../cudf_polars/experimental/parallel.py | 25 ++- .../cudf_polars/experimental/utils.py | 8 +- .../cudf_polars/cudf_polars/testing/plugin.py | 1 - .../tests/experimental/test_rolling.py | 21 +++ .../tests/expressions/test_rolling.py | 77 +++++++- .../tests/test_window_functions.py | 4 +- 9 files changed, 325 insertions(+), 31 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py index a9e0eb0c0d8..f89d0e4133f 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py @@ -8,15 +8,19 @@ from typing import TYPE_CHECKING, Any +import polars as pl + import pylibcudf as plc -from cudf_polars.containers import Column +from cudf_polars.containers import Column, DataFrame, DataType from cudf_polars.dsl import expr from cudf_polars.dsl.expressions.base import ExecutionContext, Expr +from cudf_polars.dsl.utils.reshape import broadcast from cudf_polars.dsl.utils.windows import offsets_to_windows, range_window_bounds if TYPE_CHECKING: - from cudf_polars.containers import DataFrame, DataType + from collections.abc import Sequence + from cudf_polars.typing import ClosedInterval, Duration __all__ = ["GroupedRollingWindow", "RollingWindow", "to_request"] @@ -152,12 +156,166 @@ def do_evaluate( # noqa: D102 class GroupedRollingWindow(Expr): - __slots__ = ("options",) - _non_child = ("dtype", "options") + """ + Compute a window ``.over(...)`` aggregation and broadcast to rows. + + Notes + ----- + - This expression node currently implements **grouped window mapping** + (aggregate once per group, then broadcast back), not rolling windows. + - It can be extended later to support `rolling(...).over(...)` + when polars supports that expression. + """ + + __slots__ = ("by_count", "named_aggs", "options", "post") + _non_child = ("dtype", "options", "named_aggs", "post", "by_count") - def __init__(self, dtype: DataType, options: Any, agg: Expr, *by: Expr) -> None: + def __init__( + self, + dtype: DataType, + options: Any, + named_aggs: Sequence[expr.NamedExpr], + post: expr.NamedExpr, + *by: Expr, + ) -> None: self.dtype = dtype self.options = options - self.children = (agg, *by) + self.named_aggs = tuple(named_aggs) + self.post = post self.is_pointwise = False - raise NotImplementedError("Grouped rolling window not implemented") + + unsupported = [ + type(named_expr.value).__name__ + for named_expr in self.named_aggs + if not isinstance(named_expr.value, (expr.Len, expr.Agg)) + ] + if unsupported: + kinds = ", ".join(sorted(set(unsupported))) + raise NotImplementedError( + f"Unsupported over(...) 
expression: {kinds}"
+            )
+
+        # Ensures every partition-by is an Expr
+        # Fixes over(1) cases with the streaming
+        # executor and a small blocksize
+        by_expr = [
+            (b if isinstance(b, Expr) else expr.Literal(DataType(pl.Int64()), b))
+            for b in by
+        ]
+
+        # Expose agg dependencies as children so the streaming
+        # executor retains required source columns
+        child_deps = [
+            v.children[0]
+            for ne in self.named_aggs
+            for v in (ne.value,)
+            if isinstance(v, expr.Agg)
+        ]
+        self.by_count = len(by_expr)
+        self.children = tuple(by_expr) + tuple(child_deps)
+
+    def do_evaluate(  # noqa: D102
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
+    ) -> Column:
+        if context != ExecutionContext.FRAME:
+            raise RuntimeError(
+                "Window mapping (.over) can only be evaluated at the frame level"
+            )  # pragma: no cover; translation raises first
+
+        by_exprs = self.children[: self.by_count]
+        by_cols = list(
+            broadcast(
+                *(b.evaluate(df, context=ExecutionContext.FRAME) for b in by_exprs),
+                target_length=df.num_rows,
+            )
+        )
+        by_tbl = plc.Table([c.obj for c in by_cols])
+
+        sorted_flag = (
+            plc.types.Sorted.YES
+            if all(k.is_sorted for k in by_cols)
+            else plc.types.Sorted.NO
+        )
+        grouper = plc.groupby.GroupBy(
+            by_tbl,
+            null_handling=plc.types.NullPolicy.INCLUDE,
+            keys_are_sorted=sorted_flag,
+            column_order=[k.order for k in by_cols],
+            null_precedence=[k.null_order for k in by_cols],
+        )
+
+        gb_requests: list[plc.groupby.GroupByRequest] = []
+        out_names: list[str] = []
+        out_dtypes: list[DataType] = []
+        for ne in self.named_aggs:
+            val = ne.value
+            out_names.append(ne.name)
+            out_dtypes.append(val.dtype)
+
+            if isinstance(val, expr.Len):
+                # Count rows per group via sum(1).
+                ones = plc.Column.from_scalar(
+                    plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT8)), df.num_rows
+                )
+                gb_requests.append(
+                    plc.groupby.GroupByRequest(ones, [plc.aggregation.sum()])
+                )
+            elif isinstance(val, expr.Agg):
+                (child,) = (
+                    val.children if val.name != "quantile" else (val.children[0],)
+                )
+                col = child.evaluate(df, context=ExecutionContext.FRAME).obj
+                gb_requests.append(plc.groupby.GroupByRequest(col, [val.agg_request]))
+
+        group_keys_tbl, value_tables = grouper.aggregate(gb_requests)
+        out_cols = (t.columns()[0] for t in value_tables)
+
+        # Build gather maps to broadcast per-group results to all rows.
+        # Also left-join input keys to group-keys so every input row appears exactly once.
+        lg, rg = plc.join.left_join(
+            by_tbl, group_keys_tbl, plc.types.NullEquality.EQUAL
+        )
+
+        # Reorder the gather maps to preserve left/input order
+        left_rows, right_rows = by_tbl.num_rows(), group_keys_tbl.num_rows()
+        init = plc.Scalar.from_py(0, plc.types.SIZE_TYPE)
+        step = plc.Scalar.from_py(1, plc.types.SIZE_TYPE)
+        left_order = plc.copying.gather(
+            plc.Table([plc.filling.sequence(left_rows, init, step)]),
+            lg,
+            plc.copying.OutOfBoundsPolicy.DONT_CHECK,
+        )
+        right_order = plc.copying.gather(
+            plc.Table([plc.filling.sequence(right_rows, init, step)]),
+            rg,
+            plc.copying.OutOfBoundsPolicy.NULLIFY,
+        )
+        # Sort both maps by (left_order, right_order), then use the reordered right map
+        # to gather group aggregates in the original row order.
+        _, rg = plc.sorting.stable_sort_by_key(
+            plc.Table([lg, rg]),
+            plc.Table([*left_order.columns(), *right_order.columns()]),
+            [plc.types.Order.ASCENDING, plc.types.Order.ASCENDING],
+            [plc.types.NullOrder.AFTER, plc.types.NullOrder.AFTER],
+        ).columns()
+
+        # Broadcast each aggregated result back to row-shape using the right map.
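+        # Every input key should appear in ``group_keys_tbl`` (the groups were
+        # built from this same key table with nulls included), so the NULLIFY
+        # policy below is defensive and is not expected to introduce nulls.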
+ broadcasted_cols = [ + Column( + plc.copying.gather( + plc.Table([col]), + rg, + plc.copying.OutOfBoundsPolicy.NULLIFY, + ).columns()[0], + name=named_expr.name, + dtype=dtype, + ) + for named_expr, dtype, col in zip( + self.named_aggs, out_dtypes, out_cols, strict=True + ) + ] + + # Create a temporary DataFrame with the broadcasted columns named by their + # placeholder names from agg decomposition, then evaluate the post-expression. + df = DataFrame(broadcasted_cols) + return self.post.value.evaluate(df, context=ExecutionContext.FRAME) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 27aa6bfbf4a..2e0401d006c 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -748,10 +748,38 @@ def _( return replace([named_post_agg.value], replacements)[0] elif isinstance(node.options, pl_expr.WindowMapping): # pl.col("a").over(...) + agg = translator.translate_expr(n=node.function, schema=schema) + name_gen = unique_names(schema) + aggs, post = decompose_single_agg( + expr.NamedExpr(next(name_gen), agg), + name_gen, + is_top=True, + # Follows GROUPBY semantics + context=ExecutionContext.GROUPBY, + ) + + mapping = node.options.kind + has_order_by = node.order_by is not None + descending = bool(getattr(node, "order_by_descending", False)) + nulls_last = bool(getattr(node, "order_by_nulls_last", False)) + + if has_order_by or descending or nulls_last: + raise NotImplementedError( + f"over(order_by) not supported yet: " + f"{node.order_by=}, {descending=}, {nulls_last=}" + ) + + if mapping != "groups_to_rows": + raise NotImplementedError( + f"over(mapping_strategy) not supported yet: {mapping=}; " + f"expected 'groups_to_rows'" + ) + return expr.GroupedRollingWindow( dtype, - node.options, - translator.translate_expr(n=node.function, schema=schema), + (mapping, has_order_by, descending, nulls_last), + [agg for agg, _ in aggs], + post, *(translator.translate_expr(n=n, schema=schema) for n in node.partition_by), ) assert_never(node.options) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index 00145e1d533..83674fcc61d 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -176,6 +176,14 @@ def decompose_single_agg( [(named_expr.reconstruct(agg.reconstruct([child])), True)], named_expr.reconstruct(expr.Col(agg.dtype, name)), ) + elif agg.name in ("mean", "median", "quantile", "std", "var"): + # libcudf promotes these to float64; but polars + # keeps Float32, so cast back in post-processing. 
+ named = expr.NamedExpr(name, agg) + post_col: expr.Expr = expr.Col(DataType(pl.Float64()), name) + if agg.dtype.plc.id() == plc.TypeId.FLOAT32: + post_col = expr.Cast(agg.dtype, post_col) + return [(named, True)], expr.NamedExpr(name, post_col) elif agg.name == "sum": col = ( expr.Cast(agg.dtype, expr.Col(DataType(pl.datatypes.Int64()), name)) @@ -222,14 +230,6 @@ def decompose_single_agg( return [(named_expr, True), (win_len, True)], expr.NamedExpr( name, post_ternary_expr ) - elif agg.name == "mean": - post_agg_col: expr.Expr = expr.Col( - DataType(pl.Float64), name - ) # libcudf promotes to float64 - if agg.dtype.plc.id() == plc.TypeId.FLOAT32: - # Cast back to float32 to match Polars - post_agg_col = expr.Cast(agg.dtype, post_agg_col) - return [(named_expr, True)], named_expr.reconstruct(post_agg_col) else: return [(named_expr, True)], named_expr.reconstruct( expr.Col(agg.dtype, name) diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py index bb0aff53f94..f470168bf71 100644 --- a/python/cudf_polars/cudf_polars/experimental/parallel.py +++ b/python/cudf_polars/cudf_polars/experimental/parallel.py @@ -35,7 +35,7 @@ ) from cudf_polars.experimental.io import _clear_source_info_cache from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.utils import _concat, _lower_ir_fallback +from cudf_polars.experimental.utils import _concat, _contains_over, _lower_ir_fallback if TYPE_CHECKING: from collections.abc import MutableMapping @@ -346,11 +346,32 @@ def _lower_ir_pwise( _lower_ir_pwise_preserve = partial(_lower_ir_pwise, preserve_partitioning=True) lower_ir_node.register(Projection, _lower_ir_pwise_preserve) -lower_ir_node.register(Filter, _lower_ir_pwise_preserve) lower_ir_node.register(Cache, _lower_ir_pwise) lower_ir_node.register(HConcat, _lower_ir_pwise) +@lower_ir_node.register(Filter) +def _( + ir: Filter, rec: LowerIRTransformer +) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: + child, partition_info = rec(ir.children[0]) + + if partition_info[child].count > 1 and _contains_over([ir.mask.value]): + # mask contains .over(...), collapse to single partition + return _lower_ir_fallback( + ir.reconstruct([child]), + rec, + msg=( + "over(...) inside filter is not supported for multiple partitions; " + "falling back to in-memory evaluation." + ), + ) + + new_node = ir.reconstruct([child]) + partition_info[new_node] = partition_info[child] + return new_node, partition_info + + @lower_ir_node.register(Slice) def _( ir: Slice, rec: LowerIRTransformer diff --git a/python/cudf_polars/cudf_polars/experimental/utils.py b/python/cudf_polars/cudf_polars/experimental/utils.py index 2ea74eac82e..bdebc02fac7 100644 --- a/python/cudf_polars/cudf_polars/experimental/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/utils.py @@ -10,8 +10,9 @@ from itertools import chain from typing import TYPE_CHECKING -from cudf_polars.dsl.expr import Col +from cudf_polars.dsl.expr import Col, Expr, GroupedRollingWindow from cudf_polars.dsl.ir import Union +from cudf_polars.dsl.traversal import traversal from cudf_polars.experimental.base import PartitionInfo if TYPE_CHECKING: @@ -110,3 +111,8 @@ def _get_unique_fractions( for c, f in user_unique_fractions.items() if c in column_names } + + +def _contains_over(exprs: Sequence[Expr]) -> bool: + """Return True if any expression in 'exprs' contains an over(...) (ie. 
GroupedRollingWindow).""" + return any(isinstance(e, GroupedRollingWindow) for e in traversal(exprs)) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 14956d2cfbc..cd3bbe01020 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -130,7 +130,6 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input11-expected11-input_dtype11-output_dtype11]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input12-expected12-input_dtype12-output_dtype12]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input13-expected13-input_dtype13-output_dtype13]": "Unsupported groupby-agg for a particular dtype", - "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input11-expected11-input_dtype11-output_dtype11]": "Unsupported groupby-agg for a particular dtype", "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input12-expected12-input_dtype12-output_dtype12]": "Unsupported groupby-agg for a particular dtype", diff --git a/python/cudf_polars/tests/experimental/test_rolling.py b/python/cudf_polars/tests/experimental/test_rolling.py index 3f7046376c1..0748a3ce52e 100644 --- a/python/cudf_polars/tests/experimental/test_rolling.py +++ b/python/cudf_polars/tests/experimental/test_rolling.py @@ -34,3 +34,24 @@ def test_rolling_datetime(): ) q = df.with_columns(pl.sum("a").rolling(index_column="dt", period="2d")) assert_gpu_result_equal(q, engine=engine) + + +def test_over_in_filter_unsupported() -> None: + q = pl.concat( + [ + pl.LazyFrame({"k": ["x", "y"], "v": [3, 2]}), + pl.LazyFrame({"k": ["x", "y"], "v": [5, 7]}), + ] + ).filter(pl.len().over("k") == 2) + + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "max_rows_per_partition": 1, + "scheduler": DEFAULT_SCHEDULER, + "fallback_mode": StreamingFallbackMode.SILENT, + }, + ) + + assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py index 373e4903cef..6cf3c956358 100644 --- a/python/cudf_polars/tests/expressions/test_rolling.py +++ b/python/cudf_polars/tests/expressions/test_rolling.py @@ -14,6 +14,19 @@ from cudf_polars.utils.versions import POLARS_VERSION_LT_130 +@pytest.fixture +def df(): + return pl.LazyFrame( + { + "g": [1, 1, 2, 2, 2], + "x": [1, 2, 3, 4, 5], + "x2": [1, 100, 3, 4, 50], + "g2": ["a", "a", "b", "a", "a"], + "g_null": [1, None, 1, None, 2], + } + ) + + @pytest.mark.parametrize("time_unit", ["ns", "us", "ms"]) def test_rolling_datetime(time_unit): dates = [ @@ -124,14 +137,6 @@ def test_invalid_duration_spec_raises_in_translation(): assert_ir_translation_raises(q, pl.exceptions.InvalidOperationError) -def test_grouped_rolling(): - df = pl.LazyFrame({"a": [1, 2, 3, 4, 5, 6], "b": [1, 2, 1, 3, 1, 2]}) - - q = df.select(pl.col("a").min().over("b")) - - assert_ir_translation_raises(q, NotImplementedError) - - def 
test_rolling_inside_groupby_raises(): df = pl.LazyFrame( {"keys": [1, 1, 1, 2], "orderby": [1, 2, 4, 2], "values": [1, 2, 3, 4]} @@ -156,3 +161,59 @@ def test_rolling_sum_all_null_window_returns_null(): ) # Expected: [null, null, 5, 5, 5, 1] assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "expr", + [ + pl.col("x").sum().over("g"), + pl.len().over("g"), + pl.col("x").cast(pl.Float64).mean().round(1).over("g"), + pl.col("x2").quantile(0.5, interpolation="lower").over("g"), + pl.col("x").sum().over("g", "g2"), + pl.col("x").sum().over(pl.col("g") % 2), + pl.col("x").sum().over("g_null"), + pl.col("x").cast(pl.Float32).mean().over("g"), + pl.col("x").sum().over(pl.lit(1)), + ], + ids=[ + "sum_broadcast", + "len_broadcast", + "mean_round", + "quantile_lower", + "multi_key_partition", + "expr_partition", + "null_keys", + "mean_float32_promotion", + "literal_partition", + ], +) +def test_over_group_various(df, expr): + q = df.select(expr) + assert_gpu_result_equal(q) + + +def test_window_over_group_sum_all_null_group_is_zero(df): + q = df.with_columns( + pl.when(pl.col("g") == 1) + .then(pl.lit(None, dtype=pl.Int64)) + .otherwise(pl.col("x")) + .alias("null") + ).select(s=pl.col("null").sum().over("g")) + assert_gpu_result_equal(q) + + +def test_over_with_order_by_unsupported(df): + q = df.select(pl.col("x").sum().over("g", order_by="x")) + assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize("strategy", ["explode", "join"], ids=["explode", "join"]) +def test_over_with_mapping_strategy_unsupported(df, strategy): + q = df.select(pl.col("x").sum().over("g", mapping_strategy=strategy)) + assert_ir_translation_raises(q, NotImplementedError) + + +def test_over_boolean_function_unsupported(df): + q = df.select(pl.col("x").not_().over("g")) + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_window_functions.py b/python/cudf_polars/tests/test_window_functions.py index 4bf1e482301..8ddaae4bd75 100644 --- a/python/cudf_polars/tests/test_window_functions.py +++ b/python/cudf_polars/tests/test_window_functions.py @@ -84,9 +84,9 @@ def test_over(df: pl.LazyFrame, partition_by, agg_expr): result_name = f"{agg_expr!s}_over_{partition_by!s}" window_expr = window_expr.alias(result_name) - query = df.with_columns(window_expr) + q = df.with_columns(window_expr) - assert_ir_translation_raises(query, NotImplementedError) + assert_gpu_result_equal(q) def test_over_with_sort(df: pl.LazyFrame): From c09beacd83227e390b53b45b14abf2db55aee8a4 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 20 Aug 2025 21:45:12 -0400 Subject: [PATCH 183/366] Support ternary expression inside groupby/rolling context (#19242) Closes #18841 and Contributes to #19200. Helps unblock TPC-DS queries 12, 49, 76, 78. 
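As a sketch of what this enables (hypothetical data, mirroring the new test cases):

```python
import polars as pl

df = pl.LazyFrame({"k": [1, 1, 2], "v": [-1.0, 2.0, 3.0]})
# A ternary expression inside a groupby aggregation: sum only the
# positive values within each group.
q = df.group_by("k").agg(
    pl.when(pl.col("v") > 0).then(pl.col("v")).otherwise(None).sum()
)
print(q.collect(engine=pl.GPUEngine()))
```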
- Depends on #19680
- Depends on #19689

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Tom Augspurger (https://github.com/TomAugspurger)

URL: https://github.com/rapidsai/cudf/pull/19242
---
 .../cudf_polars/dsl/utils/aggregations.py | 44 +++++++++++++++++-
 python/cudf_polars/tests/test_groupby.py  | 46 +++++++++++++++++--
 python/cudf_polars/tests/test_rolling.py  | 46 +++++++++++++++++++
 3 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
index 83674fcc61d..8e242c55a47 100644
--- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
+++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py
@@ -235,7 +235,49 @@ def decompose_single_agg(
             expr.Col(agg.dtype, name)
         )
     if isinstance(agg, expr.Ternary):
-        raise NotImplementedError("Ternary inside groupby")
+        when, then, otherwise = agg.children
+
+        when_aggs, when_post = decompose_single_agg(
+            expr.NamedExpr(next(name_generator), when),
+            name_generator,
+            is_top=False,
+            context=context,
+        )
+        then_aggs, then_post = decompose_single_agg(
+            expr.NamedExpr(next(name_generator), then),
+            name_generator,
+            is_top=False,
+            context=context,
+        )
+        otherwise_aggs, otherwise_post = decompose_single_agg(
+            expr.NamedExpr(next(name_generator), otherwise),
+            name_generator,
+            is_top=False,
+            context=context,
+        )
+
+        when_has = any(h for _, h in when_aggs)
+        then_has = any(h for _, h in then_aggs)
+        otherwise_has = any(h for _, h in otherwise_aggs)
+
+        if is_top and not (when_has or then_has or otherwise_has):
+            raise NotImplementedError(
+                "Broadcasted ternary with list output in groupby is not supported"
+            )
+
+        for post, has in (
+            (when_post, when_has),
+            (then_post, then_has),
+            (otherwise_post, otherwise_has),
+        ):
+            if is_top and not has and not isinstance(post.value, expr.Literal):
+                raise NotImplementedError(
+                    "Broadcasting aggregated expressions in groupby/rolling"
+                )
+
+        return [*when_aggs, *then_aggs, *otherwise_aggs], named_expr.reconstruct(
+            agg.reconstruct([when_post.value, then_post.value, otherwise_post.value])
+        )
     if not agg.is_pointwise and isinstance(agg, expr.BooleanFunction):
         raise NotImplementedError(
             f"Non pointwise boolean function {agg.name!r} not supported in groupby or rolling context"
         )
diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py
index bf97ec41cde..dcbf2d3eec5 100644
--- a/python/cudf_polars/tests/test_groupby.py
+++ b/python/cudf_polars/tests/test_groupby.py
@@ -152,12 +152,17 @@ def test_groupby_len(df, keys):
     "expr",
     [
         (pl.col("int").max() + pl.col("float").min()).max(),
-        pl.when(pl.col("int") < pl.lit(2))
-        .then(pl.col("float").sum())
-        .otherwise(pl.lit(-2)),
+        (
+            pl.when((pl.col("float") - pl.col("float").mean()) > 0)
+            .then(pl.col("float"))
+            .otherwise(None)
+            .sum()
+        ),
+        (pl.when(pl.col("int") > 5).then(pl.col("float")).otherwise(pl.lit(0.0))),
+        (pl.when(pl.col("int").min() >= 3).then(pl.col("float"))),
     ],
 )
-def test_groupby_unsupported(df, expr):
+def test_groupby_unsupported(df: pl.LazyFrame, expr: pl.Expr) -> None:
     q = df.group_by("key1").agg(expr)

     assert_ir_translation_raises(q, NotImplementedError)

@@ -346,3 +351,36 @@ def test_groupby_aggs_keep_unsupported_as_null(df: pl.LazyFrame, agg_expr) -> No
     lf = df.filter(pl.col("datetime") == date(2004, 12, 1))
     q = lf.group_by("datetime").agg(agg_expr)
     assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pl.when(pl.col("int") > 
5).then(pl.col("float")).otherwise(None).sum(), + pl.when(pl.col("float").count() > 0) + .then(pl.col("float").sum()) + .otherwise(None), + ( + pl.when(pl.col("float").min() < pl.col("float").max()) + .then(pl.col("float").max() - pl.col("float").min()) + .otherwise(pl.lit(0.0)) + ), + ( + pl.when(pl.col("int").count() > 0) + .then( + pl.col("int").cast(pl.Float64).sum() + / pl.col("int").count().cast(pl.Float64) + ) + .otherwise(None) + ), + ], + ids=[ + "pre_pointwise_then_sum", + "post_over_aggs", + "post_multiple_aggs_range", + "post_manually_compute_mean", + ], +) +def test_groupby_ternary_supported(df: pl.LazyFrame, expr: pl.Expr) -> None: + q = df.group_by("key1").agg(expr) + assert_gpu_result_equal(q, check_row_order=False) diff --git a/python/cudf_polars/tests/test_rolling.py b/python/cudf_polars/tests/test_rolling.py index 89a4dc3b083..9dbe75e14ba 100644 --- a/python/cudf_polars/tests/test_rolling.py +++ b/python/cudf_polars/tests/test_rolling.py @@ -226,3 +226,49 @@ def test_rolling_null_count(df): nc=pl.col("null").null_count() ) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "expr", + [ + pl.when(pl.col("values") > 5).then(pl.col("floats")).otherwise(None).sum(), + pl.when(pl.col("values").count() > 0) + .then(pl.col("values").sum()) + .otherwise(None), + pl.when(pl.col("values").min() < pl.col("values").max()) + .then(pl.col("values").max() - pl.col("values").min()) + .otherwise(pl.lit(0)), + pl.when(pl.col("values").count() > 0) + .then( + pl.col("values").cast(pl.Float64).sum() + / pl.col("values").count().cast(pl.Float64) + ) + .otherwise(None), + ], + ids=[ + "pre_pointwise_then_sum", + "post_over_aggs", + "post_multiple_aggs_range", + "post_manually_compute_mean", + ], +) +def test_rolling_ternary_supported(df, expr): + q = df.rolling("dt", period="48h", closed="both").agg(expr.alias("out")) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "expr", + [ + pl.when(pl.col("values") > 3) + .then(pl.col("values")) + .otherwise(pl.lit(None, dtype=pl.Int64)), + pl.when((pl.col("floats") - pl.col("floats").mean()) > 0) + .then(pl.col("floats")) + .otherwise(None) + .sum(), + ], +) +def test_rolling_ternary_unsupported(df, expr): + q = df.rolling("dt", period="48h", closed="both").agg(expr.alias("out")) + assert_ir_translation_raises(q, NotImplementedError) From 0ddd460fe1ffc4a8578c1842e53901691628713e Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 21 Aug 2025 02:33:27 -0400 Subject: [PATCH 184/366] Fix window var() test failures from float rounding (#19761) Adds relative and absolute tolerances to the window var tests. The failure was discovered after I merged #19684. I'm not sure exactly why CI didn't fail before. 
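The mismatch is ordinary floating-point non-associativity: a parallel GPU reduction sums in a different order than the CPU. A minimal illustration (not from the test suite):

```python
a, b, c = 0.1, 0.2, 0.3
print((a + b) + c)  # 0.6000000000000001
print(a + (b + c))  # 0.6
# Summation order changes the last bits of the result, which is exactly
# the kind of drift the new rtol/atol of 1e-15 is meant to absorb.
print((a + b) + c == a + (b + c))  # False
```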
Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19761
---
 python/cudf_polars/tests/test_window_functions.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/tests/test_window_functions.py b/python/cudf_polars/tests/test_window_functions.py
index 8ddaae4bd75..e1ad4719402 100644
--- a/python/cudf_polars/tests/test_window_functions.py
+++ b/python/cudf_polars/tests/test_window_functions.py
@@ -86,7 +86,12 @@ def test_over(df: pl.LazyFrame, partition_by, agg_expr):

     q = df.with_columns(window_expr)

-    assert_gpu_result_equal(q)
+    if "var" in str(agg_expr):
+        # CPU: 1.333333333333333
+        # GPU: 1.333333333333334
+        # The results differ only in the last digit, so compare with a tolerance.
+        assert_gpu_result_equal(q, check_exact=False, rtol=1e-15, atol=1e-15)
+    else:
+        assert_gpu_result_equal(q)


 def test_over_with_sort(df: pl.LazyFrame):

From 8c09ceb4866624369d5d84c8c6d6c55fced83a3d Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:54:50 -0700
Subject: [PATCH 185/366] Use `is_compressed` field from Parquet V2 data page
 headers to determine if they are compressed (#19755)

Closes #19756

This PR decodes the `is_compressed` field from each Parquet data page's V2
header to determine whether it is actually compressed, instead of relying on
the column chunk codec information and assuming that all pages in a column
chunk are compressed.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vukasin Milovanovic (https://github.com/vuule)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/19755
---
 cpp/src/io/parquet/page_hdr.cu                 | 23 ++++++++++++++++++-
 cpp/src/io/parquet/parquet_gpu.hpp             |  1 +
 .../io/parquet/reader_impl_chunking_utils.cu   | 14 +++++++----
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu
index abcdc31aa8b..2279f1cf08f 100644
--- a/cpp/src/io/parquet/page_hdr.cu
+++ b/cpp/src/io/parquet/page_hdr.cu
@@ -234,6 +234,25 @@ __device__ decode_kernel_mask kernel_mask_for_page(PageInfo const& page,
   return decode_kernel_mask::GENERAL;
 }

+/**
+ * @brief Functor to set value to bool read from byte stream
+ *
+ * @return True if field type is not bool
+ */
+struct ParquetFieldBool {
+  int field;
+  bool& val;
+
+  __device__ ParquetFieldBool(int f, bool& v) : field(f), val(v) {}
+
+  inline __device__ bool operator()(byte_stream_s* bs, int field_type)
+  {
+    val = static_cast<FieldType>(field_type) == FieldType::BOOLEAN_TRUE;
+    return not(static_cast<FieldType>(field_type) == FieldType::BOOLEAN_TRUE or
+               static_cast<FieldType>(field_type) == FieldType::BOOLEAN_FALSE);
+  }
+};
+
 /**
  * @brief Functor to set value to 32 bit integer read from byte stream
  *
@@ -391,7 +410,8 @@ struct gpuParseDataPageHeaderV2 {
       ParquetFieldInt32(3, bs->page.num_rows),
       ParquetFieldEnum<Encoding>(4, bs->page.encoding),
       ParquetFieldInt32(5, bs->page.lvl_bytes[level_type::DEFINITION]),
-      ParquetFieldInt32(6, bs->page.lvl_bytes[level_type::REPETITION]));
+      ParquetFieldInt32(6, bs->page.lvl_bytes[level_type::REPETITION]),
+      ParquetFieldBool(7, bs->page.is_compressed));
     return parse_header(op, bs);
   }
 };
@@ -470,6 +490,7 @@ void __launch_bounds__(decode_page_headers_block_size)
       bs->page.temp_string_size = 0;
       bs->page.temp_string_buf  = nullptr;
      bs->page.kernel_mask      = decode_kernel_mask::NONE;
+      bs->page.is_compressed    = true;
     }
     num_values = bs->ck.num_values;
     page_info  = chunk_pages ? chunk_pages[chunk].pages : nullptr;
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 81303df097b..6ee49ff1c65 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -340,6 +340,7 @@ struct PageInfo {
   Encoding encoding;  // Encoding for data or dictionary page
   Encoding definition_level_encoding;  // Encoding used for definition levels (data page)
   Encoding repetition_level_encoding;  // Encoding used for repetition levels (data page)
+  bool is_compressed;  // Whether the page is compressed (V2 header)

   // for nested types, we run a preprocess step in order to determine output
   // column sizes. Because of this, we can jump directly to the position in the
diff --git a/cpp/src/io/parquet/reader_impl_chunking_utils.cu b/cpp/src/io/parquet/reader_impl_chunking_utils.cu
index 38fd132acf7..978ccd9dde5 100644
--- a/cpp/src/io/parquet/reader_impl_chunking_utils.cu
+++ b/cpp/src/io/parquet/reader_impl_chunking_utils.cu
@@ -129,17 +129,20 @@ void codec_stats::add_pages(host_span chunks, host_span page_mask)
 {
   // Create a page mask iterator that defaults to true if the page_mask is empty
   auto page_mask_iter =
     page_mask.empty() ? thrust::make_constant_iterator(true) : page_mask.begin();

   // Zip iterator for iterating over pages and the page mask
   auto zip_iter = thrust::make_zip_iterator(pages.begin(), page_mask_iter);

   std::for_each(zip_iter, zip_iter + pages.size(), [&](auto const& item) {
     auto& [page, is_page_needed] = item;
+    // If this is a V2 page, use the `is_compressed` field to determine if it's compressed.
+    // For V1 pages, it's always compressed if the chunk.codec is specified.
+    auto const is_page_compressed = (page.flags & PAGEINFO_FLAGS_V2) ? page.is_compressed : true;
     if (is_page_needed && chunks[page.chunk_idx].codec == compression_type &&
         (page.flags & cudf::io::parquet::detail::PAGEINFO_FLAGS_DICTIONARY) ==
-          (selection == page_selection::DICT_PAGES)) {
+          (selection == page_selection::DICT_PAGES) and
+        is_page_compressed) {
       ++num_pages;
       total_decomp_size += page.uncompressed_page_size;
       max_decompressed_size = std::max(max_decompressed_size, page.uncompressed_page_size);
@@ -540,8 +543,11 @@ std::vector compute_page_splits_by_row(device_span(decomp_data) + decomp_offset;
   // offset will only be non-zero for V2 pages
   auto const offset =

From 36cd03697e8f71db3508d5e9dda6803b7a034f30 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 21 Aug 2025 07:31:42 -0400
Subject: [PATCH 186/366] Add count aggregation support to cudf::reduce
 (#19734)

A simple aggregation type for `cudf::reduce` that just returns `input.size()`
or `input.size() - input.null_count()`, depending on whether nulls are
included.

Closes #13756
Closes #19700

Also found that `nunique.cu` could be renamed to `nunique.cpp` since it
contains no device code.
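A sketch of the new capability from Python via `pylibcudf` (illustrative; assumes a build that includes this change):

```python
import pyarrow as pa
import pylibcudf as plc

col = plc.Column.from_arrow(pa.array([1, None, 3], type=pa.int32()))

# COUNT_VALID excludes nulls -> 2
valid = plc.reduce.reduce(
    col,
    plc.aggregation.count(plc.types.NullPolicy.EXCLUDE),
    plc.DataType(plc.TypeId.INT32),
)
# COUNT_ALL includes nulls -> 3
total = plc.reduce.reduce(
    col,
    plc.aggregation.count(plc.types.NullPolicy.INCLUDE),
    plc.DataType(plc.TypeId.INT32),
)
```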
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19734 --- cpp/CMakeLists.txt | 3 +- .../cudf/detail/aggregation/aggregation.hpp | 3 +- .../reduction/detail/reduction_functions.hpp | 18 ++++++ cpp/src/aggregation/aggregation.cpp | 2 + cpp/src/reductions/count.cpp | 63 +++++++++++++++++++ .../reductions/{nunique.cu => nunique.cpp} | 0 cpp/src/reductions/reductions.cpp | 12 ++++ cpp/tests/reductions/reduction_tests.cpp | 41 ++++++++++++ 8 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 cpp/src/reductions/count.cpp rename cpp/src/reductions/{nunique.cu => nunique.cpp} (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f59b9d6bf7e..5b4c34ccd09 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -631,13 +631,14 @@ add_library( src/reductions/any.cu src/reductions/bitwise.cu src/reductions/collect_ops.cu + src/reductions/count.cpp src/reductions/histogram.cu src/reductions/max.cu src/reductions/mean.cu src/reductions/min.cu src/reductions/minmax.cu src/reductions/nth_element.cu - src/reductions/nunique.cu + src/reductions/nunique.cpp src/reductions/product.cu src/reductions/quantile.cu src/reductions/reductions.cpp diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 32f4d8c572e..0d3efa425d9 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -279,7 +279,8 @@ class max_aggregation final : public rolling_aggregation, */ class count_aggregation final : public rolling_aggregation, public groupby_aggregation, - public groupby_scan_aggregation { + public groupby_scan_aggregation, + public reduce_aggregation { public: count_aggregation(aggregation::Kind kind) : aggregation(kind) {} diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index d7c49acca32..af43e0697fa 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -427,5 +427,23 @@ std::unique_ptr nunique(column_view const& col, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); +/** + * @brief Returns the number of elements in the input column + * + * Returns `col.size()` or `col.size() - col.null_count()` depending on `null_handling` + * + * @param col Input column to compute the number of elements + * @param null_handling Indicates if null values will be included in the count + * @param output_type Data type of return type + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Number of elements as scalar of type `output_type` + */ +std::unique_ptr count(column_view const& col, + null_policy null_handling, + data_type const output_type, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + } // namespace reduction::detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 39355983ed4..d206989b7a1 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -554,6 +554,8 @@ template CUDF_EXPORT std::unique_ptr 
make_count_aggregation(null_policy null_handling); template CUDF_EXPORT std::unique_ptr make_count_aggregation(null_policy null_handling); +template CUDF_EXPORT std::unique_ptr make_count_aggregation( + null_policy null_handling); /// Factory to create a HISTOGRAM aggregation template diff --git a/cpp/src/reductions/count.cpp b/cpp/src/reductions/count.cpp new file mode 100644 index 00000000000..e02d4bb9f14 --- /dev/null +++ b/cpp/src/reductions/count.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace reduction { +namespace detail { +namespace { + +struct count_scalar_fn { + template + requires(cudf::is_numeric_not_bool()) + std::unique_ptr operator()(size_type count, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + auto const value = static_cast(count); + return cudf::make_fixed_width_scalar(value, stream, mr); + } + + template + requires(not cudf::is_numeric_not_bool()) + std::unique_ptr operator()(size_type, + rmm::cuda_stream_view, + rmm::device_async_resource_ref) const + { + CUDF_FAIL("COUNT is not supported for boolean or non-numeric types", std::invalid_argument); + } +}; +} // namespace + +std::unique_ptr count(column_view const& col, + cudf::null_policy null_handling, + cudf::data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto const count = col.size() - (null_handling == null_policy::EXCLUDE ? col.null_count() : 0); + return cudf::type_dispatcher(output_dtype, count_scalar_fn{}, count, stream, mr); +} +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/src/reductions/nunique.cu b/cpp/src/reductions/nunique.cpp similarity index 100% rename from cpp/src/reductions/nunique.cu rename to cpp/src/reductions/nunique.cpp diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 2c1c582091b..97aa0b1eff1 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -85,6 +85,12 @@ std::unique_ptr reduce_aggregate_impl( std::invalid_argument); return quantile(col, qagg._quantiles.front(), qagg._interpolation, output_dtype, stream, mr); } + case aggregation::COUNT_ALL: + case aggregation::COUNT_VALID: { + auto null_handling = + agg.kind == aggregation::COUNT_VALID ? null_policy::EXCLUDE : null_policy::INCLUDE; + return count(col, null_handling, output_dtype, stream, mr); + } case aggregation::NUNIQUE: { auto nunique_agg = static_cast(agg); return nunique(col, nunique_agg._null_handling, output_dtype, stream, mr); @@ -177,6 +183,12 @@ std::unique_ptr reduce_no_data_impl(reduce_aggregation const& agg, case aggregation::ALL: { return std::make_unique>(agg.kind == aggregation::ALL, true, stream, mr); } + case aggregation::COUNT_ALL: + case aggregation::COUNT_VALID: { + auto null_handling = + agg.kind == aggregation::COUNT_VALID ? 
null_policy::EXCLUDE : null_policy::INCLUDE; + return count(col, null_handling, output_dtype, stream, mr); + } case aggregation::NUNIQUE: { auto nunique_agg = static_cast(agg); auto valid = !col.is_empty() && (nunique_agg._null_handling == cudf::null_policy::INCLUDE); diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index ec22328e84a..7ccb1cc8f2c 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1131,6 +1131,15 @@ TEST_F(ReductionEmptyTest, empty_column) EXPECT_EQ(result->is_valid(), false); result = cudf::reduce(col_nulls, *quantile_agg, double_type); EXPECT_EQ(result->is_valid(), false); + + auto count_agg = + cudf::make_count_aggregation(cudf::null_policy::INCLUDE); + result = cudf::reduce(col0, *count_agg, size_data_type); + EXPECT_EQ(result->is_valid(), true); + EXPECT_EQ(dynamic_cast*>(result.get())->value(), 0); + result = cudf::reduce(col_nulls, *count_agg, size_data_type); + EXPECT_EQ(result->is_valid(), true); + EXPECT_EQ(dynamic_cast*>(result.get())->value(), col_size); } TEST_F(ReductionEmptyTest, Errors) @@ -1590,6 +1599,38 @@ TYPED_TEST(ReductionTest, UniqueCount) expected_null_value1); } +TYPED_TEST(ReductionTest, Count) +{ + using T = TypeParam; + std::vector int_values({1, -3, 1, 2, 0, 2, -4, 45}); + std::vector v = convert_values(int_values); + + auto const output_type = cudf::data_type{cudf::type_to_id()}; + + // test without nulls + auto col = cudf::test::fixed_width_column_wrapper(v.begin(), v.end()); + auto expected_value = static_cast(v.size()); + auto count_agg = cudf::make_count_aggregation(cudf::null_policy::INCLUDE); + auto count_agg_exclude = + cudf::make_count_aggregation(cudf::null_policy::EXCLUDE); + EXPECT_EQ(this->template reduction_test(col, *count_agg, output_type).first, + expected_value); + EXPECT_EQ( + this->template reduction_test(col, *count_agg_exclude, output_type).first, + expected_value); + + // test with nulls + auto validity = cudf::test::iterators::null_at(3); + col = cudf::test::fixed_width_column_wrapper(v.begin(), v.end(), validity); + EXPECT_EQ(this->template reduction_test(col, *count_agg, output_type).first, + expected_value); + + expected_value = static_cast(v.size() - 1); + EXPECT_EQ( + this->template reduction_test(col, *count_agg_exclude, output_type).first, + expected_value); +} + template struct FixedPointTestAllReps : public cudf::test::BaseFixture {}; From 5f83c847aa6f09f9cffc73cb1b555f73e0dcd69b Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 21 Aug 2025 07:58:58 -0400 Subject: [PATCH 187/366] Change nvtext::character_tokenize to return a list column (#19685) Changes the return object for `nvtext::character_tokenize()` to a list column instead of a flat strings column. This is so characters can be identified with their corresponding original input row. 
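The behavior change, seen from Python via `pylibcudf` (a sketch; the output values follow the updated tests):

```python
import pyarrow as pa
import pylibcudf as plc

col = plc.Column.from_arrow(pa.array(["ab", "", "cdé"]))
result = plc.nvtext.tokenize.character_tokenize(col)
# Before: a flat strings column  -> ["a", "b", "c", "d", "é"]
# After:  a lists column, one row per input row ->
#         [["a", "b"], [], ["c", "d", "é"]]
```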
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Bradley Dice (https://github.com/bdice) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/19685 --- .../cudf/strings/detail/attributes.hpp | 35 +++++++++++ cpp/src/strings/attributes.cu | 1 + cpp/src/text/tokenize.cu | 63 ++++++------------- cpp/tests/text/tokenize_tests.cpp | 11 ++-- python/cudf/cudf/core/accessors/string.py | 7 ++- .../pylibcudf/tests/test_nvtext_tokenize.py | 2 +- 6 files changed, 66 insertions(+), 53 deletions(-) create mode 100644 cpp/include/cudf/strings/detail/attributes.hpp diff --git a/cpp/include/cudf/strings/detail/attributes.hpp b/cpp/include/cudf/strings/detail/attributes.hpp new file mode 100644 index 00000000000..6d8379df167 --- /dev/null +++ b/cpp/include/cudf/strings/detail/attributes.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { + +/** + * @copydoc cudf::strings::count_characters + */ +std::unique_ptr count_characters(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 46360ee8663..c06eeb09ed0 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 56c00723b77..a37f1c327e5 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -19,11 +19,9 @@ #include #include #include -#include #include #include -#include -#include +#include #include #include #include @@ -38,10 +36,7 @@ #include #include -#include -#include #include -#include #include #include #include @@ -104,31 +99,6 @@ std::unique_ptr tokenize_fn(cudf::size_type strings_count, return cudf::strings::detail::make_strings_column(tokens.begin(), tokens.end(), stream, mr); } -constexpr int64_t block_size = 512; // number of threads per block -constexpr int64_t bytes_per_thread = 4; // bytes processed per thread - -CUDF_KERNEL void count_characters(uint8_t const* d_chars, int64_t chars_bytes, int64_t* d_output) -{ - auto const idx = cudf::detail::grid_1d::global_thread_id(); - auto const byte_idx = static_cast(idx) * bytes_per_thread; - auto const lane_idx = static_cast(threadIdx.x); - - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage temp_storage; - - int64_t count = 0; - // each thread processes multiple bytes - for (auto i = byte_idx; (i < (byte_idx + bytes_per_thread)) && (i < chars_bytes); ++i) { - count += cudf::strings::detail::is_begin_utf8_char(d_chars[i]); - } - auto const total = block_reduce(temp_storage).Reduce(count, 
cuda::std::plus()); - - if ((lane_idx == 0) && (total > 0)) { - cuda::atomic_ref ref{*d_output}; - ref.fetch_add(total, cuda::std::memory_order_relaxed); - } -} - } // namespace // detail APIs @@ -203,7 +173,7 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const { auto strings_count = strings_column.size(); if (strings_count == 0) { - return cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); + return cudf::make_empty_lists_column(cudf::data_type{cudf::type_id::STRING}); } CUDF_EXPECTS( @@ -216,21 +186,20 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const offsets, strings_column.offset() + strings_count, stream) - offset; // no bytes -- this could happen in an all-empty column - if (chars_bytes == 0) { return cudf::make_empty_column(cudf::type_id::STRING); } + if (chars_bytes == 0) { + return cudf::make_empty_lists_column(cudf::data_type{cudf::type_id::STRING}); + } auto d_chars = strings_column.parent().data(); // unsigned is necessary for checking bits d_chars += offset; - // To minimize memory, count the number of characters so we can - // build the output offsets without an intermediate buffer. - // In the worst case each byte is a character so the output is 4x the input. - cudf::detail::device_scalar d_count(0, stream); - auto const num_blocks = cudf::util::div_rounding_up_safe( - cudf::util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), - block_size); - count_characters<<>>( - d_chars, chars_bytes, d_count.data()); - auto const num_characters = d_count.value(stream); + auto const character_counts = cudf::strings::detail::count_characters( + strings_column, stream, cudf::get_current_device_resource_ref()); + auto [list_offsets, num_characters] = + cudf::detail::make_offsets_child_column(character_counts->view().begin(), + character_counts->view().end(), + stream, + mr); // number of characters becomes the number of rows so need to check the row limit CUDF_EXPECTS( @@ -258,9 +227,13 @@ std::unique_ptr character_tokenize(cudf::strings_column_view const rmm::device_uvector output_chars(chars_bytes, stream, mr); thrust::copy(rmm::exec_policy(stream), d_chars, d_chars + chars_bytes, output_chars.data()); - // return new strings column - return cudf::make_strings_column( + auto output_strings = cudf::make_strings_column( num_characters, std::move(offsets_column), output_chars.release(), 0, rmm::device_buffer{}); + return cudf::make_lists_column(strings_count, + std::move(list_offsets), + std::move(output_strings), + strings_column.null_count(), + cudf::detail::copy_bitmask(strings_column.parent(), stream, mr)); } } // namespace detail diff --git a/cpp/tests/text/tokenize_tests.cpp b/cpp/tests/text/tokenize_tests.cpp index f9ca343eaac..b7526230d91 100644 --- a/cpp/tests/text/tokenize_tests.cpp +++ b/cpp/tests/text/tokenize_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -111,11 +111,12 @@ TEST_F(TextTokenizeTest, TokenizeErrorTest) TEST_F(TextTokenizeTest, CharacterTokenize) { - cudf::test::strings_column_wrapper input({"the mousé ate the cheese", ""}); + cudf::test::strings_column_wrapper input({"the mousé ate", "the cheese", ""}); - cudf::test::strings_column_wrapper expected{"t", "h", "e", " ", "m", "o", "u", "s", - "é", " ", "a", "t", "e", " ", "t", "h", - "e", " ", "c", "h", "e", "e", "s", "e"}; + using LCW = cudf::test::lists_column_wrapper; + LCW expected{LCW{"t", "h", "e", " ", "m", "o", "u", "s", "é", " ", "a", "t", "e"}, + LCW{"t", "h", "e", " ", "c", "h", "e", "e", "s", "e"}, + LCW{}}; auto results = nvtext::character_tokenize(cudf::strings_column_view(input)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); diff --git a/python/cudf/cudf/core/accessors/string.py b/python/cudf/cudf/core/accessors/string.py index 843d205c008..0b3f14d6267 100644 --- a/python/cudf/cudf/core/accessors/string.py +++ b/python/cudf/cudf/core/accessors/string.py @@ -535,8 +535,11 @@ def join( def _split_by_character(self) -> ListColumn: col = self._column.fillna("") # sanitize nulls result_col = col.character_tokenize() + if isinstance(result_col.dtype, ListDtype) and (result_col.size > 0): + return result_col # type: ignore offset_col = col.children[0] + child_col = result_col.children[1] return ListColumn( data=None, @@ -545,7 +548,7 @@ def _split_by_character(self) -> ListColumn: mask=col.mask, offset=0, null_count=0, - children=(offset_col, result_col), # type: ignore[arg-type] + children=(offset_col, child_col), # type: ignore[arg-type] ) def extract( @@ -4621,7 +4624,7 @@ def character_tokenize(self) -> Series | Index: 2 . dtype: object """ - result_col = self._column.character_tokenize() + result_col = self._column.character_tokenize().children[1] if isinstance(self._parent, cudf.Series): lengths = self.len().fillna(0) index = self._parent.index.repeat(lengths) diff --git a/python/pylibcudf/tests/test_nvtext_tokenize.py b/python/pylibcudf/tests/test_nvtext_tokenize.py index 2ecef38984c..9f22dc514de 100644 --- a/python/pylibcudf/tests/test_nvtext_tokenize.py +++ b/python/pylibcudf/tests/test_nvtext_tokenize.py @@ -62,7 +62,7 @@ def test_character_tokenize(input_col): got = plc.nvtext.tokenize.character_tokenize( plc.Column.from_arrow(input_col) ) - expect = pa.array(["a", "b", " ", "c", "d", ".", "e", ":", "f", ";"]) + expect = pa.array([["a"], ["b", " ", "c"], ["d", ".", "e", ":", "f", ";"]]) assert_column_eq(expect, got) From a133eb5723a0ab7cf16c642ba32928a3bc31be3c Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Thu, 21 Aug 2025 10:05:20 -0500 Subject: [PATCH 188/366] Add ``ColumnSourceInfo`` convenience layer (#19752) Prerequisite for https://github.com/rapidsai/cudf/pull/19736 Adds a new ``ColumnSourceInfo`` class for convenience. This new class is a thin wrapper around ``DataSourceInfo``. While working on [this PR](https://github.com/rapidsai/cudf/pull/19736), I found the `ColumnStats -> DataSourceInfo` relationship to be a bit confusing/clumsy. The relationship was a bit cleaner with `ColumnStats -> ColumnSourceInfo`, because the ColumnSourceInfo class is able to better "hide" the fact that the underlying source is actually a table (rather than a column). 
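A sketch of the intended relationship (class names are from this PR; the usage itself is illustrative):

```python
from cudf_polars.experimental.base import (
    ColumnSourceInfo,
    ColumnStats,
    DataSourceInfo,
)

# One DataSourceInfo per table-like source, plus one thin ColumnSourceInfo
# per column, so a ColumnStats no longer has to remember which column of
# which table it came from.
table_info = DataSourceInfo()
stats = ColumnStats("x", source_info=ColumnSourceInfo(table_info, "x"))
stats.source_info.add_unique_stats_column()  # mark "x" for unique-value sampling
```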
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19752 --- .../cudf_polars/experimental/base.py | 89 +++++++++++-- .../cudf_polars/experimental/io.py | 15 +-- .../cudf_polars/experimental/statistics.py | 3 +- python/cudf_polars/docs/overview.md | 5 +- .../tests/experimental/test_stats.py | 125 ++++++++++-------- 5 files changed, 157 insertions(+), 80 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index 01fc4d1e1fc..e4c0a43aa64 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -90,12 +90,12 @@ class UniqueStats: class DataSourceInfo: """ - Datasource information. + Table data source information. Notes ----- This class should be sub-classed for specific - datasource types (e.g. Parquet, DataFrame, etc.). + data source types (e.g. Parquet, DataFrame, etc.). The required properties/methods enable lazy sampling of the underlying datasource. """ @@ -117,6 +117,70 @@ def add_unique_stats_column(self, column: str) -> None: """Add a column needing unique-value information.""" +class ColumnSourceInfo: + """ + Source column information. + + Parameters + ---------- + table_source_info + Table data source information. + column_name + Column name in the data source. + + Notes + ----- + This is a thin wrapper around DataSourceInfo that provides + direct access to column-specific information. + """ + + __slots__ = ("_allow_unique_sampling", "column_name", "table_source_info") + table_source_info: DataSourceInfo + column_name: str + _allow_unique_sampling: bool + + def __init__(self, table_source_info: DataSourceInfo, column_name: str) -> None: + self.table_source_info = table_source_info + self.column_name = column_name + self._allow_unique_sampling = False + + @property + def row_count(self) -> ColumnStat[int]: + """Data source row-count estimate.""" + return self.table_source_info.row_count + + def unique_stats(self, *, force: bool = False) -> UniqueStats: + """ + Return unique-value statistics for a column. + + Parameters + ---------- + force + If True, return unique-value statistics even if the column + wasn't marked as needing unique-value information. + """ + return ( + self.table_source_info.unique_stats(self.column_name) + # Avoid sampling unique-stats if this column + # wasn't marked as needing unique-stats. + if force or self._allow_unique_sampling + else UniqueStats() + ) + + @property + def storage_size(self) -> ColumnStat[int]: + """Return the average column size for a single file.""" + return self.table_source_info.storage_size(self.column_name) + + def add_unique_stats_column(self, column: str | None = None) -> None: + """Add a column needing unique-value information.""" + if column in (None, self.column_name): + self._allow_unique_sampling = True + return self.table_source_info.add_unique_stats_column( + column or self.column_name + ) + + class ColumnStats: """ Column statistics. @@ -128,19 +192,16 @@ class ColumnStats: children Child ColumnStats objects. source_info - Datasource information. - source_name - Source-column name. + Column source information. unique_stats Unique-value statistics. """ - __slots__ = ("children", "name", "source_info", "source_name", "unique_stats") + __slots__ = ("children", "name", "source_info", "unique_stats") name: str children: tuple[ColumnStats, ...] 
- source_info: DataSourceInfo - source_name: str + source_info: ColumnSourceInfo unique_stats: UniqueStats def __init__( @@ -148,14 +209,12 @@ def __init__( name: str, *, children: tuple[ColumnStats, ...] = (), - source_info: DataSourceInfo | None = None, - source_name: str | None = None, + source_info: ColumnSourceInfo | None = None, unique_stats: UniqueStats | None = None, ) -> None: self.name = name self.children = children - self.source_info = source_info or DataSourceInfo() - self.source_name = source_name or name + self.source_info = source_info or ColumnSourceInfo(DataSourceInfo(), name) self.unique_stats = unique_stats or UniqueStats() def new_parent( @@ -184,7 +243,6 @@ def new_parent( children=(self,), # Want to reference the same DataSourceInfo source_info=self.source_info, - source_name=self.source_name, # Want fresh UniqueStats so we can mutate in place unique_stats=UniqueStats(), ) @@ -195,6 +253,11 @@ class StatsCollector: __slots__ = ("column_stats", "row_count") + row_count: dict[IR, ColumnStat[int]] + """Estimated row count for each IR node.""" + column_stats: dict[IR, dict[str, ColumnStats]] + """Column statistics for each IR node.""" + def __init__(self) -> None: self.row_count: dict[IR, ColumnStat[int]] = {} self.column_stats: dict[IR, dict[str, ColumnStats]] = {} diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index 57f314ed571..e36e73c5f90 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -19,6 +19,7 @@ from cudf_polars.dsl.ir import IR, DataFrameScan, Empty, Scan, Sink, Union from cudf_polars.experimental.base import ( + ColumnSourceInfo, ColumnStat, ColumnStats, DataSourceInfo, @@ -118,8 +119,8 @@ def from_scan(ir: Scan, config_options: ConfigOptions) -> ScanPartitionPlan: blocksize: int = config_options.executor.target_partition_size column_stats = _extract_scan_stats(ir, config_options) column_sizes: list[int] = [] - for name, cs in column_stats.items(): - storage_size = cs.source_info.storage_size(name) + for cs in column_stats.values(): + storage_size = cs.source_info.storage_size if storage_size.value is not None: column_sizes.append(storage_size.value) @@ -821,7 +822,7 @@ def _extract_scan_stats( ) -> dict[str, ColumnStats]: """Extract base ColumnStats for a Scan node.""" if ir.typ == "parquet": - source_info = _sample_pq_stats( + table_source_info = _sample_pq_stats( tuple(ir.paths), config_options.parquet_options.max_footer_samples, config_options.parquet_options.max_row_group_samples, @@ -829,8 +830,7 @@ def _extract_scan_stats( return { name: ColumnStats( name=name, - source_info=source_info, - source_name=name, + source_info=ColumnSourceInfo(table_source_info, name), ) for name in ir.schema } @@ -879,12 +879,11 @@ def unique_stats(self, column: str) -> UniqueStats: def _extract_dataframescan_stats(ir: DataFrameScan) -> dict[str, ColumnStats]: """Extract base ColumnStats for a DataFrameScan node.""" - source_info = DataFrameSourceInfo(ir.df) + table_source_info = DataFrameSourceInfo(ir.df) return { name: ColumnStats( name=name, - source_info=source_info, - source_name=name, + source_info=ColumnSourceInfo(table_source_info, name), ) for name in ir.schema } diff --git a/python/cudf_polars/cudf_polars/experimental/statistics.py b/python/cudf_polars/cudf_polars/experimental/statistics.py index 7c302fe59d7..18588ac7e29 100644 --- a/python/cudf_polars/cudf_polars/experimental/statistics.py +++ 
b/python/cudf_polars/cudf_polars/experimental/statistics.py @@ -66,9 +66,8 @@ def _update_unique_stats_columns( if ( name not in unique_fraction and (column_stats := child_column_stats.get(name)) is not None - and (source_stats := column_stats.source_info) is not None ): - source_stats.add_unique_stats_column(column_stats.source_name or name) + column_stats.source_info.add_unique_stats_column() @initialize_column_stats.register(IR) diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 0eaf182762b..34e66e62436 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -417,8 +417,11 @@ datasource (e.g. a Parquet dataset or in-memory `DataFrame`). **aggregated** column sampling via sub-classing. For example, The `ParquetSourceInfo` sub-class uses caching to avoid redundant file-system access. +- `ColumnSourceInfo`: This class wraps a `DataSourceInfo` object. +Since `DataSourceInfo` tracks information for an entire table, we use +`ColumnSourceInfo` to provide a single-column view of the object. - `ColumnStats`: This class is used to group together the "base" -`DataSourceInfo` reference and the current `UniqueStats` estimates +`ColumnSourceInfo` reference and the local `UniqueStats` estimates for a specific IR + column combination. We bundle these references together to simplify the design and maintenance of `StatsCollector`. **NOTE:** The current `UniqueStats` estimates are not yet populated. diff --git a/python/cudf_polars/tests/experimental/test_stats.py b/python/cudf_polars/tests/experimental/test_stats.py index 147cfc98555..f139d344eaa 100644 --- a/python/cudf_polars/tests/experimental/test_stats.py +++ b/python/cudf_polars/tests/experimental/test_stats.py @@ -44,31 +44,46 @@ def test_base_stats_dataframescan(df): column_stats = stats.column_stats[ir] # Source info is the same for all columns - source_info = column_stats["x"].source_info - assert source_info is column_stats["y"].source_info - assert source_info is column_stats["z"].source_info - assert source_info.row_count.value == row_count - assert source_info.row_count.exact + source_info_x = column_stats["x"].source_info + source_info_y = column_stats["y"].source_info + source_info_z = column_stats["z"].source_info + table_source_info = source_info_x.table_source_info + assert table_source_info is source_info_y.table_source_info + assert table_source_info is source_info_z.table_source_info + assert source_info_x.row_count.value == row_count + assert source_info_x.row_count.exact # Storage stats should not be available - assert source_info.storage_size("x").value is None + assert source_info_x.storage_size.value is None - # Check unique stats + # Check unique stats. + # We need to use force=True to sample unique-value statistics, + # because nothing in the query requires unique-value statistics. 
assert math.isclose( - source_info.unique_stats("x").count.value, row_count, rel_tol=5e-2 + source_info_x.unique_stats(force=True).count.value, row_count, rel_tol=5e-2 ) - assert math.isclose(source_info.unique_stats("x").fraction.value, 1.0, abs_tol=1e-2) - assert not source_info.unique_stats("x").count.exact - assert math.isclose(source_info.unique_stats("y").count.value, 3, rel_tol=5e-2) assert math.isclose( - source_info.unique_stats("y").fraction.value, 3 / row_count, abs_tol=1e-2 + source_info_x.unique_stats(force=True).fraction.value, 1.0, abs_tol=1e-2 ) - assert not source_info.unique_stats("y").count.exact - assert math.isclose(source_info.unique_stats("z").count.value, 5, rel_tol=5e-2) + assert not source_info_x.unique_stats(force=True).count.exact assert math.isclose( - source_info.unique_stats("z").fraction.value, 5 / row_count, abs_tol=1e-2 + source_info_y.unique_stats(force=True).count.value, 3, rel_tol=5e-2 ) - assert not source_info.unique_stats("z").count.exact + assert math.isclose( + source_info_y.unique_stats(force=True).fraction.value, + 3 / row_count, + abs_tol=1e-2, + ) + assert not source_info_y.unique_stats(force=True).count.exact + assert math.isclose( + source_info_z.unique_stats(force=True).count.value, 5, rel_tol=5e-2 + ) + assert math.isclose( + source_info_z.unique_stats(force=True).fraction.value, + 5 / row_count, + abs_tol=1e-2, + ) + assert not source_info_z.unique_stats(force=True).count.exact @pytest.mark.parametrize("n_files", [1, 3]) @@ -109,60 +124,58 @@ def test_base_stats_parquet( column_stats = stats.column_stats[ir] # Source info is the same for all columns - source_info = column_stats["x"].source_info - assert source_info is column_stats["y"].source_info - assert source_info is column_stats["z"].source_info + source_info_x = column_stats["x"].source_info + source_info_y = column_stats["y"].source_info + source_info_z = column_stats["z"].source_info + table_source_info = source_info_x.table_source_info + assert table_source_info is source_info_y.table_source_info + assert table_source_info is source_info_z.table_source_info if max_footer_samples: - assert source_info.row_count.value == df.height - assert source_info.row_count.exact + assert source_info_x.row_count.value == df.height + assert source_info_x.row_count.exact else: - assert source_info.row_count.value is None + assert source_info_x.row_count.value is None # Storage stats should be available if max_footer_samples: - assert source_info.storage_size("x").value > 0 - assert source_info.storage_size("y").value > 0 + assert source_info_x.storage_size.value > 0 + assert source_info_y.storage_size.value > 0 else: - assert source_info.storage_size("x").value is None - assert source_info.storage_size("y").value is None - - # Check that we can query a missing column name - assert source_info.storage_size("foo").value is None - assert source_info.unique_stats("foo").count.value is None - assert source_info.unique_stats("foo").fraction.value is None + assert source_info_x.storage_size.value is None + assert source_info_y.storage_size.value is None # source._unique_stats should be empty - assert set(source_info._unique_stats) == set() + assert set(table_source_info._unique_stats) == set() if max_footer_samples and max_row_group_samples: - assert source_info.unique_stats("x").count.value == df.height - assert source_info.unique_stats("x").fraction.value == 1.0 + assert source_info_x.unique_stats(force=True).count.value == df.height + assert source_info_x.unique_stats(force=True).fraction.value == 1.0 
else: - assert source_info.unique_stats("x").count.value is None - assert source_info.unique_stats("x").fraction.value is None + assert source_info_x.unique_stats(force=True).count.value is None + assert source_info_x.unique_stats(force=True).fraction.value is None # source_info._unique_stats should only contain 'x' if max_footer_samples and max_row_group_samples: - assert set(source_info._unique_stats) == {"x"} + assert set(table_source_info._unique_stats) == {"x"} else: - assert set(source_info._unique_stats) == set() + assert set(table_source_info._unique_stats) == set() # Check add_unique_stats_column behavior if max_footer_samples and max_row_group_samples: # Can add a "bad"/missing key column - source_info.add_unique_stats_column("foo") - assert set(source_info._unique_stats) == {"x"} + source_info_x.add_unique_stats_column("foo") + assert set(table_source_info._unique_stats) == {"x"} # Mark 'z' as a key column, and query 'y' stats - source_info.add_unique_stats_column("z") + source_info_z.add_unique_stats_column() if n_files == 1 and row_group_size == 10_000: - assert source_info.unique_stats("y").count.value == 3 + assert source_info_y.unique_stats(force=True).count.value == 3 else: - assert source_info.unique_stats("y").count.value is None - assert source_info.unique_stats("y").fraction.value < 1.0 + assert source_info_y.unique_stats(force=True).count.value is None + assert source_info_y.unique_stats(force=True).fraction.value < 1.0 # source_info._unique_stats should contain all columns now - assert set(source_info._unique_stats) == {"x", "y", "z"} + assert set(table_source_info._unique_stats) == {"x", "y", "z"} def test_base_stats_csv(tmp_path, df): @@ -181,10 +194,10 @@ def test_base_stats_csv(tmp_path, df): column_stats = stats.column_stats[ir] # Source info should be empty for CSV - source_info = column_stats["x"].source_info - assert source_info.row_count.value is None - assert source_info.unique_stats("x").count.value is None - assert source_info.unique_stats("x").fraction.value is None + source_info_x = column_stats["x"].source_info + assert source_info_x.row_count.value is None + assert source_info_x.unique_stats().count.value is None + assert source_info_x.unique_stats().fraction.value is None @pytest.mark.parametrize("max_footer_samples", [1, 3]) @@ -217,7 +230,7 @@ def test_base_stats_parquet_groupby( qir1 = Translator(q1._ldf.visit(), engine).translate_ir() stats = collect_base_stats(qir1, ConfigOptions.from_polars_engine(engine)) source_info_y = stats.column_stats[qir1]["y"].source_info - unique_stats_y = source_info_y.unique_stats("y") + unique_stats_y = source_info_y.unique_stats(force=True) y_unique_fraction = unique_stats_y.fraction y_row_count = source_info_y.row_count assert y_unique_fraction.value < 1.0 @@ -250,7 +263,7 @@ def test_base_stats_parquet_groupby( qir2 = Translator(q2._ldf.visit(), engine).translate_ir() stats = collect_base_stats(qir2, ConfigOptions.from_polars_engine(engine)) source_info_y = stats.column_stats[qir2]["y"].source_info - assert source_info_y.unique_stats("y").fraction == y_unique_fraction + assert source_info_y.unique_stats().fraction == y_unique_fraction assert y_row_count == source_info_y.row_count assert_gpu_result_equal(q2.sort(pl.col("y")).slice(0, 2), engine=engine) @@ -333,8 +346,8 @@ def test_base_stats_union(): # We lose source info after a Union, but we # can set accurate row-count and unique-value # estimates for the current IR in #19392 - source_info = column_stats["x"].source_info - assert source_info.row_count.value is 
None + source_info_x = column_stats["x"].source_info + assert source_info_x.row_count.value is None def test_base_stats_distinct(df): @@ -352,6 +365,6 @@ def test_base_stats_distinct(df): stats = collect_base_stats(ir, ConfigOptions.from_polars_engine(engine)) column_stats = stats.column_stats[ir] - source_info = column_stats["y"].source_info - assert source_info.row_count.value == row_count - assert source_info.row_count.exact + source_info_y = column_stats["y"].source_info + assert source_info_y.row_count.value == row_count + assert source_info_y.row_count.exact From ba388bd1071c47f60b310801c2cd72d8bd883d6d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 21 Aug 2025 10:36:43 -0500 Subject: [PATCH 189/366] Fix bug in `eval` function with `nvtx-0.2.11` (#19754) When `nvtx-0.2.11` is used with `cudf-pandas`, we run into a scenario of incorrectly fetching the frame from the stack. This PR fixes it. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19754 --- python/cudf/cudf/pandas/_wrappers/pandas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 95f559f4b6f..b4cb727721c 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -1132,7 +1132,8 @@ def _find_user_frame(): frame = inspect.currentframe() while frame: modname = frame.f_globals.get("__name__", "") - if modname == "__main__" or not modname.startswith("cudf."): + # TODO: Remove "nvtx." entry once we cross nvtx-0.2.11 as minimum version + if modname == "__main__" or not modname.startswith(("cudf.", "nvtx.")): return frame frame = frame.f_back raise RuntimeError("Could not find the user's frame.") From 4db8a382162fc69107c8a43b2ec8582fae59512a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 21 Aug 2025 09:23:28 -0700 Subject: [PATCH 190/366] Support decimal columns in cudf_polars (#19749) Redo of https://github.com/rapidsai/cudf/pull/19589 Authors: - Matthew Roeschke (https://github.com/mroeschke) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19749 --- .../cudf_polars/containers/dataframe.py | 19 +++-- .../cudf_polars/containers/datatype.py | 2 + .../cudf_polars/dsl/utils/aggregations.py | 47 +++++++---- .../cudf_polars/cudf_polars/testing/plugin.py | 2 + .../tests/expressions/test_literal.py | 6 +- python/cudf_polars/tests/test_groupby.py | 81 +++++++++++-------- python/cudf_polars/tests/test_scan.py | 9 +++ python/cudf_polars/tests/test_select.py | 25 ++++++ python/pylibcudf/pylibcudf/scalar.pyx | 8 +- python/pylibcudf/tests/test_interop.py | 2 +- 10 files changed, 142 insertions(+), 59 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 43ec63738b2..3a095be3cfe 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -29,17 +29,26 @@ def _create_polars_column_metadata( name: str, dtype: PolarsDataType ) -> plc.interop.ColumnMetadata: - """Create ColumnMetadata preserving pl.Struct field names.""" + """Create ColumnMetadata preserving dtype attributes not supported by libcudf.""" + children_meta = [] + timezone = "" + precision: 
int | None = None + if isinstance(dtype, pl.Struct): children_meta = [ _create_polars_column_metadata(field.name, field.dtype) for field in dtype.fields ] - else: - children_meta = [] - timezone = dtype.time_zone if isinstance(dtype, pl.Datetime) else None + elif isinstance(dtype, pl.Datetime): + timezone = dtype.time_zone or timezone + elif isinstance(dtype, pl.Decimal): + precision = dtype.precision + return plc.interop.ColumnMetadata( - name=name, timezone=timezone or "", children_meta=children_meta + name=name, + timezone=timezone, + precision=precision, + children_meta=children_meta, ) diff --git a/python/cudf_polars/cudf_polars/containers/datatype.py b/python/cudf_polars/cudf_polars/containers/datatype.py index 5de610425ed..50a5352612a 100644 --- a/python/cudf_polars/cudf_polars/containers/datatype.py +++ b/python/cudf_polars/cudf_polars/containers/datatype.py @@ -81,6 +81,8 @@ def _from_polars(dtype: pl.DataType) -> plc.DataType: assert_never(dtype.time_unit) elif isinstance(dtype, pl.String): return plc.DataType(plc.TypeId.STRING) + elif isinstance(dtype, pl.Decimal): + return plc.DataType(plc.TypeId.DECIMAL128, scale=-dtype.scale) elif isinstance(dtype, pl.Null): # TODO: Hopefully return plc.DataType(plc.TypeId.EMPTY) diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index 8e242c55a47..5f51d54cd44 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -158,12 +158,30 @@ def decompose_single_agg( is_median = agg.name == "median" is_quantile = agg.name == "quantile" + # quantile agg on decimal: unsupported -> keep dtype Decimal + # mean/median on decimal: Polars returns float -> pre-cast + decimal_unsupported = False + if plc.traits.is_fixed_point(child_dtype): + if is_quantile: + decimal_unsupported = True + elif agg.name in {"mean", "median"}: + tid = agg.dtype.plc.id() + if tid in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}: + cast_to = ( + DataType(pl.Float64) + if tid == plc.TypeId.FLOAT64 + else DataType(pl.Float32) + ) + child = expr.Cast(cast_to, child) + child_dtype = child.dtype.plc + is_group_quantile_supported = plc.traits.is_integral( child_dtype ) or plc.traits.is_floating_point(child_dtype) unsupported = ( - (is_median or is_quantile) and not is_group_quantile_supported + decimal_unsupported + or ((is_median or is_quantile) and not is_group_quantile_supported) ) or (not plc.aggregation.is_valid_aggregation(child_dtype, req)) if unsupported: return [], named_expr.reconstruct(expr.Literal(child.dtype, None)) @@ -172,19 +190,12 @@ def decompose_single_agg( # The aggregation is just reconstructed with the new # (potentially masked) child. This is safe because we recursed # to ensure there are no nested aggregations. - return ( - [(named_expr.reconstruct(agg.reconstruct([child])), True)], - named_expr.reconstruct(expr.Col(agg.dtype, name)), - ) - elif agg.name in ("mean", "median", "quantile", "std", "var"): - # libcudf promotes these to float64; but polars - # keeps Float32, so cast back in post-processing. 
- named = expr.NamedExpr(name, agg) - post_col: expr.Expr = expr.Col(DataType(pl.Float64()), name) - if agg.dtype.plc.id() == plc.TypeId.FLOAT32: - post_col = expr.Cast(agg.dtype, post_col) - return [(named, True)], expr.NamedExpr(name, post_col) - elif agg.name == "sum": + + # rebuild the agg with the transformed child + new_children = [child] if not is_quantile else [child, agg.children[1]] + named_expr = named_expr.reconstruct(agg.reconstruct(new_children)) + + if agg.name == "sum": col = ( expr.Cast(agg.dtype, expr.Col(DataType(pl.datatypes.Int64()), name)) if ( @@ -230,6 +241,14 @@ def decompose_single_agg( return [(named_expr, True), (win_len, True)], expr.NamedExpr( name, post_ternary_expr ) + elif agg.name in {"mean", "median", "quantile", "std", "var"}: + post_agg_col: expr.Expr = expr.Col( + DataType(pl.Float64()), name + ) # libcudf promotes to float64 + if agg.dtype.plc.id() == plc.TypeId.FLOAT32: + # Cast back to float32 to match Polars + post_agg_col = expr.Cast(agg.dtype, post_agg_col) + return [(named_expr, True)], named_expr.reconstruct(post_agg_col) else: return [(named_expr, True)], named_expr.reconstruct( expr.Col(agg.dtype, name) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index cd3bbe01020..ccb4658b5ba 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -174,6 +174,8 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "allow_missing_columns argument in read_parquet not translated in IR", "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "allow_missing_columns argument in read_parquet not translated in IR", + "tests/unit/datatypes/test_decimal.py::test_decimal_aggregations": "https://github.com/pola-rs/polars/issues/23899", + "tests/unit/datatypes/test_decimal.py::test_decimal_arithmetic_schema": "https://github.com/pola-rs/polars/issues/23899", } diff --git a/python/cudf_polars/tests/expressions/test_literal.py b/python/cudf_polars/tests/expressions/test_literal.py index 69ee80da82e..1c2eb05ebfe 100644 --- a/python/cudf_polars/tests/expressions/test_literal.py +++ b/python/cudf_polars/tests/expressions/test_literal.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import datetime + import pytest import polars as pl @@ -95,7 +97,9 @@ def test_select_literal_series(): assert_gpu_result_equal(q) -@pytest.mark.parametrize("expr", [pl.lit(None), pl.lit(10, dtype=pl.Decimal())]) +@pytest.mark.parametrize( + "expr", [pl.lit(None), pl.lit(datetime.time(12, 0), dtype=pl.Time())] +) def test_unsupported_literal_raises(expr): df = pl.LazyFrame({}) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index dcbf2d3eec5..33404e9579b 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -14,12 +14,12 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) -from cudf_polars.utils.versions import POLARS_VERSION_LT_1321 +from cudf_polars.utils.versions import POLARS_VERSION_LT_132, POLARS_VERSION_LT_1321 @pytest.fixture def df(): - return pl.LazyFrame( + lf = pl.LazyFrame( { "key1": [1, 1, 1, 2, 3, 1, 4, 6, 7], "key2": [2, 2, 2, 2, 6, 1, 4, 6, 8], @@ 
-43,6 +43,11 @@ def df(): ], } ) + if not POLARS_VERSION_LT_132: + lf = lf.with_columns( + pl.col("float").cast(pl.Decimal(precision=9, scale=2)).alias("decimal") + ) + return lf @pytest.fixture( @@ -61,39 +66,47 @@ def keys(request): return request.param -@pytest.fixture( - params=[ - [], - ["int"], - ["float", "int"], - [pl.col("float") + pl.col("int")], - [pl.col("float").is_not_null()], - [pl.col("int32").sum()], - [pl.col("int32").mean()], - [ - pl.col("uint16_with_null").sum(), - pl.col("uint16_with_null").mean().alias("mean"), - ], - [pl.col("float").max() - pl.col("int").min() + pl.col("int").max()], - [pl.col("float").mean(), pl.col("int").std()], - [(pl.col("float") - pl.lit(2)).max()], - [pl.lit(10).alias("literal_value")], - [pl.col("float").sum().round(decimals=1)], - [pl.col("float").round(decimals=1).sum()], - [pl.col("float").sum().round()], - [pl.col("float").round().sum()], - [pl.col("int").first(), pl.col("float").last()], - [pl.col("int").sum(), pl.col("string").str.replace("h", "foo", literal=True)], - [pl.col("float").quantile(0.3, interpolation="nearest")], - [pl.col("float").quantile(0.3, interpolation="higher")], - [pl.col("float").quantile(0.3, interpolation="lower")], - [pl.col("float").quantile(0.3, interpolation="midpoint")], - [pl.col("float").quantile(0.3, interpolation="linear")], - [ - pl.col("datetime").max(), - pl.col("datetime").max().dt.is_leap_year().alias("leapyear"), - ], +_EXPRS: list[list[pl.Expr | str]] = [ + [], + ["int"], + ["float", "int"], + [pl.col("float") + pl.col("int")], + [pl.col("float").is_not_null()], + [pl.col("int32").sum()], + [pl.col("int32").mean()], + [ + pl.col("uint16_with_null").sum(), + pl.col("uint16_with_null").mean().alias("mean"), + ], + [pl.col("float").max() - pl.col("int").min() + pl.col("int").max()], + [pl.col("float").mean(), pl.col("int").std()], + [(pl.col("float") - pl.lit(2)).max()], + [pl.lit(10).alias("literal_value")], + [pl.col("float").sum().round(decimals=1)], + [pl.col("float").round(decimals=1).sum()], + [pl.col("float").sum().round()], + [pl.col("float").round().sum()], + [pl.col("int").first(), pl.col("float").last()], + [pl.col("int").sum(), pl.col("string").str.replace("h", "foo", literal=True)], + [pl.col("float").quantile(0.3, interpolation="nearest")], + [pl.col("float").quantile(0.3, interpolation="higher")], + [pl.col("float").quantile(0.3, interpolation="lower")], + [pl.col("float").quantile(0.3, interpolation="midpoint")], + [pl.col("float").quantile(0.3, interpolation="linear")], + [ + pl.col("datetime").max(), + pl.col("datetime").max().dt.is_leap_year().alias("leapyear"), ], +] + +# polars gives us precision=None, which we +# do not support +if not POLARS_VERSION_LT_132: + _EXPRS.append([pl.col("decimal").median()]) + + +@pytest.fixture( + params=_EXPRS, ids=lambda aggs: "-".join(map(str, aggs)), ) def exprs(request): diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index 8481105baad..7510fe833be 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import decimal from typing import TYPE_CHECKING import pytest @@ -45,6 +46,14 @@ def df(): "a": [1, 2, 3, None, 4, 5], "b": ["ẅ", "x", "y", "z", "123", "abcd"], "c": [None, None, 4, 5, -1, 0], + "d": [ + decimal.Decimal("1.23"), + None, + decimal.Decimal("0.00"), + None, + decimal.Decimal("-5.67"), + None, + ], } ) diff --git a/python/cudf_polars/tests/test_select.py
b/python/cudf_polars/tests/test_select.py index da3f519783b..10fcf9f660d 100644 --- a/python/cudf_polars/tests/test_select.py +++ b/python/cudf_polars/tests/test_select.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import decimal + import pytest import polars as pl @@ -27,6 +29,29 @@ def test_select(): assert_gpu_result_equal(query) +def test_select_decimal(): + ldf = pl.LazyFrame( + {"a": pl.Series(values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(3, 1))} + ) + query = ldf.select(pl.col("a")) + assert_gpu_result_equal(query) + + +def test_select_decimal_precision_none_result_max_precision(): + ldf = pl.LazyFrame( + { + "a": pl.Series( + values=[decimal.Decimal("1.0"), None], dtype=pl.Decimal(None, 1) + ) + } + ) + query = ldf.select(pl.col("a")) + cpu_result = query.collect() + gpu_result = query.collect(engine="gpu") + assert cpu_result.schema["a"].precision is None + assert gpu_result.schema["a"].precision == 38 + + def test_select_reduce(): ldf = pl.DataFrame( { diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 57ea17d2921..7c183b96bbb 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -297,7 +297,7 @@ cdef class Scalar: return decimal.Decimal( (slr).value().value() ).scaleb( - -(slr).type().scale() + (slr).type().scale() ) else: raise NotImplementedError( @@ -686,12 +686,12 @@ def _(py_val: datetime.date, dtype: DataType | None, stream: Stream | None): @_from_py.register(decimal.Decimal) def _(py_val: decimal.Decimal, dtype: DataType | None, stream: Stream | None): - scale = -py_val.as_tuple().exponent - as_int = int(py_val.scaleb(scale)) + scale = py_val.as_tuple().exponent + as_int = int(py_val.scaleb(-scale)) cdef int128_t val = as_int - dtype = DataType(type_id.DECIMAL128, -scale) + dtype = DataType(type_id.DECIMAL128, scale) if dtype.id() != type_id.DECIMAL128: raise TypeError("Expected dtype to be DECIMAL128") diff --git a/python/pylibcudf/tests/test_interop.py b/python/pylibcudf/tests/test_interop.py index 171d70c2496..b1a6e9f2c66 100644 --- a/python/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/tests/test_interop.py @@ -105,7 +105,7 @@ def test_decimal_other(data_type): [plc.TypeId.DECIMAL128, plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32], ) def test_decimal_respect_metadata_precision(plc_type, request): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( parse(pa.__version__) < parse("19.0.0") and plc_type in {plc.TypeId.DECIMAL64, plc.TypeId.DECIMAL32}, From 3ae9ff85e115e4bf3a805de5027c582be24eed3a Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:23:36 -0400 Subject: [PATCH 191/366] Skip polars CPU perf test for with_columns (#19763) Flaky test discovered in https://github.com/rapidsai/cudf/pull/19754. Let's skip it.
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19763 --- python/cudf_polars/cudf_polars/testing/plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index ccb4658b5ba..84ef9d9234f 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -193,6 +193,7 @@ def pytest_configure(config: pytest.Config) -> None: # Tests performance difference of CPU engine "tests/unit/operations/test_join.py::test_join_where_eager_perf_21145": "Tests performance bug in CPU engine", "tests/unit/operations/namespaces/list/test_list.py::test_list_struct_field_perf": "Tests CPU Engine perf", + "tests/benchmark/test_with_columns.py::test_with_columns_quadratic_19503": "Tests performance bug in CPU engine", # The test may segfault with the legacy streaming engine. We should # remove this skip when all polars tests use the new streaming engine. "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine", From 75d423fa5014a132ef990288ee9903800cf0f1f2 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:42:49 -0400 Subject: [PATCH 192/366] Support fill_null with fill strategy in cudf-polars (#19318) - Contributes to https://github.com/rapidsai/cudf/issues/19200. - Depends on https://github.com/pola-rs/polars/pull/23479 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19318 --- .../cudf_polars/dsl/expressions/unary.py | 59 +++++++++++++++++++ .../cudf_polars/dsl/utils/aggregations.py | 9 +++ .../cudf_polars/experimental/select.py | 16 ++++- .../cudf_polars/experimental/utils.py | 13 +++- .../tests/experimental/test_select.py | 30 +++++++++- python/cudf_polars/tests/test_drop_nulls.py | 18 ++++-- python/cudf_polars/tests/test_groupby.py | 16 +++++ 7 files changed, 151 insertions(+), 10 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 440a2efa9cc..e763775e37c 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -7,6 +7,8 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast +from typing_extensions import assert_never + import pylibcudf as plc from cudf_polars.containers import Column @@ -110,6 +112,7 @@ class UnaryFunction(Expr): "set_sorted", "unique", "value_counts", + "fill_null_with_strategy", "null_count", "top_k", } @@ -152,6 +155,10 @@ def __init__( raise NotImplementedError( "reverse=True is not supported for cumulative aggregations" ) + if self.name == "fill_null_with_strategy" and self.options[1] not in {0, None}: + raise NotImplementedError( + "Filling null values with limit specified is not yet supported." 
+ ) def do_evaluate( self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME @@ -326,6 +333,58 @@ def do_evaluate( .obj.to_scalar() ) return Column(plc.replace.replace_nulls(column.obj, arg), dtype=self.dtype) + elif self.name == "fill_null_with_strategy": + column = self.children[0].evaluate(df, context=context) + strategy, limit = self.options + if ( + column.null_count == 0 + or limit == 0 + or ( + column.null_count == column.size and strategy not in {"zero", "one"} + ) + ): + return column + if strategy == "forward": + replacement = plc.replace.ReplacePolicy.PRECEDING + elif strategy == "backward": + replacement = plc.replace.ReplacePolicy.FOLLOWING + elif strategy == "min": + replacement = plc.reduce.reduce( + column.obj, + plc.aggregation.min(), + column.dtype.plc, + ) + elif strategy == "max": + replacement = plc.reduce.reduce( + column.obj, + plc.aggregation.max(), + column.dtype.plc, + ) + elif strategy == "mean": + replacement = plc.reduce.reduce( + column.obj, + plc.aggregation.mean(), + plc.DataType(plc.TypeId.FLOAT64), + ) + elif strategy == "zero": + replacement = plc.scalar.Scalar.from_py(0, dtype=column.dtype.plc) + elif strategy == "one": + replacement = plc.scalar.Scalar.from_py(1, dtype=column.dtype.plc) + else: + assert_never(strategy) # pragma: no cover + + if strategy == "mean": + return Column( + plc.replace.replace_nulls( + plc.unary.cast(column.obj, plc.DataType(plc.TypeId.FLOAT64)), + replacement, + ), + dtype=self.dtype, + ).astype(self.dtype) + return Column( + plc.replace.replace_nulls(column.obj, replacement), + dtype=self.dtype, + ) elif self.name == "as_struct": children = [ child.evaluate(df, context=context).obj for child in self.children diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index 5f51d54cd44..2cd7cde44ef 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -131,6 +131,15 @@ def decompose_single_agg( return [(named_expr, True)], named_expr.reconstruct(expr.Col(agg.dtype, name)) if isinstance(agg, (expr.Literal, expr.LiteralColumn)): return [], named_expr + if ( + is_top + and isinstance(agg, expr.UnaryFunction) + and agg.name == "fill_null_with_strategy" + ): + strategy, _ = agg.options + raise NotImplementedError( + f"fill_null_with_strategy({strategy!r}) is not supported in groupby aggregations" + ) if isinstance(agg, expr.Agg): if agg.name == "quantile": # Second child the requested quantile (which is asserted diff --git a/python/cudf_polars/cudf_polars/experimental/select.py b/python/cudf_polars/cudf_polars/experimental/select.py index 1ca199a8af6..d2c29aa0f81 100644 --- a/python/cudf_polars/cudf_polars/experimental/select.py +++ b/python/cudf_polars/cudf_polars/experimental/select.py @@ -15,7 +15,10 @@ from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.experimental.expressions import decompose_expr_graph -from cudf_polars.experimental.utils import _lower_ir_fallback +from cudf_polars.experimental.utils import ( + _contains_unsupported_fill_strategy, + _lower_ir_fallback, +) if TYPE_CHECKING: from collections.abc import MutableMapping @@ -107,6 +110,17 @@ def _( ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: child, partition_info = rec(ir.children[0]) pi = partition_info[child] + if pi.count > 1 and _contains_unsupported_fill_strategy( + [e.value for e in ir.exprs] + ): + 
return _lower_ir_fallback( + ir.reconstruct([child]), + rec, + msg=( + "fill_null with strategy other than 'zero' or 'one' is not supported " + "for multiple partitions; falling back to in-memory evaluation." + ), + ) if ( pi.count == 1 and Select._is_len_expr(ir.exprs) diff --git a/python/cudf_polars/cudf_polars/experimental/utils.py b/python/cudf_polars/cudf_polars/experimental/utils.py index bdebc02fac7..e62990c1755 100644 --- a/python/cudf_polars/cudf_polars/experimental/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/utils.py @@ -10,7 +10,7 @@ from itertools import chain from typing import TYPE_CHECKING -from cudf_polars.dsl.expr import Col, Expr, GroupedRollingWindow +from cudf_polars.dsl.expr import Col, Expr, GroupedRollingWindow, UnaryFunction from cudf_polars.dsl.ir import Union from cudf_polars.dsl.traversal import traversal from cudf_polars.experimental.base import PartitionInfo @@ -116,3 +116,14 @@ def _get_unique_fractions( def _contains_over(exprs: Sequence[Expr]) -> bool: """Return True if any expression in 'exprs' contains an over(...) (ie. GroupedRollingWindow).""" return any(isinstance(e, GroupedRollingWindow) for e in traversal(exprs)) + + +def _contains_unsupported_fill_strategy(exprs: Sequence[Expr]) -> bool: + for e in traversal(exprs): + if ( + isinstance(e, UnaryFunction) + and e.name == "fill_null_with_strategy" + and e.options[0] not in ("zero", "one") + ): + return True + return False diff --git a/python/cudf_polars/tests/experimental/test_select.py b/python/cudf_polars/tests/experimental/test_select.py index cd19f337830..4daa23773ee 100644 --- a/python/cudf_polars/tests/experimental/test_select.py +++ b/python/cudf_polars/tests/experimental/test_select.py @@ -9,8 +9,12 @@ import polars as pl -from cudf_polars.testing.asserts import DEFAULT_SCHEDULER, assert_gpu_result_equal -from cudf_polars.utils.versions import POLARS_VERSION_LT_130 +from cudf_polars.testing.asserts import ( + DEFAULT_SCHEDULER, + assert_gpu_result_equal, + assert_ir_translation_raises, +) +from cudf_polars.utils.versions import POLARS_VERSION_LT_130, POLARS_VERSION_LT_132 @pytest.fixture(scope="module") @@ -79,6 +83,28 @@ def test_select_reduce_fallback(df, fallback_mode): assert_gpu_result_equal(query, engine=engine) +def test_select_fill_null_with_strategy(df): + engine = pl.GPUEngine( + raise_on_fail=True, + executor="streaming", + executor_options={ + "fallback_mode": "warn", + "max_rows_per_partition": 3, + "scheduler": DEFAULT_SCHEDULER, + }, + ) + q = df.select(pl.col("a").forward_fill()) + + if POLARS_VERSION_LT_132: + assert_ir_translation_raises(q, NotImplementedError) + else: + with pytest.warns( + UserWarning, + match="fill_null with strategy other than 'zero' or 'one' is not supported for multiple partitions", + ): + assert_gpu_result_equal(q, engine=engine) + + @pytest.mark.parametrize( "aggs", [ diff --git a/python/cudf_polars/tests/test_drop_nulls.py b/python/cudf_polars/tests/test_drop_nulls.py index 46a8007a805..ba60932d063 100644 --- a/python/cudf_polars/tests/test_drop_nulls.py +++ b/python/cudf_polars/tests/test_drop_nulls.py @@ -10,6 +10,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.utils.versions import POLARS_VERSION_LT_132 @pytest.fixture( @@ -56,15 +57,20 @@ def test_fill_null_with_string(): ) def test_fill_null_with_strategy(null_data, strategy): q = null_data.select(pl.col("a").fill_null(strategy=strategy)) - - # Not yet exposed to python from rust - assert_ir_translation_raises(q, NotImplementedError) + if 
POLARS_VERSION_LT_132: + assert_ir_translation_raises(q, NotImplementedError) + else: + assert_gpu_result_equal(q) @pytest.mark.parametrize("strategy", ["forward", "backward"]) @pytest.mark.parametrize("limit", [0, 1, 2]) def test_fill_null_with_limit(null_data, strategy, limit): q = null_data.select(pl.col("a").fill_null(strategy=strategy, limit=limit)) - - # Not yet exposed to python from rust - assert_ir_translation_raises(q, NotImplementedError) + if limit != 0: + assert_ir_translation_raises(q, NotImplementedError) + else: + if POLARS_VERSION_LT_132: + assert_ir_translation_raises(q, NotImplementedError) + else: + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 33404e9579b..d900737c62b 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -397,3 +397,19 @@ def test_groupby_aggs_keep_unsupported_as_null(df: pl.LazyFrame, agg_expr) -> No def test_groupby_ternary_supported(df: pl.LazyFrame, expr: pl.Expr) -> None: q = df.group_by("key1").agg(expr) assert_gpu_result_equal(q, check_row_order=False) + + +@pytest.mark.parametrize( + "strategy", ["forward", "backward", "min", "max", "mean", "zero", "one"] +) +def test_groupby_fill_null_with_strategy(strategy): + lf = pl.LazyFrame( + { + "key": [1, 1, 2, 2, 2], + "val": [None, 2, None, 4, None], + } + ) + + q = lf.group_by("key").agg(pl.col("val").fill_null(strategy=strategy)) + + assert_ir_translation_raises(q, NotImplementedError) From 1e5fc00bae5a8ffa64ecbf2b9c6fb7a0b738bbe9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:46:09 -0700 Subject: [PATCH 193/366] Move more test_dataframe.py tests to new cudf classic testing directory (#19731) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19731 --- .../methods/test_add_prefix_suffix.py | 24 + .../tests/dataframe/methods/test_astype.py | 79 + .../methods/test_nlargest_nsmallest.py | 22 +- .../tests/dataframe/methods/test_squeeze.py | 15 + .../tests/dataframe/methods/test_to_dict.py | 28 + .../tests/dataframe/methods/test_transpose.py | 38 + .../tests/dataframe/methods/test_where.py | 456 +++- .../cudf/tests/dataframe/test_attributes.py | 126 + .../cudf/tests/dataframe/test_constructors.py | 834 +++++- .../cudf/cudf/tests/groupby/test_cov_corr.py | 257 ++ .../cudf/tests/series/methods/test_astype.py | 282 +++ .../cudf/tests/series/test_constructors.py | 12 + python/cudf/cudf/tests/test_dataframe.py | 2254 +---------------- 13 files changed, 2251 insertions(+), 2176 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_add_prefix_suffix.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_astype.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_squeeze.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_dict.py create mode 100644 python/cudf/cudf/tests/groupby/test_cov_corr.py diff --git a/python/cudf/cudf/tests/dataframe/methods/test_add_prefix_suffix.py b/python/cudf/cudf/tests/dataframe/methods/test_add_prefix_suffix.py new file mode 100644 index 00000000000..db56d2cc360 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_add_prefix_suffix.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025, 
NVIDIA CORPORATION. + +import cudf +from cudf.testing import assert_eq + + +def test_dataframe_add_prefix(): + cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) + pdf = cdf.to_pandas() + + got = cdf.add_prefix("item_") + expected = pdf.add_prefix("item_") + + assert_eq(got, expected) + + +def test_dataframe_add_suffix(): + cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) + pdf = cdf.to_pandas() + + got = cdf.add_suffix("_item") + expected = pdf.add_suffix("_item") + + assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_astype.py b/python/cudf/cudf/tests/dataframe/methods/test_astype.py new file mode 100644 index 00000000000..9eb93d7b6a8 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_astype.py @@ -0,0 +1,79 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("copy", [True, False]) +def test_df_series_dataframe_astype_copy(copy): + gdf = cudf.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + pdf = gdf.to_pandas() + + assert_eq( + gdf.astype(dtype="float", copy=copy), + pdf.astype(dtype="float", copy=copy), + ) + assert_eq(gdf, pdf) + + gsr = cudf.Series([1, 2]) + psr = gsr.to_pandas() + + assert_eq( + gsr.astype(dtype="float", copy=copy), + psr.astype(dtype="float", copy=copy), + ) + assert_eq(gsr, psr) + + gsr = cudf.Series([1, 2]) + psr = gsr.to_pandas() + + actual = gsr.astype(dtype="int64", copy=copy) + expected = psr.astype(dtype="int64", copy=copy) + assert_eq(expected, actual) + assert_eq(gsr, psr) + actual[0] = 3 + expected[0] = 3 + assert_eq(gsr, psr) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_df_series_dataframe_astype_dtype_dict(copy): + gdf = cudf.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + pdf = gdf.to_pandas() + + assert_eq( + gdf.astype(dtype={"col1": "float"}, copy=copy), + pdf.astype(dtype={"col1": "float"}, copy=copy), + ) + assert_eq(gdf, pdf) + + gsr = cudf.Series([1, 2]) + psr = gsr.to_pandas() + + assert_eq( + gsr.astype(dtype={None: "float"}, copy=copy), + psr.astype(dtype={None: "float"}, copy=copy), + ) + assert_eq(gsr, psr) + + assert_exceptions_equal( + lfunc=psr.astype, + rfunc=gsr.astype, + lfunc_args_and_kwargs=([], {"dtype": {"a": "float"}, "copy": copy}), + rfunc_args_and_kwargs=([], {"dtype": {"a": "float"}, "copy": copy}), + ) + + gsr = cudf.Series([1, 2]) + psr = gsr.to_pandas() + + actual = gsr.astype({None: "int64"}, copy=copy) + expected = psr.astype({None: "int64"}, copy=copy) + assert_eq(expected, actual) + assert_eq(gsr, psr) + + actual[0] = 3 + expected[0] = 3 + assert_eq(gsr, psr) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py b/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py index 6c148cef4a6..c153d5c92fb 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_nlargest_nsmallest.py @@ -5,8 +5,9 @@ import pandas as pd import pytest -from cudf import DataFrame +import cudf from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.mark.parametrize("n", [10, 5]) @@ -18,7 +19,7 @@ def test_dataframe_nlargest_nsmallest(n, op, columns): aa = rng.random(nelem) bb = rng.random(nelem) - df = DataFrame({"a": aa, "b": bb}) + df = cudf.DataFrame({"a": aa, "b": bb}) pdf = df.to_pandas() assert_eq(getattr(df, op)(n, columns), getattr(pdf, op)(n, columns)) 
@@ -35,7 +36,7 @@ def test_dataframe_nlargest_sliced(sliceobj): df["b"] = rng.random(nelem) expect = df[sliceobj].nlargest(n, "a") - gdf = DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) got = gdf[sliceobj].nlargest(n, "a") assert (got.to_pandas() == expect).all().all() @@ -52,6 +53,19 @@ def test_dataframe_nsmallest_sliced(sliceobj): df["b"] = rng.random(nelem) expect = df[sliceobj].nsmallest(n, "a") - gdf = DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(df) got = gdf[sliceobj].nsmallest(n, "a") assert (got.to_pandas() == expect).all().all() + + +@pytest.mark.parametrize("attr", ["nlargest", "nsmallest"]) +def test_dataframe_nlargest_nsmallest_str_error(attr): + gdf = cudf.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + pdf = gdf.to_pandas() + + assert_exceptions_equal( + getattr(gdf, attr), + getattr(pdf, attr), + ([], {"n": 1, "columns": ["a", "b"]}), + ([], {"n": 1, "columns": ["a", "b"]}), + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_squeeze.py b/python/cudf/cudf/tests/dataframe/methods/test_squeeze.py new file mode 100644 index 00000000000..4c89e167752 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_squeeze.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("axis", [None, 0, "index", 1, "columns"]) +@pytest.mark.parametrize("data", [[[1, 2], [2, 3]], [1, 2], [1]]) +def test_squeeze(axis, data): + df = cudf.DataFrame(data) + result = df.squeeze(axis=axis) + expected = df.to_pandas().squeeze(axis=axis) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_dict.py b/python/cudf/cudf/tests/dataframe/methods/test_to_dict.py new file mode 100644 index 00000000000..bfaf8787f15 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_dict.py @@ -0,0 +1,28 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import collections + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "orient", ["dict", "list", "split", "tight", "records", "index", "series"] +) +@pytest.mark.parametrize( + "into", [dict, collections.OrderedDict, collections.defaultdict(list)] +) +def test_dataframe_to_dict(orient, into): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [9, 5, 3]}, index=[10, 11, 12]) + pdf = df.to_pandas() + + actual = df.to_dict(orient=orient, into=into) + expected = pdf.to_dict(orient=orient, into=into) + if orient == "series": + assert actual.keys() == expected.keys() + for key in actual.keys(): + assert_eq(expected[key], actual[key]) + else: + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_transpose.py b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py index 96ef5deb49a..ece422f7822 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_transpose.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_transpose.py @@ -1,4 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+import decimal import string import numpy as np @@ -97,3 +98,40 @@ def test_dataframe_transpose( assert_eq(expect, got_function.to_pandas(nullable=nullable)) assert_eq(expect, got_property.to_pandas(nullable=nullable)) + + +@pytest.mark.parametrize( + "data", + [ + {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, + {"a": [[{"b": 567}], None] * 10}, + {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, + ], +) +def test_dataframe_transpose_complex_types(data): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = pdf.T + actual = gdf.T + + assert_eq(expected, actual) + + +def test_dataframe_transpose_category(): + pdf = pd.DataFrame( + { + "a": pd.Series(["a", "b", "c"], dtype="category"), + "b": pd.Series(["a", "b", "c"], dtype="category"), + } + ) + + gdf = cudf.DataFrame.from_pandas(pdf) + + got_function = gdf.transpose() + got_property = gdf.T + + expect = pdf.transpose() + + assert_eq(expect, got_function.to_pandas()) + assert_eq(expect, got_property.to_pandas()) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_where.py b/python/cudf/cudf/tests/dataframe/methods/test_where.py index f7af9945272..7b546d20f45 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_where.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_where.py @@ -1,12 +1,14 @@ # Copyright (c) 2025, NVIDIA CORPORATION. - +import cupy as cp import numpy as np import pandas as pd import pytest +from numba import cuda import cudf from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.mark.parametrize("fill_value", [[888, 999]]) @@ -79,3 +81,455 @@ def test_dataframe_where_with_different_options(): got = gdf.where(boolean_mask, [8, 9]) assert_eq(expect, got) + + +def test_frame_series_where(): + gdf = cudf.DataFrame( + {"a": [1.0, 2.0, None, 3.0, None], "b": [None, 10.0, 11.0, None, 23.0]} + ) + pdf = gdf.to_pandas() + expected = gdf.where(gdf.notna(), gdf.mean()) + actual = pdf.where(pdf.notna(), pdf.mean(), axis=1) + assert_eq(expected, actual) + + +def test_frame_series_where_other(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]}) + pdf = gdf.to_pandas() + + expected = gdf.where(gdf["b"] == 1, cudf.NA) + actual = pdf.where(pdf["b"] == 1, pd.NA) + assert_eq( + actual.fillna(-1).values, + expected.fillna(-1).values, + check_dtype=False, + ) + + expected = gdf.where(gdf["b"] == 1, 0) + actual = pdf.where(pdf["b"] == 1, 0) + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data,condition,other,error", + [ + (pd.Series(range(5)), pd.Series(range(5)) > 0, None, None), + (pd.Series(range(5)), pd.Series(range(5)) > 1, None, None), + (pd.Series(range(5)), pd.Series(range(5)) > 1, 10, None), + ( + pd.Series(range(5)), + pd.Series(range(5)) > 1, + pd.Series(range(5, 10)), + None, + ), + ( + pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]), + ( + pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]) + % 3 + ) + == 0, + -pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]), + None, + ), + ( + pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}), + pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}) == 4, + None, + None, + ), + ( + pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}), + pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}) != 4, + None, + None, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [True, True, True], + None, + ValueError, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [True, True, 
True, False], + None, + ValueError, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [[True, True, True, False], [True, True, True, False]], + None, + ValueError, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [[True, True], [False, True], [True, False], [False, True]], + None, + None, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + cuda.to_device( + np.array( + [[True, True], [False, True], [True, False], [False, True]] + ) + ), + None, + None, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + cp.array( + [[True, True], [False, True], [True, False], [False, True]] + ), + 17, + None, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [[True, True], [False, True], [True, False], [False, True]], + 17, + None, + ), + ( + pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), + [ + [True, True, False, True], + [True, True, False, True], + [True, True, False, True], + [True, True, False, True], + ], + None, + ValueError, + ), + ( + pd.Series([1, 2, np.nan]), + pd.Series([1, 2, np.nan]) == 4, + None, + None, + ), + ( + pd.Series([1, 2, np.nan]), + pd.Series([1, 2, np.nan]) != 4, + None, + None, + ), + ( + pd.Series([4, np.nan, 6]), + pd.Series([4, np.nan, 6]) == 4, + None, + None, + ), + ( + pd.Series([4, np.nan, 6]), + pd.Series([4, np.nan, 6]) != 4, + None, + None, + ), + ( + pd.Series([4, np.nan, 6], dtype="category"), + pd.Series([4, np.nan, 6], dtype="category") != 4, + None, + None, + ), + ( + pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category"), + pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category") == "b", + None, + None, + ), + ( + pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category"), + pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category") == "b", + "s", + None, + ), + ( + pd.Series([1, 2, 3, 2, 5]), + pd.Series([1, 2, 3, 2, 5]) == 2, + pd.DataFrame( + { + "a": pd.Series([1, 2, 3, 2, 5]), + "b": pd.Series([1, 2, 3, 2, 5]), + } + ), + NotImplementedError, + ), + ], +) +def test_df_sr_mask_where(data, condition, other, error, inplace): + ps_where = data + gs_where = cudf.from_pandas(data) + + ps_mask = ps_where.copy(deep=True) + gs_mask = gs_where.copy(deep=True) + + if hasattr(condition, "__cuda_array_interface__"): + if type(condition).__module__.split(".")[0] == "cupy": + ps_condition = cp.asnumpy(condition) + else: + ps_condition = np.array(condition).astype("bool") + else: + ps_condition = condition + + if type(condition).__module__.split(".")[0] == "pandas": + gs_condition = cudf.from_pandas(condition) + else: + gs_condition = condition + + ps_other = other + if type(other).__module__.split(".")[0] == "pandas": + gs_other = cudf.from_pandas(other) + else: + gs_other = other + + if error is None: + expect_where = ps_where.where( + ps_condition, other=ps_other, inplace=inplace + ) + got_where = gs_where.where( + gs_condition, other=gs_other, inplace=inplace + ) + + expect_mask = ps_mask.mask( + ps_condition, other=ps_other, inplace=inplace + ) + got_mask = gs_mask.mask(gs_condition, other=gs_other, inplace=inplace) + + if inplace: + expect_where = ps_where + got_where = gs_where + + expect_mask = ps_mask + got_mask = gs_mask + + if isinstance(expect_where, pd.Series) and isinstance( + expect_where.dtype, pd.CategoricalDtype + ): + np.testing.assert_array_equal( + expect_where.cat.codes, + got_where.cat.codes.astype(expect_where.cat.codes.dtype) + .fillna(-1) + .to_numpy(), + ) + assert_eq(expect_where.cat.categories, 
got_where.cat.categories) + + np.testing.assert_array_equal( + expect_mask.cat.codes, + got_mask.cat.codes.astype(expect_mask.cat.codes.dtype) + .fillna(-1) + .to_numpy(), + ) + assert_eq(expect_mask.cat.categories, got_mask.cat.categories) + else: + assert_eq( + expect_where.fillna(-1), + got_where.fillna(-1), + check_dtype=False, + ) + assert_eq( + expect_mask.fillna(-1), got_mask.fillna(-1), check_dtype=False + ) + else: + assert_exceptions_equal( + lfunc=ps_where.where, + rfunc=gs_where.where, + lfunc_args_and_kwargs=( + [ps_condition], + {"other": ps_other, "inplace": inplace}, + ), + rfunc_args_and_kwargs=( + [gs_condition], + {"other": gs_other, "inplace": inplace}, + ), + ) + + assert_exceptions_equal( + lfunc=ps_mask.mask, + rfunc=gs_mask.mask, + lfunc_args_and_kwargs=( + [ps_condition], + {"other": ps_other, "inplace": inplace}, + ), + rfunc_args_and_kwargs=( + [gs_condition], + {"other": gs_other, "inplace": inplace}, + ), + ) + + +@pytest.mark.parametrize( + "data,condition,other,has_cat", + [ + ( + pd.DataFrame( + { + "a": pd.Series(["a", "a", "b", "c", "a", "d", "d", "a"]), + "b": pd.Series(["o", "p", "q", "e", "p", "p", "a", "a"]), + } + ), + pd.DataFrame( + { + "a": pd.Series(["a", "a", "b", "c", "a", "d", "d", "a"]), + "b": pd.Series(["o", "p", "q", "e", "p", "p", "a", "a"]), + } + ) + != "a", + None, + None, + ), + ( + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ) + != "a", + None, + True, + ), + ( + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ) + == "a", + None, + True, + ), + ( + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ) + != "a", + "a", + True, + ), + ( + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + ["a", "a", "b", "c", "a", "d", "d", "a"], + dtype="category", + ), + "b": pd.Series( + ["o", "p", "q", "e", "p", "p", "a", "a"], + dtype="category", + ), + } + ) + == "a", + "a", + True, + ), + ], +) +def test_df_string_cat_types_mask_where(data, condition, other, has_cat): + ps = data + gs = cudf.from_pandas(data) + + ps_condition = condition + if type(condition).__module__.split(".")[0] == "pandas": + gs_condition = cudf.from_pandas(condition) + else: + gs_condition = condition + + ps_other = other + if type(other).__module__.split(".")[0] == "pandas": + gs_other = cudf.from_pandas(other) + else: + gs_other = other + + 
expect_where = ps.where(ps_condition, other=ps_other) + got_where = gs.where(gs_condition, other=gs_other) + + expect_mask = ps.mask(ps_condition, other=ps_other) + got_mask = gs.mask(gs_condition, other=gs_other) + + if has_cat is None: + assert_eq( + expect_where.fillna(-1).astype("str"), + got_where.fillna(-1), + check_dtype=False, + ) + assert_eq( + expect_mask.fillna(-1).astype("str"), + got_mask.fillna(-1), + check_dtype=False, + ) + else: + assert_eq(expect_where, got_where, check_dtype=False) + assert_eq(expect_mask, got_mask, check_dtype=False) diff --git a/python/cudf/cudf/tests/dataframe/test_attributes.py b/python/cudf/cudf/tests/dataframe/test_attributes.py index afe2dbc10c4..d2cef2d8bdc 100644 --- a/python/cudf/cudf/tests/dataframe/test_attributes.py +++ b/python/cudf/cudf/tests/dataframe/test_attributes.py @@ -1,5 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import decimal +import functools import numpy as np import pandas as pd @@ -7,6 +9,130 @@ import cudf from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("digits", [0, 1, 4]) +def test_dataframe_round_builtin(digits): + pdf = pd.DataFrame( + { + "a": [1.2234242333234, 323432.3243423, np.nan], + "b": ["a", "b", "c"], + "c": pd.Series([34224, 324324, 324342], dtype="datetime64[ns]"), + "d": pd.Series([224.242, None, 2424.234324], dtype="category"), + "e": [ + decimal.Decimal("342.3243234234242"), + decimal.Decimal("89.32432497687622"), + None, + ], + } + ) + gdf = cudf.from_pandas(pdf, nan_as_null=False) + + expected = round(pdf, digits) + actual = round(gdf, digits) + + assert_eq(expected, actual) + + +def test_bool_raises(): + assert_exceptions_equal( + lfunc=bool, + rfunc=bool, + lfunc_args_and_kwargs=[[cudf.DataFrame()]], + rfunc_args_and_kwargs=[[pd.DataFrame()]], + ) + + +@pytest.mark.parametrize("name", [None, "foo", 1, 1.0]) +def test_dataframe_column_name(name): + df = cudf.DataFrame({"a": [1, 2, 3]}) + pdf = df.to_pandas() + + df.columns.name = name + pdf.columns.name = name + + assert_eq(df, pdf) + assert_eq(df.columns.name, pdf.columns.name) + + +def test_dataframe_columns_set_none_raises(): + df = cudf.DataFrame({"a": [0]}) + with pytest.raises(TypeError): + df.columns = None + + +@pytest.mark.parametrize( + "columns", + [cudf.RangeIndex(1, name="foo"), pd.RangeIndex(1, name="foo"), range(1)], +) +def test_dataframe_columns_set_rangeindex(columns): + df = cudf.DataFrame([1], columns=["a"]) + df.columns = columns + result = df.columns + expected = pd.RangeIndex(1, name=getattr(columns, "name", None)) + pd.testing.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) +def test_dataframe_columns_set_multiindex(klass): + columns = klass.from_arrays([[10]], names=["foo"]) + df = cudf.DataFrame([1], columns=["a"]) + df.columns = columns + result = df.columns + expected = pd.MultiIndex.from_arrays([[10]], names=["foo"]) + pd.testing.assert_index_equal(result, expected, exact=True) + + +@pytest.mark.parametrize( + "klass", + [ + functools.partial(cudf.Index, name="foo"), + functools.partial(cudf.Series, name="foo"), + functools.partial(pd.Index, name="foo"), + functools.partial(pd.Series, name="foo"), + np.array, + ], +) +def test_dataframe_columns_set_preserve_type(klass): + df = cudf.DataFrame([1], columns=["a"]) + columns = klass([10], dtype="int8") + df.columns = columns + result = df.columns + expected = pd.Index( + [10], dtype="int8", name=getattr(columns, "name", None) 
+ ) + pd.testing.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("column", [range(1, 2), np.array([1], dtype=np.int8)]) +@pytest.mark.parametrize( + "operation", + [ + lambda df: df.where(df < 2, 2), + lambda df: df.nans_to_nulls(), + lambda df: df.isna(), + lambda df: df.notna(), + lambda df: abs(df), + lambda df: -df, + lambda df: ~df, + lambda df: df.cumsum(), + lambda df: df.replace(1, 2), + lambda df: df.replace(10, 20), + lambda df: df.clip(0, 10), + lambda df: df.rolling(1).mean(), + lambda df: df.interpolate(), + lambda df: df.shift(), + lambda df: df.sort_values(1), + lambda df: df.round(), + lambda df: df.rank(), + ], +) +def test_op_preserves_column_metadata(column, operation): + df = cudf.DataFrame([1], columns=cudf.Index(column)) + result = operation(df).columns + expected = pd.Index(column) + pd.testing.assert_index_equal(result, expected, exact=True) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py index 385f4c44c01..15926f4faf0 100644 --- a/python/cudf/cudf/tests/dataframe/test_constructors.py +++ b/python/cudf/cudf/tests/dataframe/test_constructors.py @@ -1,5 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import collections from contextlib import nullcontext as does_not_raise import cupy as cp @@ -7,8 +8,10 @@ import pandas as pd import pyarrow as pa import pytest +from numba import cuda import cudf +from cudf.core.column.column import as_column from cudf.testing import assert_eq @@ -263,7 +266,7 @@ def test_from_records_index(columns, index): assert_eq(df, gdf) -def test_dataframe_construction_from_cupy_arrays(): +def test_dataframe_construction_from_cp_arrays(): h_ary = np.array([[1, 2, 3], [4, 5, 6]], np.int32) d_ary = cp.asarray(h_ary) @@ -302,7 +305,7 @@ def test_dataframe_construction_from_cupy_arrays(): assert_eq(df, gdf) -def test_dataframe_cupy_wrong_dimensions(): +def test_dataframe_cp_wrong_dimensions(): d_ary = cp.empty((2, 3, 4), dtype=np.int32) with pytest.raises( ValueError, match="records dimension expected 1 or 2 but found: 3" @@ -310,7 +313,7 @@ def test_dataframe_cupy_wrong_dimensions(): cudf.DataFrame(d_ary) -def test_dataframe_cupy_array_wrong_index(): +def test_dataframe_cp_array_wrong_index(): d_ary = cp.empty((2, 3), dtype=np.int32) with pytest.raises(ValueError): @@ -403,6 +406,831 @@ def test_from_scalar_typing(request, all_supported_types_as_str): assert len(gdf["b"]) == len(gdf["a"]) +@pytest.mark.parametrize( + "data", + [ + {"a": [np.nan, 1, 2], "b": [None, None, None]}, + {"a": [1, 2, np.nan, 2], "b": [np.nan, np.nan, np.nan, np.nan]}, + { + "a": [1, 2, np.nan, 2, None], + "b": [np.nan, np.nan, None, np.nan, np.nan], + }, + {"a": [1, 2, 2, None, 1.1], "b": [1, 2.2, 3, None, 5]}, + ], +) +def test_dataframe_constructor_nan_as_null(data, nan_as_null): + actual = cudf.DataFrame(data, nan_as_null=nan_as_null) + + if nan_as_null: + assert ( + not ( + actual.astype("float").replace( + cudf.Series([np.nan], nan_as_null=False), cudf.Series([-1]) + ) + == -1 + ) + .any() + .any() + ) + else: + actual = actual.select_dtypes(exclude=["object"]) + assert (actual.replace(np.nan, -1) == -1).any().any() + + +@pytest.mark.parametrize( + "data,columns,index", + [ + (pd.Series([1, 2, 3]), None, None), + (pd.Series(["a", "b", None, "c"], name="abc"), None, None), + ( + pd.Series(["a", "b", None, "c"], name="abc"), + ["abc", "b"], + [1, 2, 3], + ), + ], +) +def test_dataframe_init_from_series(data, columns, index): + expected = 
pd.DataFrame(data, columns=columns, index=index) + actual = cudf.DataFrame(data, columns=columns, index=index) + + assert_eq( + expected, + actual, + check_index_type=len(expected) != 0, + ) + + +@pytest.mark.parametrize( + "dtype,expected_upcast_type,error", + [ + ( + "float32", + np.dtype("float32"), + None, + ), + ( + "float16", + None, + TypeError, + ), + ( + "float64", + np.dtype("float64"), + None, + ), + ( + "float128", + None, + ValueError, + ), + ], +) +def test_from_pandas_unsupported_types(dtype, expected_upcast_type, error): + data = pd.Series([1.1, 0.55, -1.23], dtype=dtype) + pdf = pd.DataFrame({"one_col": data}) + if error is not None: + with pytest.raises(error): + cudf.from_pandas(data) + + with pytest.raises(error): + cudf.Series(data) + + with pytest.raises(error): + cudf.from_pandas(pdf) + + with pytest.raises(error): + cudf.DataFrame(pdf) + else: + df = cudf.from_pandas(data) + + assert_eq(data, df, check_dtype=False) + assert df.dtype == expected_upcast_type + + df = cudf.Series(data) + assert_eq(data, df, check_dtype=False) + assert df.dtype == expected_upcast_type + + df = cudf.from_pandas(pdf) + assert_eq(pdf, df, check_dtype=False) + assert df["one_col"].dtype == expected_upcast_type + + df = cudf.DataFrame(pdf) + assert_eq(pdf, df, check_dtype=False) + assert df["one_col"].dtype == expected_upcast_type + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": 4}, + {"c": 4, "a": [1, 2, 3], "b": ["x", "y", "z"]}, + {"a": [1, 2, 3], "c": 4}, + ], +) +def test_dataframe_init_from_scalar_and_lists(data): + actual = cudf.DataFrame(data) + expected = pd.DataFrame(data) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "columns", + ( + [], + ["c", "a"], + ["a", "d", "b", "e", "c"], + ["a", "b", "c"], + pd.Index(["b", "a", "c"], name="custom_name"), + ), +) +@pytest.mark.parametrize("index", (None, [4, 5, 6])) +def test_dataframe_dict_like_with_columns(columns, index): + data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + expect = pd.DataFrame(data, columns=columns, index=index) + actual = cudf.DataFrame(data, columns=columns, index=index) + if index is None and len(columns) == 0: + # We make an empty range index, pandas makes an empty index + expect = expect.reset_index(drop=True) + assert_eq(expect, actual) + + +def test_dataframe_init_columns_named_multiindex(): + rng = np.random.default_rng(seed=0) + data = rng.standard_normal(size=(2, 2)) + columns = cudf.MultiIndex.from_tuples( + [("A", "one"), ("A", "two")], names=["y", "z"] + ) + gdf = cudf.DataFrame(data, columns=columns) + pdf = pd.DataFrame(data, columns=columns.to_pandas()) + + assert_eq(gdf, pdf) + + +def test_dataframe_init_columns_named_index(): + rng = np.random.default_rng(seed=0) + data = rng.standard_normal(size=(2, 2)) + columns = pd.Index(["a", "b"], name="custom_name") + gdf = cudf.DataFrame(data, columns=columns) + pdf = pd.DataFrame(data, columns=columns) + + assert_eq(gdf, pdf) + + +def test_dataframe_from_pandas_sparse(): + pdf = pd.DataFrame(range(2), dtype=pd.SparseDtype(np.int64, 0)) + with pytest.raises(NotImplementedError): + cudf.DataFrame(pdf) + + +def test_dataframe_constructor_unbounded_sequence(): + class A: + def __getitem__(self, key): + return 1 + + with pytest.raises(TypeError): + cudf.DataFrame([A()]) + + with pytest.raises(TypeError): + cudf.DataFrame({"a": A()}) + + +def test_dataframe_constructor_dataframe_list(): + df = cudf.DataFrame(range(2)) + with pytest.raises(TypeError): + cudf.DataFrame([df]) + + +def 
test_dataframe_constructor_from_namedtuple():
+    Point1 = collections.namedtuple("Point1", ["a", "b", "c"])
+    Point2 = collections.namedtuple("Point2", ["x", "y"])
+
+    data = [Point1(1, 2, 3), Point2(4, 5)]
+    idx = ["a", "b"]
+    gdf = cudf.DataFrame(data, index=idx)
+    pdf = pd.DataFrame(data, index=idx)
+
+    assert_eq(gdf, pdf)
+
+    data = [Point2(4, 5), Point1(1, 2, 3)]
+    with pytest.raises(ValueError):
+        cudf.DataFrame(data, index=idx)
+    with pytest.raises(ValueError):
+        pd.DataFrame(data, index=idx)
+
+
+def test_series_data_no_name_with_columns():
+    gdf = cudf.DataFrame(cudf.Series([1]), columns=[1])
+    pdf = pd.DataFrame(pd.Series([1]), columns=[1])
+    assert_eq(gdf, pdf)
+
+
+def test_series_data_no_name_with_columns_more_than_one_raises():
+    with pytest.raises(ValueError):
+        cudf.DataFrame(cudf.Series([1]), columns=[1, 2])
+    with pytest.raises(ValueError):
+        pd.DataFrame(pd.Series([1]), columns=[1, 2])
+
+
+def test_series_data_with_name_with_columns_matching():
+    gdf = cudf.DataFrame(cudf.Series([1], name=1), columns=[1])
+    pdf = pd.DataFrame(pd.Series([1], name=1), columns=[1])
+    assert_eq(gdf, pdf)
+
+
+def test_series_data_with_name_with_columns_not_matching():
+    gdf = cudf.DataFrame(cudf.Series([1], name=2), columns=[1])
+    pdf = pd.DataFrame(pd.Series([1], name=2), columns=[1])
+    assert_eq(gdf, pdf)
+
+
+def test_series_data_with_name_with_columns_matching_align():
+    gdf = cudf.DataFrame(cudf.Series([1], name=2), columns=[1, 2])
+    pdf = pd.DataFrame(pd.Series([1], name=2), columns=[1, 2])
+    assert_eq(gdf, pdf)
+
+
+def test_generated_column():
+    gdf = cudf.DataFrame({"a": (i for i in range(5))})
+    assert len(gdf) == 5
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        (
+            pd.Series([3, 3.0]),
+            pd.Series([2.3, 3.9]),
+            pd.Series([1.5, 3.9]),
+            pd.Series([1.0, 2]),
+        ),
+        [
+            pd.Series([3, 3.0]),
+            pd.Series([2.3, 3.9]),
+            pd.Series([1.5, 3.9]),
+            pd.Series([1.0, 2]),
+        ],
+    ],
+)
+def test_create_dataframe_from_list_like(data):
+    pdf = pd.DataFrame(data, index=["count", "mean", "std", "min"])
+    gdf = cudf.DataFrame(data, index=["count", "mean", "std", "min"])
+
+    assert_eq(pdf, gdf)
+
+    pdf = pd.DataFrame(data)
+    gdf = cudf.DataFrame(data)
+
+    assert_eq(pdf, gdf)
+
+
+def test_create_dataframe_column():
+    pdf = pd.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"])
+    gdf = cudf.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"])
+
+    assert_eq(pdf, gdf)
+
+    pdf = pd.DataFrame(
+        {"a": [1, 2, 3], "b": [2, 3, 5]},
+        columns=["a", "b", "c"],
+        index=["A", "Z", "X"],
+    )
+    gdf = cudf.DataFrame(
+        {"a": [1, 2, 3], "b": [2, 3, 5]},
+        columns=["a", "b", "c"],
+        index=["A", "Z", "X"],
+    )
+
+    assert_eq(pdf, gdf)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        pd.DataFrame(np.eye(2)),
+        cudf.DataFrame(np.eye(2)),
+        np.eye(2),
+        cp.eye(2),
+        None,
+        [[1, 0], [0, 1]],
+        [cudf.Series([0, 1]), cudf.Series([1, 0])],
+    ],
+)
+@pytest.mark.parametrize(
+    "columns",
+    [None, range(2), pd.RangeIndex(2), cudf.RangeIndex(2)],
+)
+def test_dataframe_columns_returns_rangeindex(data, columns):
+    if data is None and columns is None:
+        pytest.skip(f"{data=} and {columns=} not relevant.")
+    result = cudf.DataFrame(data=data, columns=columns).columns
+    expected = pd.RangeIndex(range(2))
+    assert_eq(result, expected)
+
+
+def test_dataframe_columns_returns_rangeindex_single_col():
+    result = cudf.DataFrame([1, 2, 3]).columns
+    expected = pd.RangeIndex(range(1))
+    assert_eq(result, expected)
+
+
+@pytest.mark.parametrize("dtype", ["int64", "datetime64[ns]", "int8"])
+@pytest.mark.parametrize("idx_data", 
[[], [1, 2]])
+@pytest.mark.parametrize("data", [None, [], {}])
+def test_dataframe_columns_empty_data_preserves_dtype(dtype, idx_data, data):
+    result = cudf.DataFrame(
+        data, columns=cudf.Index(idx_data, dtype=dtype)
+    ).columns
+    expected = pd.Index(idx_data, dtype=dtype)
+    assert_eq(result, expected)
+
+
+def test_dataframe_init_from_nested_dict():
+    ordered_dict = collections.OrderedDict(
+        [
+            (
+                "one",
+                collections.OrderedDict(
+                    [("col_a", "foo1"), ("col_b", "bar1")]
+                ),
+            ),
+            (
+                "two",
+                collections.OrderedDict(
+                    [("col_a", "foo2"), ("col_b", "bar2")]
+                ),
+            ),
+            (
+                "three",
+                collections.OrderedDict(
+                    [("col_a", "foo3"), ("col_b", "bar3")]
+                ),
+            ),
+        ]
+    )
+    pdf = pd.DataFrame(ordered_dict)
+    gdf = cudf.DataFrame(ordered_dict)
+
+    assert_eq(pdf, gdf)
+    regular_dict = {key: dict(value) for key, value in ordered_dict.items()}
+
+    pdf = pd.DataFrame(regular_dict)
+    gdf = cudf.DataFrame(regular_dict)
+    assert_eq(pdf, gdf)
+
+
+def test_init_from_2_categoricalindex_series_diff_categories():
+    s1 = cudf.Series(
+        [39, 6, 4], index=cudf.CategoricalIndex(["female", "male", "unknown"])
+    )
+    s2 = cudf.Series(
+        [2, 152, 2, 242, 150],
+        index=cudf.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
+    )
+    result = cudf.DataFrame([s1, s2])
+    expected = pd.DataFrame([s1.to_pandas(), s2.to_pandas()])
+    # TODO: Remove once https://github.com/pandas-dev/pandas/issues/57592
+    # is addressed
+    expected.columns = result.columns
+    assert_eq(result, expected, check_dtype=False)
+
+
+def test_data_frame_values_no_cols_but_index():
+    result = cudf.DataFrame(index=range(5)).values
+    expected = pd.DataFrame(index=range(5)).values
+    assert_eq(result, expected)
+
+
+def test_dataframe_from_ndarray_dup_columns():
+    with pytest.raises(ValueError):
+        cudf.DataFrame(np.eye(2), columns=["A", "A"])
+
+
+def test_dataframe_init_with_nans():
+    with cudf.option_context("mode.pandas_compatible", True):
+        gdf = cudf.DataFrame({"a": [1, 2, 3, np.nan]})
+    assert gdf["a"].dtype == np.dtype("float64")
+    pdf = pd.DataFrame({"a": [1, 2, 3, np.nan]})
+    assert_eq(pdf, gdf)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        cudf.DataFrame(range(2)),
+        None,
+        [cudf.Series(range(2))],
+        [[0], [1]],
+        {1: range(2)},
+        cp.arange(2),
+    ],
+)
+def test_init_with_index_no_shallow_copy(data):
+    idx = cudf.RangeIndex(2)
+    df = cudf.DataFrame(data, index=idx)
+    assert df.index is idx
+
+
+def test_from_records_with_index_no_shallow_copy():
+    idx = cudf.RangeIndex(2)
+    data = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", "<f8"), ("y", "<i8")])
+    df = cudf.DataFrame(data.view(np.recarray), index=idx)
+    assert df.index is idx
+
+        with expect_warning_if(
+            (pdf.groupby(gkey).count() < min_periods).all().all(),
+            RuntimeWarning,
+        ):
+            expected = pdf.groupby(gkey).cov(min_periods=min_periods, ddof=ddof)
+
+    assert_eq(expected, actual)
+
+
+def test_groupby_covariance_multiindex_dataframe():
+    gdf = cudf.DataFrame(
+        {
+            "a": [1, 1, 2, 2],
+            "b": [1, 1, 2, 2],
+            "c": [2, 3, 4, 5],
+            "d": [6, 8, 9, 1],
+        }
+    ).set_index(["a", "b"])
+
+    actual = gdf.groupby(level=["a", "b"]).cov()
+    expected = gdf.to_pandas().groupby(level=["a", "b"]).cov()
+
+    assert_eq(expected, actual)
+
+
+def test_groupby_covariance_empty_columns():
+    gdf = cudf.DataFrame(columns=["id", "val1", "val2"])
+    pdf = gdf.to_pandas()
+
+    actual = gdf.groupby("id").cov()
+    expected = pdf.groupby("id").cov()
+
+    assert_eq(
+        expected,
+        actual,
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
+def test_groupby_cov_invalid_column_types():
+    gdf = cudf.DataFrame(
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"],
+            "val2": ["d", "d", "d", "e", "e", "e", "f", 
"f", "f"], + }, + ) + with pytest.raises( + TypeError, + match="Covariance accepts only numerical column-pairs", + ): + gdf.groupby("id").cov() + + +def test_groupby_cov_positive_semidefinite_matrix(): + # Refer to discussions in PR #9889 re "pair-wise deletion" strategy + # being used in pandas to compute the covariance of a dataframe with + # rows containing missing values. + # Note: cuDF currently matches pandas behavior in that the covariance + # matrices are not guaranteed PSD (positive semi definite). + # https://github.com/rapidsai/cudf/pull/9889#discussion_r794158358 + gdf = cudf.DataFrame( + [[1, 2], [None, 4], [5, None], [7, 8]], columns=["v0", "v1"] + ) + actual = gdf.groupby(by=cudf.Series([1, 1, 1, 1])).cov() + actual.reset_index(drop=True, inplace=True) + + pdf = gdf.to_pandas() + expected = pdf.groupby(by=pd.Series([1, 1, 1, 1])).cov() + expected.reset_index(drop=True, inplace=True) + + assert_eq( + expected, + actual, + check_dtype=False, + ) + + +@pytest.mark.xfail +def test_groupby_cov_for_pandas_bug_case(): + # Handles case: pandas bug using ddof with missing data. + # Filed an issue in Pandas on GH, link below: + # https://github.com/pandas-dev/pandas/issues/45814 + pdf = pd.DataFrame( + {"id": ["a", "a"], "val1": [1.0, 2.0], "val2": [np.nan, np.nan]} + ) + expected = pdf.groupby("id").cov(ddof=2) + + gdf = cudf.from_pandas(pdf) + actual = gdf.groupby("id").cov(ddof=2) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data, gkey", + [ + ( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + }, + ["id", "val1", "val2"], + ), + ( + { + "id": [0] * 4 + [1] * 3, + "a": [10, 3, 4, 2, -3, 9, 10], + "b": [10, 23, -4, 2, -3, 9, 19], + }, + ["id", "a"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val": pa.array( + [None, None, None, None, None, None], type=pa.float64() + ), + }, + ["id"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val1": [None, 4, 6, 8, None, 2], + "val2": [4, 5, None, 2, 9, None], + }, + ["id"], + ), + ({"id": [1.0], "val1": [2.0], "val2": [3.0]}, ["id"]), + ], +) +@pytest.mark.parametrize("min_per", [0, 1, 2]) +def test_pearson_corr_passing(data, gkey, min_per): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + actual = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + expected = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("method", ["kendall", "spearman"]) +def test_pearson_corr_unsupported_methods(method): + gdf = cudf.DataFrame( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + } + ) + + with pytest.raises( + NotImplementedError, + match="Only pearson correlation is currently supported", + ): + gdf.groupby("id").corr(method) + + +def test_pearson_corr_empty_columns(): + gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) + pdf = gdf.to_pandas() + + actual = gdf.groupby("id").corr("pearson") + expected = pdf.groupby("id").corr("pearson") + + assert_eq( + expected, + actual, + check_dtype=False, + check_index_type=False, + ) diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index fb1942fc64a..18b13f7c3d1 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ 
b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -529,3 +529,285 @@ def test_datetime_infer_format(data, timezone, datetime_types_as_str): with pytest.raises(NotImplementedError): # pandas doesn't allow parsing "Z" to naive type sr.astype(datetime_types_as_str) + + +@pytest.mark.parametrize( + "np_dtype,pd_dtype", + [ + tuple(item) + for item in cudf.utils.dtypes.np_dtypes_to_pandas_dtypes.items() + ], +) +def test_series_astype_pandas_nullable( + all_supported_types_as_str, np_dtype, pd_dtype +): + source = cudf.Series([0, 1, None], dtype=all_supported_types_as_str) + + expect = source.astype(np_dtype) + got = source.astype(pd_dtype) + + assert_eq(expect, got) + + +def test_series_astype_numeric_to_numeric( + numeric_types_as_str, numeric_types_as_str2 +): + psr = pd.Series([1, 2, 4, 3], dtype=numeric_types_as_str) + gsr = cudf.from_pandas(psr) + assert_eq( + psr.astype(numeric_types_as_str2), gsr.astype(numeric_types_as_str2) + ) + + +def test_series_astype_numeric_to_numeric_nulls( + numeric_types_as_str, numeric_types_as_str2 +): + data = [1, 2, None, 3] + sr = cudf.Series(data, dtype=numeric_types_as_str) + got = sr.astype(numeric_types_as_str2) + expect = cudf.Series([1, 2, None, 3], dtype=numeric_types_as_str2) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "as_dtype", + [ + "str", + "category", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], +) +def test_series_astype_numeric_to_other( + request, all_supported_types_as_str, as_dtype +): + if all_supported_types_as_str.startswith( + "timedelta64" + ) and as_dtype.startswith("datetime64"): + pytest.skip( + f"Casting {all_supported_types_as_str} to {as_dtype} is invalid" + ) + if all_supported_types_as_str == "str" and as_dtype.startswith( + "datetime64" + ): + pytest.skip( + f"Casting {all_supported_types_as_str} to {as_dtype} for test data is invalid." 
+ ) + request.applymarker( + pytest.mark.xfail( + all_supported_types_as_str + in {"timedelta64[us]", "timedelta64[ms]", "timedelta64[s]"} + and as_dtype == "str", + reason=f"Casting {all_supported_types_as_str} to {as_dtype} is incorrect.", + ) + ) + psr = pd.Series([1, 2, 3], dtype=all_supported_types_as_str) + gsr = cudf.from_pandas(psr) + assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) + + +@pytest.mark.parametrize( + "as_dtype", + [ + "str", + "int32", + "uint32", + "float32", + "category", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], +) +def test_series_astype_string_to_other(as_dtype): + if "datetime64" in as_dtype: + data = ["2001-01-01", "2002-02-02", "2000-01-05"] + else: + data = ["1", "2", "3"] + psr = pd.Series(data) + gsr = cudf.from_pandas(psr) + assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) + + +@pytest.mark.parametrize( + "as_dtype", + [ + "category", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ], +) +def test_series_astype_datetime_to_other(as_dtype): + data = ["2001-01-01", "2002-02-02", "2001-01-05"] + psr = pd.Series(data) + gsr = cudf.from_pandas(psr) + assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) + + +@pytest.mark.parametrize( + "inp", + [ + ("datetime64[ns]", "2011-01-01 00:00:00.000000000"), + ("datetime64[us]", "2011-01-01 00:00:00.000000"), + ("datetime64[ms]", "2011-01-01 00:00:00.000"), + ("datetime64[s]", "2011-01-01 00:00:00"), + ], +) +def test_series_astype_datetime_to_string(inp): + dtype, expect = inp + base_date = "2011-01-01" + sr = cudf.Series([base_date], dtype=dtype) + got = sr.astype(str)[0] + assert expect == got + + +@pytest.mark.parametrize( + "as_dtype", + [ + "int32", + "uint32", + "float32", + "category", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + "str", + ], +) +def test_series_astype_categorical_to_other(as_dtype): + if "datetime64" in as_dtype: + data = ["2001-01-01", "2002-02-02", "2000-01-05", "2001-01-01"] + else: + data = [1, 2, 3, 1] + psr = pd.Series(data, dtype="category") + gsr = cudf.from_pandas(psr) + assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) + + +def test_series_astype_to_categorical_ordered(categorical_ordered): + psr = pd.Series([1, 2, 3, 1], dtype="category") + gsr = cudf.from_pandas(psr) + + ordered_dtype_pd = pd.CategoricalDtype( + categories=[1, 2, 3], ordered=categorical_ordered + ) + ordered_dtype_gd = cudf.CategoricalDtype.from_pandas(ordered_dtype_pd) + assert_eq( + psr.astype("int32").astype(ordered_dtype_pd).astype("int32"), + gsr.astype("int32").astype(ordered_dtype_gd).astype("int32"), + ) + + +def test_series_astype_cat_ordered_to_unordered(categorical_ordered): + pd_dtype = pd.CategoricalDtype( + categories=[1, 2, 3], ordered=categorical_ordered + ) + pd_to_dtype = pd.CategoricalDtype( + categories=[1, 2, 3], ordered=not categorical_ordered + ) + gd_dtype = cudf.CategoricalDtype.from_pandas(pd_dtype) + gd_to_dtype = cudf.CategoricalDtype.from_pandas(pd_to_dtype) + + psr = pd.Series([1, 2, 3], dtype=pd_dtype) + gsr = cudf.Series([1, 2, 3], dtype=gd_dtype) + + expect = psr.astype(pd_to_dtype) + got = gsr.astype(gd_to_dtype) + + assert_eq(expect, got) + + +def test_series_astype_null_cases(): + data = [1, 2, None, 3] + + # numerical to other + assert_eq(cudf.Series(data, dtype="str"), cudf.Series(data).astype("str")) + + assert_eq( + cudf.Series(data, dtype="category"), + cudf.Series(data).astype("category"), + ) + + assert_eq( + cudf.Series(data, dtype="float32"), + 
cudf.Series(data, dtype="int32").astype("float32"), + ) + + assert_eq( + cudf.Series(data, dtype="float32"), + cudf.Series(data, dtype="uint32").astype("float32"), + ) + + assert_eq( + cudf.Series(data, dtype="datetime64[ms]"), + cudf.Series(data).astype("datetime64[ms]"), + ) + + # categorical to other + assert_eq( + cudf.Series(data, dtype="str"), + cudf.Series(data, dtype="category").astype("str"), + ) + + assert_eq( + cudf.Series(data, dtype="float32"), + cudf.Series(data, dtype="category").astype("float32"), + ) + + assert_eq( + cudf.Series(data, dtype="datetime64[ms]"), + cudf.Series(data, dtype="category").astype("datetime64[ms]"), + ) + + # string to other + assert_eq( + cudf.Series([1, 2, None, 3], dtype="int32"), + cudf.Series(["1", "2", None, "3"]).astype("int32"), + ) + + assert_eq( + cudf.Series( + ["2001-01-01", "2001-02-01", None, "2001-03-01"], + dtype="datetime64[ms]", + ), + cudf.Series(["2001-01-01", "2001-02-01", None, "2001-03-01"]).astype( + "datetime64[ms]" + ), + ) + + assert_eq( + cudf.Series(["a", "b", "c", None], dtype="category").to_pandas(), + cudf.Series(["a", "b", "c", None]).astype("category").to_pandas(), + ) + + # datetime to other + data = [ + "2001-01-01 00:00:00.000000", + "2001-02-01 00:00:00.000000", + None, + "2001-03-01 00:00:00.000000", + ] + assert_eq( + cudf.Series(data), + cudf.Series(data, dtype="datetime64[us]").astype("str"), + ) + + assert_eq( + pd.Series(data, dtype="datetime64[ns]").astype("category"), + cudf.from_pandas(pd.Series(data, dtype="datetime64[ns]")).astype( + "category" + ), + ) + + +def test_series_astype_null_categorical(): + sr = cudf.Series([None, None, None], dtype="category") + expect = cudf.Series([None, None, None], dtype="int32") + got = sr.astype("int32") + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index a562a66f312..c0aaf5a1b22 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -72,6 +72,18 @@ def test_create_interval_series(data1, data2, data3, data4, interval_closed): assert_eq(expect_three, got_three) +def test_from_pandas_for_series_nan_as_null(nan_as_null): + data = [np.nan, 2.0, 3.0] + psr = pd.Series(data) + + expected = cudf.Series._from_column( + as_column(data, nan_as_null=nan_as_null) + ) + got = cudf.from_pandas(psr, nan_as_null=nan_as_null) + + assert_eq(expected, got) + + @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 72601dd6a1a..8d2e7f9a707 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3,15 +3,11 @@ import array as arr import datetime import decimal -import functools import io import operator -import random import re -import string import textwrap import warnings -from collections import OrderedDict, defaultdict, namedtuple from contextlib import contextmanager from copy import copy @@ -20,7 +16,6 @@ import pandas as pd import pyarrow as pa import pytest -from numba import cuda from packaging import version import cudf @@ -866,33 +861,6 @@ def test_dataframe_shape_empty(): assert pdf.shape == gdf.shape -@pytest.mark.parametrize("num_cols", [1, 3]) -@pytest.mark.parametrize("num_rows", [1, 5]) -def test_dataframe_transpose_category(num_cols, num_rows): - pdf = pd.DataFrame() - - for i in range(num_cols): - colname = string.ascii_lowercase[i] - data = 
pd.Series(list(string.ascii_lowercase), dtype="category") - data = data.sample(num_rows, replace=True).reset_index(drop=True) - pdf[colname] = data - - gdf = cudf.DataFrame.from_pandas(pdf) - - got_function = gdf.transpose() - got_property = gdf.T - - expect = pdf.transpose() - - assert_eq(expect, got_function.to_pandas()) - assert_eq(expect, got_property.to_pandas()) - - -def test_generated_column(): - gdf = cudf.DataFrame({"a": (i for i in range(5))}) - assert len(gdf) == 5 - - @pytest.fixture def pdf(): return pd.DataFrame({"x": range(10), "y": range(10)}) @@ -1636,354 +1604,6 @@ def test_as_column_types(): assert_eq(pds, gds) -@pytest.mark.parametrize("dtype", ALL_TYPES) -@pytest.mark.parametrize( - "np_dtype,pd_dtype", - [ - tuple(item) - for item in cudf.utils.dtypes.np_dtypes_to_pandas_dtypes.items() - ], -) -def test_series_astype_pandas_nullable(dtype, np_dtype, pd_dtype): - source = cudf.Series([0, 1, None], dtype=dtype) - - expect = source.astype(np_dtype) - got = source.astype(pd_dtype) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("as_dtype", NUMERIC_TYPES) -def test_series_astype_numeric_to_numeric(dtype, as_dtype): - psr = pd.Series([1, 2, 4, 3], dtype=dtype) - gsr = cudf.from_pandas(psr) - assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("as_dtype", NUMERIC_TYPES) -def test_series_astype_numeric_to_numeric_nulls(dtype, as_dtype): - data = [1, 2, None, 3] - sr = cudf.Series(data, dtype=dtype) - got = sr.astype(as_dtype) - expect = cudf.Series([1, 2, None, 3], dtype=as_dtype) - assert_eq(expect, got) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize( - "as_dtype", - [ - "str", - "category", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - ], -) -def test_series_astype_numeric_to_other(dtype, as_dtype): - psr = pd.Series([1, 2, 3], dtype=dtype) - gsr = cudf.from_pandas(psr) - assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) - - -@pytest.mark.parametrize( - "as_dtype", - [ - "str", - "int32", - "uint32", - "float32", - "category", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - ], -) -def test_series_astype_string_to_other(as_dtype): - if "datetime64" in as_dtype: - data = ["2001-01-01", "2002-02-02", "2000-01-05"] - else: - data = ["1", "2", "3"] - psr = pd.Series(data) - gsr = cudf.from_pandas(psr) - assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) - - -@pytest.mark.parametrize( - "as_dtype", - [ - "category", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - ], -) -def test_series_astype_datetime_to_other(as_dtype): - data = ["2001-01-01", "2002-02-02", "2001-01-05"] - psr = pd.Series(data) - gsr = cudf.from_pandas(psr) - assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) - - -@pytest.mark.parametrize( - "inp", - [ - ("datetime64[ns]", "2011-01-01 00:00:00.000000000"), - ("datetime64[us]", "2011-01-01 00:00:00.000000"), - ("datetime64[ms]", "2011-01-01 00:00:00.000"), - ("datetime64[s]", "2011-01-01 00:00:00"), - ], -) -def test_series_astype_datetime_to_string(inp): - dtype, expect = inp - base_date = "2011-01-01" - sr = cudf.Series([base_date], dtype=dtype) - got = sr.astype(str)[0] - assert expect == got - - -@pytest.mark.parametrize( - "as_dtype", - [ - "int32", - "uint32", - "float32", - "category", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - "str", - ], 
-) -def test_series_astype_categorical_to_other(as_dtype): - if "datetime64" in as_dtype: - data = ["2001-01-01", "2002-02-02", "2000-01-05", "2001-01-01"] - else: - data = [1, 2, 3, 1] - psr = pd.Series(data, dtype="category") - gsr = cudf.from_pandas(psr) - assert_eq(psr.astype(as_dtype), gsr.astype(as_dtype)) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_series_astype_to_categorical_ordered(ordered): - psr = pd.Series([1, 2, 3, 1], dtype="category") - gsr = cudf.from_pandas(psr) - - ordered_dtype_pd = pd.CategoricalDtype( - categories=[1, 2, 3], ordered=ordered - ) - ordered_dtype_gd = cudf.CategoricalDtype.from_pandas(ordered_dtype_pd) - assert_eq( - psr.astype("int32").astype(ordered_dtype_pd).astype("int32"), - gsr.astype("int32").astype(ordered_dtype_gd).astype("int32"), - ) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_series_astype_cat_ordered_to_unordered(ordered): - pd_dtype = pd.CategoricalDtype(categories=[1, 2, 3], ordered=ordered) - pd_to_dtype = pd.CategoricalDtype( - categories=[1, 2, 3], ordered=not ordered - ) - gd_dtype = cudf.CategoricalDtype.from_pandas(pd_dtype) - gd_to_dtype = cudf.CategoricalDtype.from_pandas(pd_to_dtype) - - psr = pd.Series([1, 2, 3], dtype=pd_dtype) - gsr = cudf.Series([1, 2, 3], dtype=gd_dtype) - - expect = psr.astype(pd_to_dtype) - got = gsr.astype(gd_to_dtype) - - assert_eq(expect, got) - - -def test_series_astype_null_cases(): - data = [1, 2, None, 3] - - # numerical to other - assert_eq(cudf.Series(data, dtype="str"), cudf.Series(data).astype("str")) - - assert_eq( - cudf.Series(data, dtype="category"), - cudf.Series(data).astype("category"), - ) - - assert_eq( - cudf.Series(data, dtype="float32"), - cudf.Series(data, dtype="int32").astype("float32"), - ) - - assert_eq( - cudf.Series(data, dtype="float32"), - cudf.Series(data, dtype="uint32").astype("float32"), - ) - - assert_eq( - cudf.Series(data, dtype="datetime64[ms]"), - cudf.Series(data).astype("datetime64[ms]"), - ) - - # categorical to other - assert_eq( - cudf.Series(data, dtype="str"), - cudf.Series(data, dtype="category").astype("str"), - ) - - assert_eq( - cudf.Series(data, dtype="float32"), - cudf.Series(data, dtype="category").astype("float32"), - ) - - assert_eq( - cudf.Series(data, dtype="datetime64[ms]"), - cudf.Series(data, dtype="category").astype("datetime64[ms]"), - ) - - # string to other - assert_eq( - cudf.Series([1, 2, None, 3], dtype="int32"), - cudf.Series(["1", "2", None, "3"]).astype("int32"), - ) - - assert_eq( - cudf.Series( - ["2001-01-01", "2001-02-01", None, "2001-03-01"], - dtype="datetime64[ms]", - ), - cudf.Series(["2001-01-01", "2001-02-01", None, "2001-03-01"]).astype( - "datetime64[ms]" - ), - ) - - assert_eq( - cudf.Series(["a", "b", "c", None], dtype="category").to_pandas(), - cudf.Series(["a", "b", "c", None]).astype("category").to_pandas(), - ) - - # datetime to other - data = [ - "2001-01-01 00:00:00.000000", - "2001-02-01 00:00:00.000000", - None, - "2001-03-01 00:00:00.000000", - ] - assert_eq( - cudf.Series(data), - cudf.Series(data, dtype="datetime64[us]").astype("str"), - ) - - assert_eq( - pd.Series(data, dtype="datetime64[ns]").astype("category"), - cudf.from_pandas(pd.Series(data, dtype="datetime64[ns]")).astype( - "category" - ), - ) - - -def test_series_astype_null_categorical(): - sr = cudf.Series([None, None, None], dtype="category") - expect = cudf.Series([None, None, None], dtype="int32") - got = sr.astype("int32") - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - ( - 
pd.Series([3, 3.0]), - pd.Series([2.3, 3.9]), - pd.Series([1.5, 3.9]), - pd.Series([1.0, 2]), - ), - [ - pd.Series([3, 3.0]), - pd.Series([2.3, 3.9]), - pd.Series([1.5, 3.9]), - pd.Series([1.0, 2]), - ], - ], -) -def test_create_dataframe_from_list_like(data): - pdf = pd.DataFrame(data, index=["count", "mean", "std", "min"]) - gdf = cudf.DataFrame(data, index=["count", "mean", "std", "min"]) - - assert_eq(pdf, gdf) - - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - assert_eq(pdf, gdf) - - -def test_create_dataframe_column(): - pdf = pd.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"]) - gdf = cudf.DataFrame(columns=["a", "b", "c"], index=["A", "Z", "X"]) - - assert_eq(pdf, gdf) - - pdf = pd.DataFrame( - {"a": [1, 2, 3], "b": [2, 3, 5]}, - columns=["a", "b", "c"], - index=["A", "Z", "X"], - ) - gdf = cudf.DataFrame( - {"a": [1, 2, 3], "b": [2, 3, 5]}, - columns=["a", "b", "c"], - index=["A", "Z", "X"], - ) - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "data", - [ - pd.DataFrame(np.eye(2)), - cudf.DataFrame(np.eye(2)), - np.eye(2), - cupy.eye(2), - None, - [[1, 0], [0, 1]], - [cudf.Series([0, 1]), cudf.Series([1, 0])], - ], -) -@pytest.mark.parametrize( - "columns", - [None, range(2), pd.RangeIndex(2), cudf.RangeIndex(2)], -) -def test_dataframe_columns_returns_rangeindex(data, columns): - if data is None and columns is None: - pytest.skip(f"{data=} and {columns=} not relevant.") - result = cudf.DataFrame(data=data, columns=columns).columns - expected = pd.RangeIndex(range(2)) - assert_eq(result, expected) - - -def test_dataframe_columns_returns_rangeindex_single_col(): - result = cudf.DataFrame([1, 2, 3]).columns - expected = pd.RangeIndex(range(1)) - assert_eq(result, expected) - - -@pytest.mark.parametrize("dtype", ["int64", "datetime64[ns]", "int8"]) -@pytest.mark.parametrize("idx_data", [[], [1, 2]]) -@pytest.mark.parametrize("data", [None, [], {}]) -def test_dataframe_columns_empty_data_preserves_dtype(dtype, idx_data, data): - result = cudf.DataFrame( - data, columns=cudf.Index(idx_data, dtype=dtype) - ).columns - expected = pd.Index(idx_data, dtype=dtype) - assert_eq(result, expected) - - @pytest.mark.parametrize("dtype", ["int64", "datetime64[ns]", "int8"]) def test_dataframe_astype_preserves_column_dtype(dtype): result = cudf.DataFrame([1], columns=cudf.Index([1], dtype=dtype)) @@ -3227,45 +2847,6 @@ def test_tupleize_cols_False_set(): assert_eq(pdf.columns, gdf.columns) -def test_init_multiindex_from_dict(): - pdf = pd.DataFrame({("a", "b"): [1]}) - gdf = cudf.DataFrame({("a", "b"): [1]}) - assert_eq(pdf, gdf) - assert_eq(pdf.columns, gdf.columns) - - -def test_change_column_dtype_in_empty(): - pdf = pd.DataFrame({"a": [], "b": []}) - gdf = cudf.from_pandas(pdf) - assert_eq(pdf, gdf) - pdf["b"] = pdf["b"].astype("int64") - gdf["b"] = gdf["b"].astype("int64") - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("dtype", ["int64", "str"]) -def test_dataframe_from_dictionary_series_same_name_index(dtype): - pd_idx1 = pd.Index([1, 2, 0], name="test_index").astype(dtype) - pd_idx2 = pd.Index([2, 0, 1], name="test_index").astype(dtype) - pd_series1 = pd.Series([1, 2, 3], index=pd_idx1) - pd_series2 = pd.Series([1, 2, 3], index=pd_idx2) - - gd_idx1 = cudf.from_pandas(pd_idx1) - gd_idx2 = cudf.from_pandas(pd_idx2) - gd_series1 = cudf.Series([1, 2, 3], index=gd_idx1) - gd_series2 = cudf.Series([1, 2, 3], index=gd_idx2) - - expect = pd.DataFrame({"a": pd_series1, "b": pd_series2}) - got = cudf.DataFrame({"a": gd_series1, "b": gd_series2}) - - if dtype == "str": 
- # Pandas actually loses its index name erroneously here... - expect.index.name = "test_index" - - assert_eq(expect, got) - assert expect.index.names == got.index.names - - @pytest.mark.parametrize( "arg", [slice(2, 8, 3), slice(1, 20, 4), slice(-2, -6, -2)] ) @@ -3288,732 +2869,9 @@ def test_dataframe_strided_slice(arg): assert_eq(expect, got) +@pytest_unmark_spilling @pytest.mark.parametrize( - "data,condition,other,error", - [ - (pd.Series(range(5)), pd.Series(range(5)) > 0, None, None), - (pd.Series(range(5)), pd.Series(range(5)) > 1, None, None), - (pd.Series(range(5)), pd.Series(range(5)) > 1, 10, None), - ( - pd.Series(range(5)), - pd.Series(range(5)) > 1, - pd.Series(range(5, 10)), - None, - ), - ( - pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]), - ( - pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]) - % 3 - ) - == 0, - -pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]), - None, - ), - ( - pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}), - pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}) == 4, - None, - None, - ), - ( - pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}), - pd.DataFrame({"a": [1, 2, np.nan], "b": [4, np.nan, 6]}) != 4, - None, - None, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [True, True, True], - None, - ValueError, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [True, True, True, False], - None, - ValueError, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [[True, True, True, False], [True, True, True, False]], - None, - ValueError, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [[True, True], [False, True], [True, False], [False, True]], - None, - None, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - cuda.to_device( - np.array( - [[True, True], [False, True], [True, False], [False, True]] - ) - ), - None, - None, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - cupy.array( - [[True, True], [False, True], [True, False], [False, True]] - ), - 17, - None, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [[True, True], [False, True], [True, False], [False, True]], - 17, - None, - ), - ( - pd.DataFrame({"p": [-2, 3, -4, -79], "k": [9, 10, 11, 12]}), - [ - [True, True, False, True], - [True, True, False, True], - [True, True, False, True], - [True, True, False, True], - ], - None, - ValueError, - ), - ( - pd.Series([1, 2, np.nan]), - pd.Series([1, 2, np.nan]) == 4, - None, - None, - ), - ( - pd.Series([1, 2, np.nan]), - pd.Series([1, 2, np.nan]) != 4, - None, - None, - ), - ( - pd.Series([4, np.nan, 6]), - pd.Series([4, np.nan, 6]) == 4, - None, - None, - ), - ( - pd.Series([4, np.nan, 6]), - pd.Series([4, np.nan, 6]) != 4, - None, - None, - ), - ( - pd.Series([4, np.nan, 6], dtype="category"), - pd.Series([4, np.nan, 6], dtype="category") != 4, - None, - None, - ), - ( - pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category"), - pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category") == "b", - None, - None, - ), - ( - pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category"), - pd.Series(["a", "b", "b", "d", "c", "s"], dtype="category") == "b", - "s", - None, - ), - ( - pd.Series([1, 2, 3, 2, 5]), - pd.Series([1, 2, 3, 2, 5]) == 2, - pd.DataFrame( - { - "a": pd.Series([1, 2, 3, 2, 5]), - "b": pd.Series([1, 2, 3, 2, 5]), - } - ), - NotImplementedError, - ), - ], -) -@pytest.mark.parametrize("inplace", [True, 
False]) -def test_df_sr_mask_where(data, condition, other, error, inplace): - ps_where = data - gs_where = cudf.from_pandas(data) - - ps_mask = ps_where.copy(deep=True) - gs_mask = gs_where.copy(deep=True) - - if hasattr(condition, "__cuda_array_interface__"): - if type(condition).__module__.split(".")[0] == "cupy": - ps_condition = cupy.asnumpy(condition) - else: - ps_condition = np.array(condition).astype("bool") - else: - ps_condition = condition - - if type(condition).__module__.split(".")[0] == "pandas": - gs_condition = cudf.from_pandas(condition) - else: - gs_condition = condition - - ps_other = other - if type(other).__module__.split(".")[0] == "pandas": - gs_other = cudf.from_pandas(other) - else: - gs_other = other - - if error is None: - expect_where = ps_where.where( - ps_condition, other=ps_other, inplace=inplace - ) - got_where = gs_where.where( - gs_condition, other=gs_other, inplace=inplace - ) - - expect_mask = ps_mask.mask( - ps_condition, other=ps_other, inplace=inplace - ) - got_mask = gs_mask.mask(gs_condition, other=gs_other, inplace=inplace) - - if inplace: - expect_where = ps_where - got_where = gs_where - - expect_mask = ps_mask - got_mask = gs_mask - - if isinstance(expect_where, pd.Series) and isinstance( - expect_where.dtype, pd.CategoricalDtype - ): - np.testing.assert_array_equal( - expect_where.cat.codes, - got_where.cat.codes.astype(expect_where.cat.codes.dtype) - .fillna(-1) - .to_numpy(), - ) - assert_eq(expect_where.cat.categories, got_where.cat.categories) - - np.testing.assert_array_equal( - expect_mask.cat.codes, - got_mask.cat.codes.astype(expect_mask.cat.codes.dtype) - .fillna(-1) - .to_numpy(), - ) - assert_eq(expect_mask.cat.categories, got_mask.cat.categories) - else: - assert_eq( - expect_where.fillna(-1), - got_where.fillna(-1), - check_dtype=False, - ) - assert_eq( - expect_mask.fillna(-1), got_mask.fillna(-1), check_dtype=False - ) - else: - assert_exceptions_equal( - lfunc=ps_where.where, - rfunc=gs_where.where, - lfunc_args_and_kwargs=( - [ps_condition], - {"other": ps_other, "inplace": inplace}, - ), - rfunc_args_and_kwargs=( - [gs_condition], - {"other": gs_other, "inplace": inplace}, - ), - ) - - assert_exceptions_equal( - lfunc=ps_mask.mask, - rfunc=gs_mask.mask, - lfunc_args_and_kwargs=( - [ps_condition], - {"other": ps_other, "inplace": inplace}, - ), - rfunc_args_and_kwargs=( - [gs_condition], - {"other": gs_other, "inplace": inplace}, - ), - ) - - -@pytest.mark.parametrize( - "data,condition,other,has_cat", - [ - ( - pd.DataFrame( - { - "a": pd.Series(["a", "a", "b", "c", "a", "d", "d", "a"]), - "b": pd.Series(["o", "p", "q", "e", "p", "p", "a", "a"]), - } - ), - pd.DataFrame( - { - "a": pd.Series(["a", "a", "b", "c", "a", "d", "d", "a"]), - "b": pd.Series(["o", "p", "q", "e", "p", "p", "a", "a"]), - } - ) - != "a", - None, - None, - ), - ( - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ) - != "a", - None, - True, - ), - ( - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", 
"a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ) - == "a", - None, - True, - ), - ( - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ) - != "a", - "a", - True, - ), - ( - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - ["a", "a", "b", "c", "a", "d", "d", "a"], - dtype="category", - ), - "b": pd.Series( - ["o", "p", "q", "e", "p", "p", "a", "a"], - dtype="category", - ), - } - ) - == "a", - "a", - True, - ), - ], -) -def test_df_string_cat_types_mask_where(data, condition, other, has_cat): - ps = data - gs = cudf.from_pandas(data) - - ps_condition = condition - if type(condition).__module__.split(".")[0] == "pandas": - gs_condition = cudf.from_pandas(condition) - else: - gs_condition = condition - - ps_other = other - if type(other).__module__.split(".")[0] == "pandas": - gs_other = cudf.from_pandas(other) - else: - gs_other = other - - expect_where = ps.where(ps_condition, other=ps_other) - got_where = gs.where(gs_condition, other=gs_other) - - expect_mask = ps.mask(ps_condition, other=ps_other) - got_mask = gs.mask(gs_condition, other=gs_other) - - if has_cat is None: - assert_eq( - expect_where.fillna(-1).astype("str"), - got_where.fillna(-1), - check_dtype=False, - ) - assert_eq( - expect_mask.fillna(-1).astype("str"), - got_mask.fillna(-1), - check_dtype=False, - ) - else: - assert_eq(expect_where, got_where, check_dtype=False) - assert_eq(expect_mask, got_mask, check_dtype=False) - - -@pytest.mark.parametrize( - "data,expected_upcast_type,error", - [ - ( - pd.Series([random.random() for _ in range(10)], dtype="float32"), - np.dtype("float32"), - None, - ), - ( - pd.Series([random.random() for _ in range(10)], dtype="float16"), - None, - TypeError, - ), - ( - pd.Series([random.random() for _ in range(10)], dtype="float64"), - np.dtype("float64"), - None, - ), - ( - pd.Series([random.random() for _ in range(10)], dtype="float128"), - None, - ValueError, - ), - ], -) -def test_from_pandas_unsupported_types(data, expected_upcast_type, error): - pdf = pd.DataFrame({"one_col": data}) - if error is not None: - with pytest.raises(error): - cudf.from_pandas(data) - - with pytest.raises(error): - cudf.Series(data) - - with pytest.raises(error): - cudf.from_pandas(pdf) - - with pytest.raises(error): - cudf.DataFrame(pdf) - else: - df = cudf.from_pandas(data) - - assert_eq(data, df, check_dtype=False) - assert df.dtype == expected_upcast_type - - df = cudf.Series(data) - assert_eq(data, df, check_dtype=False) - assert df.dtype == expected_upcast_type - - df = cudf.from_pandas(pdf) - assert_eq(pdf, df, check_dtype=False) - assert df["one_col"].dtype == expected_upcast_type - - df = cudf.DataFrame(pdf) - assert_eq(pdf, df, check_dtype=False) - assert df["one_col"].dtype == expected_upcast_type - - -@pytest.mark.parametrize("nan_as_null", [True, False]) -@pytest.mark.parametrize("index", [None, "a", ["a", "b"]]) -def test_from_pandas_nan_as_null(nan_as_null, index): - data 
= [np.nan, 2.0, 3.0] - - if index is None: - pdf = pd.DataFrame({"a": data, "b": data}) - expected = cudf.DataFrame( - { - "a": as_column(data, nan_as_null=nan_as_null), - "b": as_column(data, nan_as_null=nan_as_null), - } - ) - else: - pdf = pd.DataFrame({"a": data, "b": data}).set_index(index) - expected = cudf.DataFrame( - { - "a": as_column(data, nan_as_null=nan_as_null), - "b": as_column(data, nan_as_null=nan_as_null), - } - ) - expected = cudf.DataFrame( - { - "a": as_column(data, nan_as_null=nan_as_null), - "b": as_column(data, nan_as_null=nan_as_null), - } - ) - expected = expected.set_index(index) - - got = cudf.from_pandas(pdf, nan_as_null=nan_as_null) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_from_pandas_for_series_nan_as_null(nan_as_null): - data = [np.nan, 2.0, 3.0] - psr = pd.Series(data) - - expected = cudf.Series._from_column( - as_column(data, nan_as_null=nan_as_null) - ) - got = cudf.from_pandas(psr, nan_as_null=nan_as_null) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_df_series_dataframe_astype_copy(copy): - gdf = cudf.DataFrame({"col1": [1, 2], "col2": [3, 4]}) - pdf = gdf.to_pandas() - - assert_eq( - gdf.astype(dtype="float", copy=copy), - pdf.astype(dtype="float", copy=copy), - ) - assert_eq(gdf, pdf) - - gsr = cudf.Series([1, 2]) - psr = gsr.to_pandas() - - assert_eq( - gsr.astype(dtype="float", copy=copy), - psr.astype(dtype="float", copy=copy), - ) - assert_eq(gsr, psr) - - gsr = cudf.Series([1, 2]) - psr = gsr.to_pandas() - - actual = gsr.astype(dtype="int64", copy=copy) - expected = psr.astype(dtype="int64", copy=copy) - assert_eq(expected, actual) - assert_eq(gsr, psr) - actual[0] = 3 - expected[0] = 3 - assert_eq(gsr, psr) - - -@pytest.mark.parametrize("copy", [True, False]) -def test_df_series_dataframe_astype_dtype_dict(copy): - gdf = cudf.DataFrame({"col1": [1, 2], "col2": [3, 4]}) - pdf = gdf.to_pandas() - - assert_eq( - gdf.astype(dtype={"col1": "float"}, copy=copy), - pdf.astype(dtype={"col1": "float"}, copy=copy), - ) - assert_eq(gdf, pdf) - - gsr = cudf.Series([1, 2]) - psr = gsr.to_pandas() - - assert_eq( - gsr.astype(dtype={None: "float"}, copy=copy), - psr.astype(dtype={None: "float"}, copy=copy), - ) - assert_eq(gsr, psr) - - assert_exceptions_equal( - lfunc=psr.astype, - rfunc=gsr.astype, - lfunc_args_and_kwargs=([], {"dtype": {"a": "float"}, "copy": copy}), - rfunc_args_and_kwargs=([], {"dtype": {"a": "float"}, "copy": copy}), - ) - - gsr = cudf.Series([1, 2]) - psr = gsr.to_pandas() - - actual = gsr.astype({None: "int64"}, copy=copy) - expected = psr.astype({None: "int64"}, copy=copy) - assert_eq(expected, actual) - assert_eq(gsr, psr) - - actual[0] = 3 - expected[0] = 3 - assert_eq(gsr, psr) - - -@pytest.mark.parametrize( - "data,columns", - [ - ([1, 2, 3, 100, 112, 35464], ["a"]), - (range(100), None), - ( - [], - None, - ), - ((-10, 21, 32, 32, 1, 2, 3), ["p"]), - ( - (), - None, - ), - ([[1, 2, 3], [1, 2, 3]], ["col1", "col2", "col3"]), - ([range(100), range(100)], ["range" + str(i) for i in range(100)]), - (((1, 2, 3), (1, 2, 3)), ["tuple0", "tuple1", "tuple2"]), - ([[1, 2, 3]], ["list col1", "list col2", "list col3"]), - ([[1, 2, 3]], pd.Index(["col1", "col2", "col3"], name="rapids")), - ([range(100)], ["range" + str(i) for i in range(100)]), - (((1, 2, 3),), ["k1", "k2", "k3"]), - ], -) -def test_dataframe_init_1d_list(data, columns): - expect = pd.DataFrame(data, columns=columns) - actual = cudf.DataFrame(data, columns=columns) - - 
assert_eq( - expect, - actual, - check_index_type=len(data) != 0, - ) - - expect = pd.DataFrame(data, columns=None) - actual = cudf.DataFrame(data, columns=None) - - assert_eq( - expect, - actual, - check_index_type=len(data) != 0, - ) - - -@pytest.mark.parametrize( - "data,cols,index", - [ - ( - np.ndarray(shape=(4, 2), dtype=float, order="F"), - ["a", "b"], - ["a", "b", "c", "d"], - ), - ( - np.ndarray(shape=(4, 2), dtype=float, order="F"), - ["a", "b"], - [0, 20, 30, 10], - ), - ( - np.ndarray(shape=(4, 2), dtype=float, order="F"), - ["a", "b"], - [0, 1, 2, 3], - ), - (np.array([11, 123, -2342, 232]), ["a"], [1, 2, 11, 12]), - (np.array([11, 123, -2342, 232]), ["a"], ["khsdjk", "a", "z", "kk"]), - ( - cupy.ndarray(shape=(4, 2), dtype=float, order="F"), - ["a", "z"], - ["a", "z", "a", "z"], - ), - (cupy.array([11, 123, -2342, 232]), ["z"], [0, 1, 1, 0]), - (cupy.array([11, 123, -2342, 232]), ["z"], [1, 2, 3, 4]), - (cupy.array([11, 123, -2342, 232]), ["z"], ["a", "z", "d", "e"]), - ( - np.random.default_rng(seed=0).standard_normal(size=(2, 4)), - ["a", "b", "c", "d"], - ["a", "b"], - ), - ( - np.random.default_rng(seed=0).standard_normal(size=(2, 4)), - ["a", "b", "c", "d"], - [1, 0], - ), - (cupy.random.randn(2, 4), ["a", "b", "c", "d"], ["a", "b"]), - (cupy.random.randn(2, 4), ["a", "b", "c", "d"], [1, 0]), - ], -) -def test_dataframe_init_from_arrays_cols(data, cols, index): - gd_data = data - if isinstance(data, cupy.ndarray): - # pandas can't handle cupy arrays in general - pd_data = data.get() - - # additional test for building DataFrame with gpu array whose - # cuda array interface has no `descr` attribute - numba_data = cuda.as_cuda_array(data) - else: - pd_data = data - numba_data = None - - # verify with columns & index - pdf = pd.DataFrame(pd_data, columns=cols, index=index) - gdf = cudf.DataFrame(gd_data, columns=cols, index=index) - - assert_eq(pdf, gdf, check_dtype=False) - - # verify with columns - pdf = pd.DataFrame(pd_data, columns=cols) - gdf = cudf.DataFrame(gd_data, columns=cols) - - assert_eq(pdf, gdf, check_dtype=False) - - pdf = pd.DataFrame(pd_data) - gdf = cudf.DataFrame(gd_data) - - assert_eq(pdf, gdf, check_dtype=False) - - if numba_data is not None: - gdf = cudf.DataFrame(numba_data) - assert_eq(pdf, gdf, check_dtype=False) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "col_data", + "col_data", [ range(5), ["a", "b", "x", "y", "z"], @@ -4309,223 +3167,57 @@ def test_dataframe_info_null_counts(): df = cudf.DataFrame( { - "a": [1, 2, 3, None, 10, 11, 12, None], - "b": ["a", "b", "c", "sd", "sdf", "sd", None, None], - } - ) - - str_cmp = textwrap.dedent( - """\ - - RangeIndex: 8 entries, 0 to 7 - Data columns (total 2 columns): - # Column Dtype - --- ------ ----- - 0 a int64 - 1 b object - dtypes: int64(1), object(1) - memory usage: 238.0+ bytes - """ - ) - pd.options.display.max_info_rows = 2 - df.info(buf=buffer, max_cols=2, null_counts=None) - pd.reset_option("display.max_info_rows") - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - str_cmp = textwrap.dedent( - """\ - - RangeIndex: 8 entries, 0 to 7 - Data columns (total 2 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 a 6 non-null int64 - 1 b 6 non-null object - dtypes: int64(1), object(1) - memory usage: 238.0+ bytes - """ - ) - - df.info(buf=buffer, max_cols=2, null_counts=None) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - 
df.info(buf=buffer, null_counts=True) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - -@pytest.mark.parametrize( - "orient", ["dict", "list", "split", "tight", "records", "index", "series"] -) -@pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)]) -def test_dataframe_to_dict(orient, into): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [9, 5, 3]}, index=[10, 11, 12]) - pdf = df.to_pandas() - - actual = df.to_dict(orient=orient, into=into) - expected = pdf.to_dict(orient=orient, into=into) - if orient == "series": - assert actual.keys() == expected.keys() - for key in actual.keys(): - assert_eq(expected[key], actual[key]) - else: - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data, orient, dtype, columns", - [ - ( - {"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]}, - "columns", - None, - None, - ), - ({"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]}, "index", None, None), - ( - {"col_1": [None, 2, 1, 0], "col_2": [3, None, 1, 0]}, - "index", - None, - ["A", "B", "C", "D"], - ), - ( - { - "col_1": ["ab", "cd", "ef", "gh"], - "col_2": ["zx", "one", "two", "three"], - }, - "index", - None, - ["A", "B", "C", "D"], - ), - ( - { - "index": [("a", "b"), ("a", "c")], - "columns": [("x", 1), ("y", 2)], - "data": [[1, 3], [2, 4]], - "index_names": ["n1", "n2"], - "column_names": ["z1", "z2"], - }, - "tight", - "float64", - None, - ), - ], -) -def test_dataframe_from_dict(data, orient, dtype, columns): - expected = pd.DataFrame.from_dict( - data=data, orient=orient, dtype=dtype, columns=columns - ) - - actual = cudf.DataFrame.from_dict( - data=data, orient=orient, dtype=dtype, columns=columns - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("dtype", ["int64", "str", None]) -def test_dataframe_from_dict_transposed(dtype): - pd_data = {"a": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]} - gd_data = {key: cudf.Series(val) for key, val in pd_data.items()} - - expected = pd.DataFrame.from_dict(pd_data, orient="index", dtype=dtype) - actual = cudf.DataFrame.from_dict(gd_data, orient="index", dtype=dtype) - - gd_data = {key: cupy.asarray(val) for key, val in pd_data.items()} - actual = cudf.DataFrame.from_dict(gd_data, orient="index", dtype=dtype) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "pd_data, gd_data, orient, dtype, columns", - [ - ( - {"col_1": np.array([3, 2, 1, 0]), "col_2": np.array([3, 2, 1, 0])}, - { - "col_1": cupy.array([3, 2, 1, 0]), - "col_2": cupy.array([3, 2, 1, 0]), - }, - "columns", - None, - None, - ), - ( - {"col_1": np.array([3, 2, 1, 0]), "col_2": np.array([3, 2, 1, 0])}, - { - "col_1": cupy.array([3, 2, 1, 0]), - "col_2": cupy.array([3, 2, 1, 0]), - }, - "index", - None, - None, - ), - ( - { - "col_1": np.array([None, 2, 1, 0]), - "col_2": np.array([3, None, 1, 0]), - }, - { - "col_1": cupy.array([np.nan, 2, 1, 0]), - "col_2": cupy.array([3, np.nan, 1, 0]), - }, - "index", - None, - ["A", "B", "C", "D"], - ), - ( - { - "col_1": np.array(["ab", "cd", "ef", "gh"]), - "col_2": np.array(["zx", "one", "two", "three"]), - }, - { - "col_1": np.array(["ab", "cd", "ef", "gh"]), - "col_2": np.array(["zx", "one", "two", "three"]), - }, - "index", - None, - ["A", "B", "C", "D"], - ), - ( - { - "index": [("a", "b"), ("a", "c")], - "columns": [("x", 1), ("y", 2)], - "data": [np.array([1, 3]), np.array([2, 4])], - "index_names": ["n1", "n2"], - "column_names": ["z1", "z2"], - }, - { - "index": [("a", "b"), ("a", "c")], - "columns": [("x", 1), ("y", 2)], - "data": [cupy.array([1, 3]), cupy.array([2, 4])], - 
"index_names": ["n1", "n2"], - "column_names": ["z1", "z2"], - }, - "tight", - "float64", - None, - ), - ], -) -def test_dataframe_from_dict_cp_np_arrays( - pd_data, gd_data, orient, dtype, columns -): - expected = pd.DataFrame.from_dict( - data=pd_data, orient=orient, dtype=dtype, columns=columns + "a": [1, 2, 3, None, 10, 11, 12, None], + "b": ["a", "b", "c", "sd", "sdf", "sd", None, None], + } + ) + + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 8 entries, 0 to 7 + Data columns (total 2 columns): + # Column Dtype + --- ------ ----- + 0 a int64 + 1 b object + dtypes: int64(1), object(1) + memory usage: 238.0+ bytes + """ ) + pd.options.display.max_info_rows = 2 + df.info(buf=buffer, max_cols=2, null_counts=None) + pd.reset_option("display.max_info_rows") + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) - actual = cudf.DataFrame.from_dict( - data=gd_data, orient=orient, dtype=dtype, columns=columns + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 8 entries, 0 to 7 + Data columns (total 2 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 6 non-null int64 + 1 b 6 non-null object + dtypes: int64(1), object(1) + memory usage: 238.0+ bytes + """ ) - assert_eq(expected, actual, check_dtype=dtype is not None) + df.info(buf=buffer, max_cols=2, null_counts=None) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + df.info(buf=buffer, null_counts=True) + actual_string = buffer.getvalue() + assert str_cmp == actual_string @pytest.mark.parametrize( @@ -6376,402 +5068,53 @@ def test_rename_for_level_is_None_MC(): ], ) def test_explode(data, labels, ignore_index, p_index, label_to_explode): - pdf = pd.DataFrame(data, index=p_index, columns=labels) - gdf = cudf.from_pandas(pdf) - - expect = pdf.explode(label_to_explode, ignore_index) - got = gdf.explode(label_to_explode, ignore_index) - - assert_eq(expect, got, check_dtype=False) - - -def test_explode_preserve_categorical(): - gdf = cudf.DataFrame( - { - "A": [[1, 2], None, [2, 3]], - "B": cudf.Series([0, 1, 2], dtype="category"), - } - ) - result = gdf.explode("A") - expected = cudf.DataFrame( - { - "A": [1, 2, None, 2, 3], - "B": cudf.Series([0, 0, 1, 2, 2], dtype="category"), - } - ) - expected.index = cudf.Index([0, 0, 1, 2, 2]) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "data,ascending,expected_data", - [ - ( - {"a": [10, 0, 2], "b": [-10, 10, 1]}, - True, - [1, 2, 0], - ), - ( - {"a": [10, 0, 2], "b": [-10, 10, 1]}, - False, - [0, 2, 1], - ), - ], -) -def test_dataframe_argsort(data, ascending, expected_data): - actual = cudf.DataFrame(data).argsort(ascending=ascending) - expected = cupy.array(expected_data, dtype="int32") - - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "data,columns,index", - [ - (pd.Series([1, 2, 3]), None, None), - (pd.Series(["a", "b", None, "c"], name="abc"), None, None), - ( - pd.Series(["a", "b", None, "c"], name="abc"), - ["abc", "b"], - [1, 2, 3], - ), - ], -) -def test_dataframe_init_from_series(data, columns, index): - expected = pd.DataFrame(data, columns=columns, index=index) - actual = cudf.DataFrame(data, columns=columns, index=index) - - assert_eq( - expected, - actual, - check_index_type=len(expected) != 0, - ) - - -def test_frame_series_where(): - gdf = cudf.DataFrame( - {"a": [1.0, 2.0, None, 3.0, None], "b": [None, 10.0, 11.0, None, 23.0]} - ) - pdf = gdf.to_pandas() - expected = gdf.where(gdf.notna(), gdf.mean()) - 
actual = pdf.where(pdf.notna(), pdf.mean(), axis=1) - assert_eq(expected, actual) - - -def test_frame_series_where_other(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]}) - pdf = gdf.to_pandas() - - expected = gdf.where(gdf["b"] == 1, cudf.NA) - actual = pdf.where(pdf["b"] == 1, pd.NA) - assert_eq( - actual.fillna(-1).values, - expected.fillna(-1).values, - check_dtype=False, - ) - - expected = gdf.where(gdf["b"] == 1, 0) - actual = pdf.where(pdf["b"] == 1, 0) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data, gkey", - [ - ( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], - "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], - "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], - }, - ["id", "val1", "val2"], - ), - ( - { - "id": [0] * 4 + [1] * 3, - "a": [10, 3, 4, 2, -3, 9, 10], - "b": [10, 23, -4, 2, -3, 9, 19], - }, - ["id", "a"], - ), - ( - { - "id": ["a", "a", "b", "b", "c", "c"], - "val": pa.array( - [None, None, None, None, None, None], type=pa.float64() - ), - }, - ["id"], - ), - ( - { - "id": ["a", "a", "b", "b", "c", "c"], - "val1": [None, 4, 6, 8, None, 2], - "val2": [4, 5, None, 2, 9, None], - }, - ["id"], - ), - ({"id": [1.0], "val1": [2.0], "val2": [3.0]}, ["id"]), - ], -) -@pytest.mark.parametrize("min_per", [0, 1, 2]) -def test_pearson_corr_passing(data, gkey, min_per): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - actual = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per) - expected = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("method", ["kendall", "spearman"]) -def test_pearson_corr_unsupported_methods(method): - gdf = cudf.DataFrame( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], - "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], - "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], - } - ) - - with pytest.raises( - NotImplementedError, - match="Only pearson correlation is currently supported", - ): - gdf.groupby("id").corr(method) - - -def test_pearson_corr_empty_columns(): - gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) - pdf = gdf.to_pandas() - - actual = gdf.groupby("id").corr("pearson") - expected = pdf.groupby("id").corr("pearson") - - assert_eq( - expected, - actual, - check_dtype=False, - check_index_type=False, - ) - - -@pytest.mark.parametrize( - "data", - [ - ["v", "n", "k", "l", "m", "i", "y", "r", "w"], - [1, 1, 1, 2, 2, 2, 3, 3, 3], - ], -) -@pytest.mark.parametrize("gkey", ["id", "val1", "val2"]) -def test_pearson_corr_invalid_column_types(data, gkey): - gdf = cudf.DataFrame( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": data, - "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], - } - ) - with pytest.raises( - TypeError, - match="Correlation accepts only numerical column-pairs", - ): - gdf.groupby(gkey).corr("pearson") - - -def test_pearson_corr_multiindex_dataframe(): - gdf = cudf.DataFrame( - {"a": [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [2, 3, 4, 5]} - ).set_index(["a", "b"]) - - actual = gdf.groupby(level="a").corr("pearson") - expected = gdf.to_pandas().groupby(level="a").corr("pearson") - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [np.nan, 1, 2], "b": [None, None, None]}, - {"a": [1, 2, np.nan, 2], "b": [np.nan, np.nan, np.nan, np.nan]}, - { - "a": [1, 2, np.nan, 2, None], - "b": [np.nan, np.nan, None, np.nan, np.nan], - }, - {"a": [1, 2, 2, None, 1.1], "b": [1, 2.2, 3, None, 5]}, - ], -) 
-@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_dataframe_constructor_nan_as_null(data, nan_as_null): - actual = cudf.DataFrame(data, nan_as_null=nan_as_null) - - if nan_as_null: - assert ( - not ( - actual.astype("float").replace( - cudf.Series([np.nan], nan_as_null=False), cudf.Series([-1]) - ) - == -1 - ) - .any() - .any() - ) - else: - actual = actual.select_dtypes(exclude=["object"]) - assert (actual.replace(np.nan, -1) == -1).any().any() - - -def test_dataframe_add_prefix(): - cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) - pdf = cdf.to_pandas() - - got = cdf.add_prefix("item_") - expected = pdf.add_prefix("item_") - - assert_eq(got, expected) - - -def test_dataframe_add_suffix(): - cdf = cudf.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) - pdf = cdf.to_pandas() - - got = cdf.add_suffix("_item") - expected = pdf.add_suffix("_item") - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data, gkey", - [ - ( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], - "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], - "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], - }, - ["id"], - ), - ( - { - "id": [0, 0, 0, 0, 1, 1, 1], - "a": [10.0, 3, 4, 2.0, -3.0, 9.0, 10.0], - "b": [10.0, 23, -4.0, 2, -3.0, 9, 19.0], - }, - ["id", "a"], - ), - ], -) -@pytest.mark.parametrize("min_periods", [0, 3]) -@pytest.mark.parametrize("ddof", [1, 2]) -def test_groupby_covariance(data, gkey, min_periods, ddof): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - actual = gdf.groupby(gkey).cov(min_periods=min_periods, ddof=ddof) - # We observe a warning if there are too few observations to generate a - # non-singular covariance matrix _and_ there are enough that pandas will - # actually attempt to compute a value. Groups with fewer than min_periods - # inputs will be skipped altogether, so no warning occurs. 
- with expect_warning_if( - (pdf.groupby(gkey).count() < 2).all().all() - and (pdf.groupby(gkey).count() > min_periods).all().all(), - RuntimeWarning, - ): - expected = pdf.groupby(gkey).cov(min_periods=min_periods, ddof=ddof) - - assert_eq(expected, actual) - - -def test_groupby_covariance_multiindex_dataframe(): - gdf = cudf.DataFrame( - { - "a": [1, 1, 2, 2], - "b": [1, 1, 2, 2], - "c": [2, 3, 4, 5], - "d": [6, 8, 9, 1], - } - ).set_index(["a", "b"]) - - actual = gdf.groupby(level=["a", "b"]).cov() - expected = gdf.to_pandas().groupby(level=["a", "b"]).cov() - - assert_eq(expected, actual) - - -def test_groupby_covariance_empty_columns(): - gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) - pdf = gdf.to_pandas() - - actual = gdf.groupby("id").cov() - expected = pdf.groupby("id").cov() - - assert_eq( - expected, - actual, - check_dtype=False, - check_index_type=False, - ) + pdf = pd.DataFrame(data, index=p_index, columns=labels) + gdf = cudf.from_pandas(pdf) + expect = pdf.explode(label_to_explode, ignore_index) + got = gdf.explode(label_to_explode, ignore_index) -def test_groupby_cov_invalid_column_types(): - gdf = cudf.DataFrame( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], - "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], - }, - ) - with pytest.raises( - TypeError, - match="Covariance accepts only numerical column-pairs", - ): - gdf.groupby("id").cov() + assert_eq(expect, got, check_dtype=False) -def test_groupby_cov_positive_semidefinite_matrix(): - # Refer to discussions in PR #9889 re "pair-wise deletion" strategy - # being used in pandas to compute the covariance of a dataframe with - # rows containing missing values. - # Note: cuDF currently matches pandas behavior in that the covariance - # matrices are not guaranteed PSD (positive semi definite). - # https://github.com/rapidsai/cudf/pull/9889#discussion_r794158358 +def test_explode_preserve_categorical(): gdf = cudf.DataFrame( - [[1, 2], [None, 4], [5, None], [7, 8]], columns=["v0", "v1"] + { + "A": [[1, 2], None, [2, 3]], + "B": cudf.Series([0, 1, 2], dtype="category"), + } ) - actual = gdf.groupby(by=cudf.Series([1, 1, 1, 1])).cov() - actual.reset_index(drop=True, inplace=True) - - pdf = gdf.to_pandas() - expected = pdf.groupby(by=pd.Series([1, 1, 1, 1])).cov() - expected.reset_index(drop=True, inplace=True) - - assert_eq( - expected, - actual, - check_dtype=False, + result = gdf.explode("A") + expected = cudf.DataFrame( + { + "A": [1, 2, None, 2, 3], + "B": cudf.Series([0, 0, 1, 2, 2], dtype="category"), + } ) + expected.index = cudf.Index([0, 0, 1, 2, 2]) + assert_eq(result, expected) -@pytest_xfail -def test_groupby_cov_for_pandas_bug_case(): - # Handles case: pandas bug using ddof with missing data. 
- # Filed an issue in Pandas on GH, link below: - # https://github.com/pandas-dev/pandas/issues/45814 - pdf = pd.DataFrame( - {"id": ["a", "a"], "val1": [1.0, 2.0], "val2": [np.nan, np.nan]} - ) - expected = pdf.groupby("id").cov(ddof=2) - - gdf = cudf.from_pandas(pdf) - actual = gdf.groupby("id").cov(ddof=2) +@pytest.mark.parametrize( + "data,ascending,expected_data", + [ + ( + {"a": [10, 0, 2], "b": [-10, 10, 1]}, + True, + [1, 2, 0], + ), + ( + {"a": [10, 0, 2], "b": [-10, 10, 1]}, + False, + [0, 2, 1], + ), + ], +) +def test_dataframe_argsort(data, ascending, expected_data): + actual = cudf.DataFrame(data).argsort(ascending=ascending) + expected = cupy.array(expected_data, dtype="int32") - assert_eq(expected, actual) + assert_eq(actual, expected) @pytest.mark.parametrize( @@ -7262,24 +5605,6 @@ def test_dataframe_duplicated(data, subset, keep): assert_eq(expected, actual) -@pytest.mark.parametrize( - "data", - [ - {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, - {"a": [[{"b": 567}], None] * 10}, - {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, - ], -) -def test_dataframe_transpose_complex_types(data): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - expected = pdf.T - actual = gdf.T - - assert_eq(expected, actual) - - @pytest.mark.parametrize( "data", [ @@ -7308,21 +5633,6 @@ def test_dataframe_from_arrow_slice(): assert_eq(expected, actual) -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": 4}, - {"c": 4, "a": [1, 2, 3], "b": ["x", "y", "z"]}, - {"a": [1, 2, 3], "c": 4}, - ], -) -def test_dataframe_init_from_scalar_and_lists(data): - actual = cudf.DataFrame(data) - expected = pd.DataFrame(data) - - assert_eq(expected, actual) - - @pytest.mark.parametrize( "data,index", [ @@ -7428,91 +5738,6 @@ def test_dataframe_binop_with_datetime_index(): assert_eq(expected, got) -@pytest.mark.parametrize( - "columns", - ( - [], - ["c", "a"], - ["a", "d", "b", "e", "c"], - ["a", "b", "c"], - pd.Index(["b", "a", "c"], name="custom_name"), - ), -) -@pytest.mark.parametrize("index", (None, [4, 5, 6])) -def test_dataframe_dict_like_with_columns(columns, index): - data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - expect = pd.DataFrame(data, columns=columns, index=index) - actual = cudf.DataFrame(data, columns=columns, index=index) - if index is None and len(columns) == 0: - # We make an empty range index, pandas makes an empty index - expect = expect.reset_index(drop=True) - assert_eq(expect, actual) - - -def test_dataframe_init_columns_named_multiindex(): - rng = np.random.default_rng(seed=0) - data = rng.standard_normal(size=(2, 2)) - columns = cudf.MultiIndex.from_tuples( - [("A", "one"), ("A", "two")], names=["y", "z"] - ) - gdf = cudf.DataFrame(data, columns=columns) - pdf = pd.DataFrame(data, columns=columns.to_pandas()) - - assert_eq(gdf, pdf) - - -def test_dataframe_init_columns_named_index(): - rng = np.random.default_rng(seed=0) - data = rng.standard_normal(size=(2, 2)) - columns = pd.Index(["a", "b"], name="custom_name") - gdf = cudf.DataFrame(data, columns=columns) - pdf = pd.DataFrame(data, columns=columns) - - assert_eq(gdf, pdf) - - -def test_dataframe_from_pandas_sparse(): - pdf = pd.DataFrame(range(2), dtype=pd.SparseDtype(np.int64, 0)) - with pytest.raises(NotImplementedError): - cudf.DataFrame(pdf) - - -def test_dataframe_constructor_unbounded_sequence(): - class A: - def __getitem__(self, key): - return 1 - - with pytest.raises(TypeError): - cudf.DataFrame([A()]) - - with pytest.raises(TypeError): - 
cudf.DataFrame({"a": A()}) - - -def test_dataframe_constructor_dataframe_list(): - df = cudf.DataFrame(range(2)) - with pytest.raises(TypeError): - cudf.DataFrame([df]) - - -def test_dataframe_constructor_from_namedtuple(): - Point1 = namedtuple("Point1", ["a", "b", "c"]) - Point2 = namedtuple("Point1", ["x", "y"]) - - data = [Point1(1, 2, 3), Point2(4, 5)] - idx = ["a", "b"] - gdf = cudf.DataFrame(data, index=idx) - pdf = pd.DataFrame(data, index=idx) - - assert_eq(gdf, pdf) - - data = [Point2(4, 5), Point1(1, 2, 3)] - with pytest.raises(ValueError): - cudf.DataFrame(data, index=idx) - with pytest.raises(ValueError): - pd.DataFrame(data, index=idx) - - @pytest.mark.parametrize( "dtype", ["datetime64[ns]", "timedelta64[ns]", "int64", "float32"] ) @@ -7550,114 +5775,6 @@ def test_dataframe_reindex_with_index_names(index_data, name): assert_eq(actual, expected) -@pytest.mark.parametrize("attr", ["nlargest", "nsmallest"]) -def test_dataframe_nlargest_nsmallest_str_error(attr): - gdf = cudf.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) - pdf = gdf.to_pandas() - - assert_exceptions_equal( - getattr(gdf, attr), - getattr(pdf, attr), - ([], {"n": 1, "columns": ["a", "b"]}), - ([], {"n": 1, "columns": ["a", "b"]}), - ) - - -def test_series_data_no_name_with_columns(): - gdf = cudf.DataFrame(cudf.Series([1]), columns=[1]) - pdf = pd.DataFrame(pd.Series([1]), columns=[1]) - assert_eq(gdf, pdf) - - -def test_series_data_no_name_with_columns_more_than_one_raises(): - with pytest.raises(ValueError): - cudf.DataFrame(cudf.Series([1]), columns=[1, 2]) - with pytest.raises(ValueError): - pd.DataFrame(pd.Series([1]), columns=[1, 2]) - - -def test_series_data_with_name_with_columns_matching(): - gdf = cudf.DataFrame(cudf.Series([1], name=1), columns=[1]) - pdf = pd.DataFrame(pd.Series([1], name=1), columns=[1]) - assert_eq(gdf, pdf) - - -def test_series_data_with_name_with_columns_not_matching(): - gdf = cudf.DataFrame(cudf.Series([1], name=2), columns=[1]) - pdf = pd.DataFrame(pd.Series([1], name=2), columns=[1]) - assert_eq(gdf, pdf) - - -def test_series_data_with_name_with_columns_matching_align(): - gdf = cudf.DataFrame(cudf.Series([1], name=2), columns=[1, 2]) - pdf = pd.DataFrame(pd.Series([1], name=2), columns=[1, 2]) - assert_eq(gdf, pdf) - - -@pytest.mark.parametrize("digits", [0, 1, 4]) -def test_dataframe_round_builtin(digits): - pdf = pd.DataFrame( - { - "a": [1.2234242333234, 323432.3243423, np.nan], - "b": ["a", "b", "c"], - "c": pd.Series([34224, 324324, 324342], dtype="datetime64[ns]"), - "d": pd.Series([224.242, None, 2424.234324], dtype="category"), - "e": [ - decimal.Decimal("342.3243234234242"), - decimal.Decimal("89.32432497687622"), - None, - ], - } - ) - gdf = cudf.from_pandas(pdf, nan_as_null=False) - - expected = round(pdf, digits) - actual = round(gdf, digits) - - assert_eq(expected, actual) - - -def test_dataframe_init_from_nested_dict(): - ordered_dict = OrderedDict( - [ - ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), - ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), - ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), - ] - ) - pdf = pd.DataFrame(ordered_dict) - gdf = cudf.DataFrame(ordered_dict) - - assert_eq(pdf, gdf) - regular_dict = {key: dict(value) for key, value in ordered_dict.items()} - - pdf = pd.DataFrame(regular_dict) - gdf = cudf.DataFrame(regular_dict) - assert_eq(pdf, gdf) - - -def test_init_from_2_categoricalindex_series_diff_categories(): - s1 = cudf.Series( - [39, 6, 4], 
index=cudf.CategoricalIndex(["female", "male", "unknown"]) - ) - s2 = cudf.Series( - [2, 152, 2, 242, 150], - index=cudf.CategoricalIndex(["f", "female", "m", "male", "unknown"]), - ) - result = cudf.DataFrame([s1, s2]) - expected = pd.DataFrame([s1.to_pandas(), s2.to_pandas()]) - # TODO: Remove once https://github.com/pandas-dev/pandas/issues/57592 - # is adressed - expected.columns = result.columns - assert_eq(result, expected, check_dtype=False) - - -def test_data_frame_values_no_cols_but_index(): - result = cudf.DataFrame(index=range(5)).values - expected = pd.DataFrame(index=range(5)).values - assert_eq(result, expected) - - def test_dataframe_reduction_error(): gdf = cudf.DataFrame( { @@ -7676,11 +5793,6 @@ def test_dataframe_from_generator(): assert_eq(pdf, gdf) -def test_dataframe_from_ndarray_dup_columns(): - with pytest.raises(ValueError): - cudf.DataFrame(np.eye(2), columns=["A", "A"]) - - @pytest.mark.parametrize("name", ["a", 0, None, np.nan, cudf.NA]) @pytest.mark.parametrize("contains", ["a", 0, None, np.nan, cudf.NA]) @pytest.mark.parametrize("other_names", [[], ["b", "c"], [1, 2]]) @@ -7785,55 +5897,6 @@ def test_dataframe_duplicate_index_reindex(): ) -def test_dataframe_columns_set_none_raises(): - df = cudf.DataFrame({"a": [0]}) - with pytest.raises(TypeError): - df.columns = None - - -@pytest.mark.parametrize( - "columns", - [cudf.RangeIndex(1, name="foo"), pd.RangeIndex(1, name="foo"), range(1)], -) -def test_dataframe_columns_set_rangeindex(columns): - df = cudf.DataFrame([1], columns=["a"]) - df.columns = columns - result = df.columns - expected = pd.RangeIndex(1, name=getattr(columns, "name", None)) - pd.testing.assert_index_equal(result, expected, exact=True) - - -@pytest.mark.parametrize("klass", [cudf.MultiIndex, pd.MultiIndex]) -def test_dataframe_columns_set_multiindex(klass): - columns = klass.from_arrays([[10]], names=["foo"]) - df = cudf.DataFrame([1], columns=["a"]) - df.columns = columns - result = df.columns - expected = pd.MultiIndex.from_arrays([[10]], names=["foo"]) - pd.testing.assert_index_equal(result, expected, exact=True) - - -@pytest.mark.parametrize( - "klass", - [ - functools.partial(cudf.Index, name="foo"), - functools.partial(cudf.Series, name="foo"), - functools.partial(pd.Index, name="foo"), - functools.partial(pd.Series, name="foo"), - np.array, - ], -) -def test_dataframe_columns_set_preserve_type(klass): - df = cudf.DataFrame([1], columns=["a"]) - columns = klass([10], dtype="int8") - df.columns = columns - result = df.columns - expected = pd.Index( - [10], dtype="int8", name=getattr(columns, "name", None) - ) - pd.testing.assert_index_equal(result, expected) - - @pytest.mark.parametrize( "expected", [ @@ -7890,53 +5953,6 @@ def test_dataframe_to_pandas_arrow_type(scalar): pd.testing.assert_frame_equal(result, expected) -@pytest.mark.parametrize("axis", [None, 0, "index", 1, "columns"]) -@pytest.mark.parametrize("data", [[[1, 2], [2, 3]], [1, 2], [1]]) -def test_squeeze(axis, data): - df = cudf.DataFrame(data) - result = df.squeeze(axis=axis) - expected = df.to_pandas().squeeze(axis=axis) - assert_eq(result, expected) - - -@pytest.mark.parametrize("column", [range(1, 2), np.array([1], dtype=np.int8)]) -@pytest.mark.parametrize( - "operation", - [ - lambda df: df.where(df < 2, 2), - lambda df: df.nans_to_nulls(), - lambda df: df.isna(), - lambda df: df.notna(), - lambda df: abs(df), - lambda df: -df, - lambda df: ~df, - lambda df: df.cumsum(), - lambda df: df.replace(1, 2), - lambda df: df.replace(10, 20), - lambda df: df.clip(0, 10), - 
lambda df: df.rolling(1).mean(), - lambda df: df.interpolate(), - lambda df: df.shift(), - lambda df: df.sort_values(1), - lambda df: df.round(), - lambda df: df.rank(), - ], -) -def test_op_preserves_column_metadata(column, operation): - df = cudf.DataFrame([1], columns=cudf.Index(column)) - result = operation(df).columns - expected = pd.Index(column) - pd.testing.assert_index_equal(result, expected, exact=True) - - -def test_dataframe_init_with_nans(): - with cudf.option_context("mode.pandas_compatible", True): - gdf = cudf.DataFrame({"a": [1, 2, 3, np.nan]}) - assert gdf["a"].dtype == np.dtype("float64") - pdf = pd.DataFrame({"a": [1, 2, 3, np.nan]}) - assert_eq(pdf, gdf) - - @pytest.mark.parametrize("dtype1", ["int16", "float32"]) @pytest.mark.parametrize("dtype2", ["int16", "float32"]) def test_dataframe_loc_int_float(dtype1, dtype2): @@ -7955,66 +5971,6 @@ def test_dataframe_loc_int_float(dtype1, dtype2): assert_eq(actual, expected, check_index_type=True, check_dtype=True) -@pytest.mark.parametrize( - "data", - [ - cudf.DataFrame(range(2)), - None, - [cudf.Series(range(2))], - [[0], [1]], - {1: range(2)}, - cupy.arange(2), - ], -) -def test_init_with_index_no_shallow_copy(data): - idx = cudf.RangeIndex(2) - df = cudf.DataFrame(data, index=idx) - assert df.index is idx - - -def test_from_records_with_index_no_shallow_copy(): - idx = cudf.RangeIndex(2) - data = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", " Date: Thu, 21 Aug 2025 20:35:23 -0400 Subject: [PATCH 194/366] Support `nan` in non-floating point column in cudf-polars (#19742) Closes #19741 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - James Lamb (https://github.com/jameslamb) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19742 --- ci/test_narwhals.sh | 4 +- .../cudf_polars/dsl/expressions/boolean.py | 54 ++++++++++++------- .../tests/expressions/test_booleanfunction.py | 31 +++++------ 3 files changed, 51 insertions(+), 38 deletions(-) diff --git a/ci/test_narwhals.sh b/ci/test_narwhals.sh index d30d0d92c05..c40e49184c0 100755 --- a/ci/test_narwhals.sh +++ b/ci/test_narwhals.sh @@ -39,8 +39,10 @@ PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 python -m pytest \ --constructors=cudf # test_datetime[polars[lazy]]: Fixed in the next narwhals release >2.0.1 +# test_nan[polars[lazy]]: Passes as of https://github.com/rapidsai/cudf/pull/19742 TESTS_THAT_NEED_NARWHALS_FIX_FOR_CUDF_POLARS=" \ -test_datetime[polars[lazy]] \ +test_datetime[polars[lazy]] or \ +test_nan[polars[lazy]] \ " rapids-logger "Run narwhals tests for cuDF Polars" diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py index ce8c4fc3276..dc16a18eaf3 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/boolean.py @@ -153,13 +153,18 @@ def do_evaluate( ): # Avoid evaluating the child if the dtype tells us it's unnecessary. 
(child,) = self.children + needles = child.evaluate(df, context=context) + is_float = needles.obj.type().id() in ( + plc.TypeId.FLOAT32, + plc.TypeId.FLOAT64, + ) is_finite = self.name is BooleanFunction.Name.IsFinite - if child.dtype.id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64): - value = plc.Scalar.from_py(is_finite) - return Column( - plc.Column.from_scalar(value, df.num_rows), dtype=self.dtype + if not is_float: + base = plc.Column.from_scalar( + plc.Scalar.from_py(py_val=is_finite), needles.size ) - needles = child.evaluate(df, context=context) + out = base.with_mask(needles.obj.null_mask(), needles.null_count) + return Column(out, dtype=self.dtype) to_search = [-float("inf"), float("inf")] if is_finite: # NaN is neither finite not infinite @@ -171,7 +176,10 @@ def do_evaluate( result = plc.search.contains(haystack, needles.obj) if is_finite: result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT) - return Column(result, dtype=self.dtype) + return Column( + result.with_mask(needles.obj.null_mask(), needles.null_count), + dtype=self.dtype, + ) columns = [child.evaluate(df, context=context) for child in self.children] # Kleene logic for Any (OR) and All (AND) if ignore_nulls is # False @@ -206,22 +214,28 @@ def do_evaluate( elif self.name is BooleanFunction.Name.IsNotNull: (column,) = columns return Column(plc.unary.is_valid(column.obj), dtype=self.dtype) - elif self.name is BooleanFunction.Name.IsNan: - (column,) = columns - return Column( - plc.unary.is_nan(column.obj).with_mask( - column.obj.null_mask(), column.null_count - ), - dtype=self.dtype, - ) - elif self.name is BooleanFunction.Name.IsNotNan: + elif self.name in (BooleanFunction.Name.IsNan, BooleanFunction.Name.IsNotNan): (column,) = columns - return Column( - plc.unary.is_not_nan(column.obj).with_mask( - column.obj.null_mask(), column.null_count - ), - dtype=self.dtype, + is_float = column.obj.type().id() in ( + plc.TypeId.FLOAT32, + plc.TypeId.FLOAT64, ) + if is_float: + op = ( + plc.unary.is_nan + if self.name is BooleanFunction.Name.IsNan + else plc.unary.is_not_nan + ) + base = op(column.obj) + else: + base = plc.Column.from_scalar( + plc.Scalar.from_py( + py_val=self.name is not BooleanFunction.Name.IsNan + ), + column.size, + ) + out = base.with_mask(column.obj.null_mask(), column.null_count) + return Column(out, dtype=self.dtype) elif self.name is BooleanFunction.Name.IsFirstDistinct: (column,) = columns return self._distinct( diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index 236a35935b8..6a6926b9d50 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -12,7 +12,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) -from cudf_polars.utils.versions import POLARS_VERSION_LT_130, POLARS_VERSION_LT_132 +from cudf_polars.utils.versions import POLARS_VERSION_LT_132 if TYPE_CHECKING: from collections.abc import Callable @@ -91,7 +91,15 @@ def test_boolean_function_unary( assert_gpu_result_equal(q) -def test_nan_in_non_floating_point_column(): +@pytest.mark.parametrize( + "expr", + [ + pytest.param(lambda e: e.is_nan(), id="is_nan"), + pytest.param(lambda e: e.is_not_nan(), id="is_not_nan"), + pytest.param(lambda e: e.is_finite(), id="is_finite"), + ], +) +def test_nan_in_non_floating_point_column(expr): ldf = pl.LazyFrame({"int": [-1, 1, None]}).with_columns( float=pl.col("int").cast(pl.Float64), 
float_na=pl.col("int") ** 0.5, @@ -99,24 +107,13 @@ def test_nan_in_non_floating_point_column(): q = ldf.select( [ - pl.col("int").is_nan().alias("int"), - pl.col("float").is_nan().alias("float"), - pl.col("float_na").is_nan().alias("float_na"), + expr(pl.col("int")), + expr(pl.col("float")), + expr(pl.col("float_na")), ] ) - if POLARS_VERSION_LT_130: - with pytest.raises( - pl.exceptions.ComputeError, - match="NAN is not supported in a Non-floating point type column", - ): - assert_gpu_result_equal(q) - else: - with pytest.raises( - RuntimeError, - match="NAN is not supported in a Non-floating point type column", - ): - assert_gpu_result_equal(q) + assert_gpu_result_equal(q) @pytest.mark.parametrize( From 21d062d691252c8f259ac3888f0b77422609c75d Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Fri, 22 Aug 2025 11:56:15 -0500 Subject: [PATCH 195/366] Avoid using multiple `Cache` nodes with the same hash (#19769) Closes https://github.com/rapidsai/cudf/issues/19766 Adds simple caching logic to avoid using multiple `Cache` objects with the same hash in the translated logical plan. (I'm very open to other ideas as well) Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19769 --- .../cudf_polars/cudf_polars/dsl/translate.py | 18 +++++++++++------ python/cudf_polars/tests/test_cache.py | 20 +++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 2e0401d006c..6cadbcc4927 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -60,6 +60,7 @@ def __init__(self, visitor: NodeTraverser, engine: GPUEngine): self.visitor = visitor self.config_options = config.ConfigOptions.from_polars_engine(engine) self.errors: list[Exception] = [] + self._cache_nodes: dict[int, ir.Cache] = {} def translate_ir(self, *, n: int | None = None) -> ir.IR: """ @@ -278,12 +279,17 @@ def _(node: pl_ir.Cache, translator: Translator, schema: Schema) -> ir.IR: refcount = node.cache_hits else: refcount = None - return ir.Cache( - schema, - node.id_, - refcount, - translator.translate_ir(n=node.input), - ) + + # Make sure Cache nodes with the same id_ + # are actually the same object. 
+    if node.id_ not in translator._cache_nodes:
+        translator._cache_nodes[node.id_] = ir.Cache(
+            schema,
+            node.id_,
+            refcount,
+            translator.translate_ir(n=node.input),
+        )
+    return translator._cache_nodes[node.id_]
 
 
 @_translate_ir.register
diff --git a/python/cudf_polars/tests/test_cache.py b/python/cudf_polars/tests/test_cache.py
index e242cd52cb2..afb067738a4 100644
--- a/python/cudf_polars/tests/test_cache.py
+++ b/python/cudf_polars/tests/test_cache.py
@@ -59,3 +59,23 @@ def __getitem__(self, key):
     qir.evaluate(cache=node_cache, timer=None)
     assert len(node_cache) == 0
     assert node_cache.hits == 3
+
+
+def test_union_cache_nodes():
+    df = pl.LazyFrame({"a": [7, 8], "b": [12, 13]})
+    q = pl.concat([df, df])
+    qir = Translator(q._ldf.visit(), pl.GPUEngine()).translate_ir()
+    # Logical plan:
+    # UNION ('x', 'y', 'z')
+    #   CACHE ('x', 'y', 'z')
+    #     PROJECTION ('x', 'y', 'z')
+    #       DATAFRAMESCAN ('x', 'y', 'z')
+    # (repeated 2 times)
+
+    # Check that the concatenated Cache nodes are the same object
+    # See: https://github.com/rapidsai/cudf/issues/19766
+    assert isinstance(qir, ir.Union)
+    assert isinstance(qir.children[0], ir.Cache)
+    assert isinstance(qir.children[1], ir.Cache)
+    assert hash(qir.children[0]) == hash(qir.children[1])
+    assert hash(qir.children[0].children[0]) == hash(qir.children[1].children[0])

From 38fbfb245b1c8e63e023a9389d1a42a69512c1cf Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 22 Aug 2025 13:27:16 -0400
Subject: [PATCH 196/366] Add libcudf segmented_top_k APIs (#19597)

Adds the following libcudf APIs:
```
std::unique_ptr<column> segmented_top_k(
  column_view const& col,
  column_view const& segment_offsets,
  size_type k,
  order sort_order,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr);

std::unique_ptr<column> segmented_top_k_order(
  column_view const& col,
  column_view const& segment_offsets,
  size_type k,
  order sort_order,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr);
```

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/19597
---
 cpp/CMakeLists.txt                      |   1 +
 cpp/benchmarks/CMakeLists.txt           |  13 +-
 cpp/benchmarks/sort/segmented_top_k.cpp |  65 +++++++++
 cpp/include/cudf/sorting.hpp            |  72 ++++++++++
 cpp/src/sort/segmented_top_k.cu         | 182 ++++++++++++++++++++++++
 cpp/src/sort/top_k.cu                   |  18 ++-
 cpp/tests/CMakeLists.txt                |   4 +-
 cpp/tests/sort/sort_test.cpp            |  29 ----
 cpp/tests/sort/top_k_tests.cpp          | 141 ++++++++++++++++++
 9 files changed, 486 insertions(+), 39 deletions(-)
 create mode 100644 cpp/benchmarks/sort/segmented_top_k.cpp
 create mode 100644 cpp/src/sort/segmented_top_k.cu
 create mode 100644 cpp/tests/sort/top_k_tests.cpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5b4c34ccd09..8ee45a34df7 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -697,6 +697,7 @@ add_library(
   src/sort/is_sorted.cu
   src/sort/rank.cu
   src/sort/segmented_sort.cu
+  src/sort/segmented_top_k.cu
   src/sort/sort.cu
   src/sort/sort_column.cu
   src/sort/sort_radix.cu
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index c16f0789795..4f07dd72ed4 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -210,8 +210,17 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table
 # ##################################################################################################
 # * sort benchmark --------------------------------------------------------------------------------
 ConfigureNVBench(
-  SORT_NVBENCH sort/rank.cpp sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp
-  sort/sort.cpp sort/sort_lists.cpp sort/sort_strings.cpp sort/sort_structs.cpp sort/top_k.cpp
+  SORT_NVBENCH
+  sort/rank.cpp
+  sort/rank_lists.cpp
+  sort/rank_structs.cpp
+  sort/segmented_top_k.cpp
+  sort/segmented_sort.cpp
+  sort/sort.cpp
+  sort/sort_lists.cpp
+  sort/sort_strings.cpp
+  sort/sort_structs.cpp
+  sort/top_k.cpp
 )
 
 # ##################################################################################################
diff --git a/cpp/benchmarks/sort/segmented_top_k.cpp b/cpp/benchmarks/sort/segmented_top_k.cpp
new file mode 100644
index 00000000000..6e6507d2f51
--- /dev/null
+++ b/cpp/benchmarks/sort/segmented_top_k.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+template <typename T>
+void bench_segmented_top_k(nvbench::state& state, nvbench::type_list<T>)
+{
+  auto const ordered = static_cast<bool>(state.get_int64("ordered"));
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const segment = static_cast<cudf::size_type>(state.get_int64("segment"));
+  auto const k = static_cast<cudf::size_type>(state.get_int64("k"));
+  auto const data_type = cudf::type_to_id<T>();
+
+  data_profile const profile = data_profile_builder().no_validity().distribution(
+    data_type, distribution_id::UNIFORM, 0, segment);
+  auto const input = create_random_column(data_type, row_count{num_rows}, profile);
+
+  auto const segments = cudf::sequence((num_rows / segment) + 1,
+                                       cudf::numeric_scalar<cudf::size_type>(0),
+                                       cudf::numeric_scalar<cudf::size_type>(segment));
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.add_global_memory_reads<T>(num_rows);
+  state.add_global_memory_writes<T>(segments->size() * k);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    if (ordered) {
+      cudf::segmented_top_k_order(input->view(), segments->view(), k);
+    } else {
+      cudf::segmented_top_k(input->view(), segments->view(), k);
+    }
+  });
+}
+
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_s, "time_s", "time_s");
+
+using Types = nvbench::type_list;
+
+NVBENCH_BENCH_TYPES(bench_segmented_top_k, NVBENCH_TYPE_AXES(Types))
+  .set_name("segmented_top_k")
+  .add_int64_axis("num_rows", {262144, 2097152, 16777216, 67108864})
+  .add_int64_axis("segment", {1024, 2048})
+  .add_int64_axis("k", {100, 1000})
+  .add_int64_axis("ordered", {0, 1});
diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index 2ef5f66aa42..532cec53e8f 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -390,5 +390,77 @@ std::unique_ptr<column> top_k_order(
   rmm::cuda_stream_view stream = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Computes the top k values within each segment of a column
+ *
+ * Returns the top k values (largest or smallest) within each segment of the given column.
+ * The values within each segment may not necessarily be sorted.
+ * If a segment contains fewer than k elements, then all values for that segment are returned.
+ *
+ * @code{.pseudo}
+ * Example:
+ * col = [ 3, 4, 5, 4, 1, 2, 3, 5, 6, 7, 8, 9, 10 ]
+ * offsets = [0, 3, 7, 13]
+ * result = cudf::segmented_top_k(col, offsets, 3);
+ * result is [[5,4,3], [4,3,2], [10,8,9]] // each segment may not be sorted
+ * @endcode
+ *
+ * @throw std::invalid_argument if k is less than or equal to zero
+ * @throw cudf::data_type_error if segment_offsets is not size_type
+ * @throw std::invalid_argument if segment_offsets is empty or contains nulls
+ *
+ * @param col Column to compute top k
+ * @param segment_offsets Start offset index for each contiguous segment
+ * @param k Number of values to return for each segment
+ * @param sort_order DESCENDING is the largest k values (default).
+ *                   ASCENDING is the smallest k values.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return A column with the top k values of the input column.
+ */
+std::unique_ptr<column> segmented_top_k(
+  column_view const& col,
+  column_view const& segment_offsets,
+  size_type k,
+  order sort_order = order::DESCENDING,
+  rmm::cuda_stream_view stream = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
+/**
+ * @brief Computes the indices of the top k values within each segment of a column
+ *
+ * The indices will represent the top k elements within each segment but may not represent
+ * those elements as k sorted values.
+ * If a segment contains fewer than k elements, then all values for that segment are returned.
+ *
+ * @code{.pseudo}
+ * Example:
+ * col = [ 3, 4, 5, 4, 1, 2, 3, 5, 6, 7, 8, 9, 10 ]
+ * offsets = [0, 3, 7, 13]
+ * result = cudf::segmented_top_k_order(col, offsets, 3);
+ * result is [[2,1,0], [3,6,5], [12,10,11]] // each segment may not be sorted
+ * @endcode
+ *
+ * @throw std::invalid_argument if k is less than or equal to zero
+ * @throw cudf::data_type_error if segment_offsets is not size_type
+ * @throw std::invalid_argument if segment_offsets is empty or contains nulls
+ *
+ * @param col Column to compute top k
+ * @param segment_offsets Start offset index for each contiguous segment
+ * @param k Number of values to return for each segment
+ * @param sort_order DESCENDING is the indices of the largest k values (default).
+ *                   ASCENDING is the indices of the smallest k values.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Indices of the top k values of the input column
+ */
+std::unique_ptr<column> segmented_top_k_order(
+  column_view const& col,
+  column_view const& segment_offsets,
+  size_type k,
+  order sort_order = order::DESCENDING,
+  rmm::cuda_stream_view stream = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /** @} */  // end of group
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/sort/segmented_top_k.cu b/cpp/src/sort/segmented_top_k.cu
new file mode 100644
index 00000000000..ef27f2cc9f9
--- /dev/null
+++ b/cpp/src/sort/segmented_top_k.cu
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sort_column_impl.cuh"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+namespace cudf {
+namespace detail {
+namespace {
+
+/**
+ * @brief Resolves the k indices per segment
+ *
+ * Sets values outside the k range to -1 so they can be removed in a separate step.
+ * Also computes the total number of valid indices for each segment.
+ * All elements of a segment are used if it has fewer than k total elements.
+ *
+ * @param d_offsets Offsets for each segment
+ * @param k Number of values to keep in each segment
+ * @param d_indices Mark these indices to be removed
+ * @param d_segment_sizes Store actual sizes of each segment
+ */
+CUDF_KERNEL void resolve_segment_indices(device_span<size_type const> d_offsets,
+                                         size_type k,
+                                         device_span<size_type> d_indices,
+                                         size_type* d_segment_sizes)
+{
+  auto const tid = cudf::detail::grid_1d::global_thread_id();
+  if (tid >= d_indices.size()) { return; }
+
+  auto const sitr = thrust::upper_bound(thrust::seq, d_offsets.begin(), d_offsets.end(), tid);
+  auto const segment_start = *(sitr - 1);
+  auto const segment_end = *sitr;
+  auto const index = tid - segment_start;
+  if (index >= k) { d_indices[tid] = -1; }  // mark values outside of top k
+
+  if (index == 0) {
+    auto const segment_size = segment_end - segment_start;
+    auto const segment_index = thrust::distance(d_offsets.begin(), sitr) - 1;
+    // segment has k or fewer elements
+    d_segment_sizes[segment_index] = cuda::std::min(k, segment_size);
+  }
+}
+}  // namespace
+
+std::unique_ptr<column> segmented_top_k_order(column_view const& col,
+                                              column_view const& segment_offsets,
+                                              size_type k,
+                                              order sort_order,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::device_async_resource_ref mr)
+{
+  CUDF_EXPECTS(k >= 0, "k must be greater than or equal to 0", std::invalid_argument);
+
+  auto const size_data_type = data_type{type_to_id<size_type>()};
+  if (k == 0 || col.is_empty()) {
+    return cudf::make_empty_lists_column(size_data_type, stream, mr);
+  }
+
+  CUDF_EXPECTS(segment_offsets.size() > 0,
+               "segment_offsets must have at least one element",
+               std::invalid_argument);
+
+  CUDF_EXPECTS(segment_offsets.type() == size_data_type,
+               "segment_offsets must be of type INT32",
+               cudf::data_type_error);
+  CUDF_EXPECTS(segment_offsets.null_count() == 0,
+               "segment_offsets must not have nulls",
+               std::invalid_argument);
+
+  auto const nulls = sort_order == order::ASCENDING ? null_order::AFTER : null_order::BEFORE;
+  auto const temp_mr = cudf::get_current_device_resource_ref();
+  auto const indices = cudf::detail::segmented_sorted_order(
+    cudf::table_view({col}), segment_offsets, {sort_order}, {nulls}, stream, temp_mr);
+  auto const d_indices = indices->mutable_view().begin<size_type>();
+
+  auto segment_sizes = rmm::device_uvector<size_type>(segment_offsets.size() - 1, stream);
+  auto span_indices = device_span<size_type>{d_indices, static_cast<std::size_t>(indices->size())};
+  auto const grid = cudf::detail::grid_1d(indices->size(), 256);
+  resolve_segment_indices<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+    segment_offsets, k, span_indices, segment_sizes.data());
+  auto [offsets, total_elements] =
+    cudf::detail::make_offsets_child_column(segment_sizes.begin(), segment_sizes.end(), stream, mr);
+
+  auto result = cudf::make_fixed_width_column(
+    size_data_type, total_elements, mask_state::UNALLOCATED, stream, mr);
+  auto d_result = result->mutable_view().begin<size_type>();
+  // remove the indices marked by resolve_segment_indices
+  thrust::remove_copy(
+    rmm::exec_policy_nosync(stream), d_indices, d_indices + indices->size(), d_result, -1);
+
+  auto const num_rows = static_cast<size_type>(offsets->size() - 1);
+  return make_lists_column(
+    num_rows, std::move(offsets), std::move(result), 0, rmm::device_buffer{}, stream, mr);
+}
+
+std::unique_ptr<column> segmented_top_k(column_view const& col,
+                                        column_view const& segment_offsets,
+                                        size_type k,
+                                        order sort_order,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::device_async_resource_ref mr)
+{
+  if (col.is_empty()) { return cudf::make_empty_column(col.type()); }
+
+  auto ordered =
+    cudf::detail::segmented_top_k_order(col, segment_offsets, k, sort_order, stream, mr);
+  auto lv = cudf::lists_column_view(ordered->view());
+  if (lv.is_empty()) { return cudf::make_empty_lists_column(col.type(), stream, mr); }
+
+  auto result = cudf::detail::gather(cudf::table_view({col}),
+                                     lv.child(),
+                                     out_of_bounds_policy::DONT_CHECK,
+                                     negative_index_policy::NOT_ALLOWED,
+                                     stream,
+                                     mr);
+  auto offsets = std::move(ordered->release().children.front());
+  auto const num_rows = static_cast<size_type>(offsets->size() - 1);
+  return make_lists_column(num_rows,
+                           std::move(offsets),
+                           std::move(result->release().front()),
+                           0,
+                           rmm::device_buffer{},
+                           stream,
+                           mr);
+}
+
+}  // namespace detail
+
+std::unique_ptr<column> segmented_top_k(column_view const& col,
+                                        column_view const& segment_offsets,
+                                        size_type k,
+                                        order sort_order,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::segmented_top_k(col, segment_offsets, k, sort_order, stream, mr);
+}
+
+std::unique_ptr<column> segmented_top_k_order(column_view const& col,
+                                              column_view const& segment_offsets,
+                                              size_type k,
+                                              order sort_order,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::segmented_top_k_order(col, segment_offsets, k, sort_order, stream, mr);
+}
+}  // namespace cudf
diff --git a/cpp/src/sort/top_k.cu b/cpp/src/sort/top_k.cu
index 62d7b5f6842..3263144d000 100644
--- a/cpp/src/sort/top_k.cu
+++ b/cpp/src/sort/top_k.cu
@@ -17,10 +17,13 @@
 #include "sort_column_impl.cuh"
 
 #include
+#include
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -37,9 +40,9 @@ std::unique_ptr<column> top_k(column_view const& col,
                               rmm::cuda_stream_view stream,
                               rmm::device_async_resource_ref mr)
 {
-  CUDF_EXPECTS(k <= col.size(),
-               "k must be less than or equal to the number of rows in the column",
-               std::invalid_argument);
+  CUDF_EXPECTS(k >= 0, "k must be non-negative", std::invalid_argument);
+  if (k == 0 || col.is_empty()) { return empty_like(col); }
+  if (k >= col.size()) { return std::make_unique<column>(col, stream, mr); }
 
   // code will be specialized for fixed-width types once CUB topk function is available
   auto const nulls = sort_order == order::ASCENDING ? null_order::AFTER : null_order::BEFORE;
@@ -60,9 +63,12 @@ std::unique_ptr<column> top_k_order(column_view const& col,
                                     rmm::cuda_stream_view stream,
                                     rmm::device_async_resource_ref mr)
 {
-  CUDF_EXPECTS(k <= col.size(),
-               "k must be less than or equal to the number of rows in the column",
-               std::invalid_argument);
+  CUDF_EXPECTS(k >= 0, "k must be non-negative", std::invalid_argument);
+  if (k == 0 || col.is_empty()) { return make_empty_column(cudf::type_to_id<size_type>()); }
+  if (k >= col.size()) {
+    return cudf::detail::sequence(
+      col.size(), numeric_scalar<size_type>(0, true, stream), stream, mr);
+  }
 
   auto const nulls   = sort_order == order::ASCENDING ? null_order::AFTER : null_order::BEFORE;
   auto const indices = sorted_order(col, sort_order, nulls, stream, mr);
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 65e3a608660..63b41e55f51 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -365,8 +365,8 @@ ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu)
 # ##################################################################################################
 # * sort tests ------------------------------------------------------------------------------------
 ConfigureTest(
-  SORT_TEST sort/segmented_sort_tests.cpp sort/sort_nested_types_tests.cpp sort/sort_test.cpp
-  sort/stable_sort_tests.cpp sort/rank_test.cpp
+  SORT_TEST sort/rank_test.cpp sort/segmented_sort_tests.cpp sort/sort_nested_types_tests.cpp
+  sort/sort_test.cpp sort/stable_sort_tests.cpp sort/top_k_tests.cpp
   GPUS 1 PERCENT 70
 )
diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp
index 05a632c4f2b..9473e181c05 100644
--- a/cpp/tests/sort/sort_test.cpp
+++ b/cpp/tests/sort/sort_test.cpp
@@ -1113,33 +1113,4 @@ TEST_F(SortDouble, InfinityAndNan)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
-TYPED_TEST(Sort, TopK)
-{
-  using T = TypeParam;
-  if constexpr (std::is_same_v<T, bool>) { GTEST_SKIP(); }
-
-  auto itr   = thrust::counting_iterator<int32_t>(0);
-  auto input = cudf::test::fixed_width_column_wrapper<T, int32_t>(
-    itr, itr + 100, cudf::test::iterators::null_at(4));
-  auto expected =
-    cudf::test::fixed_width_column_wrapper<T, int32_t>({99, 98, 97, 96, 95, 94, 93, 92, 91, 90});
-  auto result = cudf::top_k(input, 10);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
-  auto expected_order = cudf::test::fixed_width_column_wrapper<cudf::size_type>(
-    {99, 98, 97, 96, 95, 94, 93, 92, 91, 90});
-  result = cudf::top_k_order(input, 10);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
-
-  result   = cudf::top_k(input, 10, cudf::order::ASCENDING);
-  expected = cudf::test::fixed_width_column_wrapper<T, int32_t>({0, 1, 2, 3, 5, 6, 7, 8, 9, 10});
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
-  expected_order =
-    cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 1, 2, 3, 5, 6, 7, 8, 9, 10});
-  result = cudf::top_k_order(input, 10, cudf::order::ASCENDING);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
-
-  EXPECT_THROW(cudf::top_k(input, 101), std::invalid_argument);
-  EXPECT_THROW(cudf::top_k_order(input, 101), std::invalid_argument);
-}
-
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/sort/top_k_tests.cpp b/cpp/tests/sort/top_k_tests.cpp
new file mode 100644
index 00000000000..2f087612833
--- /dev/null
+++ b/cpp/tests/sort/top_k_tests.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+using TestTypes = cudf::test::
+  Concat;
+
+template <typename T>
+struct TopKTypes : public cudf::test::BaseFixture {};
+
+TYPED_TEST_SUITE(TopKTypes, TestTypes);
+
+TYPED_TEST(TopKTypes, TopK)
+{
+  using T = TypeParam;
+
+  auto itr   = thrust::counting_iterator<int32_t>(0);
+  auto input = cudf::test::fixed_width_column_wrapper<T, int32_t>(
+    itr, itr + 100, cudf::test::iterators::null_at(4));
+  auto expected =
+    cudf::test::fixed_width_column_wrapper<T, int32_t>({99, 98, 97, 96, 95, 94, 93, 92, 91, 90});
+  auto result = cudf::top_k(input, 10);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
+  auto expected_order = cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+    {99, 98, 97, 96, 95, 94, 93, 92, 91, 90});
+  result = cudf::top_k_order(input, 10);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
+
+  result   = cudf::top_k(input, 10, cudf::order::ASCENDING);
+  expected = cudf::test::fixed_width_column_wrapper<T, int32_t>({0, 1, 2, 3, 5, 6, 7, 8, 9, 10});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
+  expected_order =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 1, 2, 3, 5, 6, 7, 8, 9, 10});
+  result = cudf::top_k_order(input, 10, cudf::order::ASCENDING);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
+}
+
+TYPED_TEST(TopKTypes, TopKSegmented)
+{
+  using T    = TypeParam;
+  using LCW  = cudf::test::lists_column_wrapper<T, int32_t>;
+  using LCWO = cudf::test::lists_column_wrapper<cudf::size_type>;
+
+  auto itr   = thrust::counting_iterator<int32_t>(0);
+  auto input = cudf::test::fixed_width_column_wrapper<T, int32_t>(
+    itr, itr + 100, cudf::test::iterators::null_at(4));
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+    {0, 15, 20, 23, 40, 42, 60, 70, 80, 90, 100});
+  {
+    // clang-format off
+    LCW expected({
+      {14, 13, 12}, {19, 18, 17}, {22, 21, 20}, {39, 38, 37}, {41, 40},
+      {59, 58, 57}, {69, 68, 67}, {79, 78, 77}, {89, 88, 87}, {99, 98, 97}});
+    LCWO expected_order({
+      {14, 13, 12}, {19, 18, 17}, {22, 21, 20}, {39, 38, 37}, {41, 40},
+      {59, 58, 57}, {69, 68, 67}, {79, 78, 77}, {89, 88, 87}, {99, 98, 97}});
+    // clang-format on
+    auto result = cudf::segmented_top_k(input, offsets, 3);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
+    result = cudf::segmented_top_k_order(input, offsets, 3);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
+  }
+
+  {
+    // clang-format off
+    LCW expected({
+      {0, 1, 2}, {15, 16, 17}, {20, 21, 22}, {23, 24, 25}, {40, 41},
+      {42, 43, 44}, {60, 61, 62}, {70, 71, 72}, {80, 81, 82}, {90, 91, 92}});
+    LCWO expected_order({
+      {0, 1, 2}, {15, 16, 17}, {20, 21, 22}, {23, 24, 25}, {40, 41},
+      {42, 43, 44}, {60, 61, 62}, {70, 71, 72}, {80, 81, 82}, {90, 91, 92}});
+    // clang-format on
+    auto result = cudf::segmented_top_k(input, offsets, 3, cudf::order::ASCENDING);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
+    result = cudf::segmented_top_k_order(input, offsets, 3, cudf::order::ASCENDING);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_order, result->view());
+  }
+}
+
+struct TopK : public cudf::test::BaseFixture {};
+
+TEST_F(TopK, Empty)
+{
+  auto input = cudf::test::fixed_width_column_wrapper<int32_t>({0, 1, 2, 3});
+
+  auto result = cudf::top_k(input, 0);
+  EXPECT_EQ(result->size(), 0);
+  result = cudf::top_k_order(input, 0);
+  EXPECT_EQ(result->size(), 0);
+  result = cudf::segmented_top_k(input, input, 0);
+  EXPECT_EQ(result->size(), 0);
+  result = cudf::segmented_top_k_order(input, input, 0);
+  EXPECT_EQ(result->size(), 0);
+}
+
+TEST_F(TopK, Errors)
+{
+  auto itr   = thrust::counting_iterator(0);
+  auto input = cudf::test::fixed_width_column_wrapper(itr, itr + 100);
+
+  EXPECT_THROW(cudf::top_k(input, -1), std::invalid_argument);
+  EXPECT_THROW(cudf::top_k_order(input, -1), std::invalid_argument);
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 15, 20, 23, 40, 42});
+  EXPECT_THROW(cudf::segmented_top_k(input, offsets, -1), std::invalid_argument);
+  EXPECT_THROW(cudf::segmented_top_k_order(input, offsets, -1), std::invalid_argument);
+  offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>({});
+  EXPECT_THROW(cudf::segmented_top_k(input, offsets, 10), std::invalid_argument);
+  EXPECT_THROW(cudf::segmented_top_k_order(input, offsets, 10), std::invalid_argument);
+  offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 15}, {1, 0});
+  EXPECT_THROW(cudf::segmented_top_k(input, offsets, 10), std::invalid_argument);
+  EXPECT_THROW(cudf::segmented_top_k_order(input, offsets, 10), std::invalid_argument);
+
+  EXPECT_THROW(cudf::segmented_top_k(input, input, 10), cudf::data_type_error);
+  EXPECT_THROW(cudf::segmented_top_k_order(input, input, 10), cudf::data_type_error);
+}
From b761f86c7deb96e1cad656e7a63abc64ae88620b Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 22 Aug 2025 12:30:59 -0500
Subject: [PATCH 197/366] Optionally capture Shuffle Stats in cudf-polars pdsh
 benchmarks (#19762)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This updates the PDSH benchmarks to gather and persist the shuffle stats
from rapidsmpf after each query iteration. For example:

```
❯ python python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py \
    --path /datasets/toaugspurger/tpch/scale-100/ --no-print-results \
    --executor streaming --scheduler distributed --n-workers=8 --protocol=ucx \
    --shuffle=rapidsmpf --rapidsmpf-dask-statistics --no-rapidsmpf-print-statistics \
    --iterations=1 --rmm-async 3
```

This will write the shuffle stats to the `pdsh_results.jsonl` file:

```
❯ tail -n 1 pdsh_results.jsonl | jq '.records."3"'
[
  {
    "query": 3,
    "duration": 5.394909042050131,
    "shuffle_stats": {
      "event-loop-check-future-finish": {
        "count": 561426,
        "value": 0.07810913599999943
      },
      ...
    }
  }
]
```

Additionally, we use the new configuration option from
https://github.com/rapidsai/rapidsmpf/pull/448 to control whether
statistics are printed to stdout.

Blocked by https://github.com/rapidsai/rapidsmpf/pull/452, which adds the
ability to clear statistics (which we do between each query iteration).
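For downstream analysis, the persisted stats can be read back from the JSONL
file. A minimal sketch (the file name and record layout are taken from the
example output above; treat the field handling as illustrative):

```python
import json

# Load the most recent benchmark run from the results file.
with open("pdsh_results.jsonl") as f:
    run = json.loads(f.readlines()[-1])

# "records" maps each query id to a list of per-iteration records.
for query_id, records in run["records"].items():
    for record in records:
        stats = record.get("shuffle_stats") or {}
        # Sum the "value" entries across all collected shuffle statistics.
        total = sum(entry.get("value", 0) for entry in stats.values())
        print(query_id, record["duration"], total)
```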
Authors: - Tom Augspurger (https://github.com/TomAugspurger) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19762 --- .../experimental/benchmarks/utils.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index 8074488024c..53b492e2e82 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -52,6 +52,7 @@ class Record: query: int duration: float + shuffle_stats: dict[str, dict[str, int | float]] | None = None @dataclasses.dataclass @@ -187,7 +188,8 @@ class RunConfig: records: dict[int, list[Record]] = dataclasses.field(default_factory=dict) dataset_path: Path scale_factor: int | float - shuffle: str | None = None + shuffle: Literal["rapidsmpf", "tasks"] | None = None + gather_shuffle_stats: bool = False broadcast_join_limit: int | None = None blocksize: int | None = None max_rows_per_partition: int | None = None @@ -203,6 +205,12 @@ class RunConfig: spill_device: float query_set: str + def __post_init__(self) -> None: # noqa: D105 + if self.gather_shuffle_stats and self.shuffle != "rapidsmpf": + raise ValueError( + "gather_shuffle_stats is only supported when shuffle='rapidsmpf'." + ) + @classmethod def from_args(cls, args: argparse.Namespace) -> RunConfig: """Create a RunConfig from command line arguments.""" @@ -251,6 +259,7 @@ def from_args(cls, args: argparse.Namespace) -> RunConfig: scheduler=scheduler, n_workers=args.n_workers, shuffle=args.shuffle, + gather_shuffle_stats=args.rapidsmpf_dask_statistics, broadcast_join_limit=args.broadcast_join_limit, dataset_path=path, scale_factor=scale_factor, @@ -414,6 +423,7 @@ def initialize_dask_cluster(run_config: RunConfig, args: argparse.Namespace): # { "dask_spill_device": str(run_config.spill_device), "dask_statistics": str(args.rapidsmpf_dask_statistics), + "dask_print_statistics": str(args.rapidsmpf_print_statistics), "oom_protection": str(args.rapidsmpf_oom_protection), } ), @@ -630,6 +640,12 @@ def parse_args( "--rapidsmpf-dask-statistics", action=argparse.BooleanOptionalAction, default=False, + help="Collect rapidsmpf shuffle statistics. 
The output will be stored in the 'shuffle_stats' field of each record.",
+    )
+    parser.add_argument(
+        "--rapidsmpf-print-statistics",
+        action=argparse.BooleanOptionalAction,
+        default=False,
         help="Print rapidsmpf shuffle statistics on each Dask worker upon completion.",
     )
     parser.add_argument(
@@ -729,6 +745,17 @@ def run_polars(
 
         result = execute_query(q_id, i, q, run_config, args, engine)
 
+        if run_config.shuffle == "rapidsmpf" and run_config.gather_shuffle_stats:
+            from rapidsmpf.integrations.dask.shuffler import (
+                clear_shuffle_statistics,
+                gather_shuffle_statistics,
+            )
+
+            shuffle_stats = gather_shuffle_statistics(client)  # type: ignore[arg-type]
+            clear_shuffle_statistics(client)  # type: ignore[arg-type]
+        else:
+            shuffle_stats = None
+
         if args.validate and run_config.executor != "cpu":
             try:
                 assert_gpu_result_equal(
@@ -743,7 +770,7 @@ def run_polars(
                 print(f"❌ Query {q_id} failed validation!\n{e}")
 
         t1 = time.monotonic()
-        record = Record(query=q_id, duration=t1 - t0)
+        record = Record(query=q_id, duration=t1 - t0, shuffle_stats=shuffle_stats)
 
         if args.print_results:
             print(result)
From 4760b4bcdab36cc8cdb7292b65e3580e00d7a48d Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Fri, 22 Aug 2025 11:06:44 -0700
Subject: [PATCH 198/366] Expand compression codec coverage in ORC and Parquet
 benchmarks (#19760)

Added ZSTD and ZLIB to the list of covered compression codecs in the reader
and writer benchmarks, since their support in nvCOMP is now stable. Also
converted the IO and compression type template parameters into string axes,
to match the Parquet reader benchmarks and to allow changing the values used
at runtime.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/19760
---
 cpp/benchmarks/io/orc/orc_reader_input.cpp | 68 ++++++++-----------
 cpp/benchmarks/io/orc/orc_writer.cpp | 35 ++++------
 .../io/parquet/parquet_reader_input.cpp | 2 +-
 cpp/benchmarks/io/parquet/parquet_writer.cpp | 35 ++++------
 4 files changed, 59 insertions(+), 81 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index cafd3cc5c39..6a6b48bebac 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -87,14 +87,14 @@ void orc_read_common(cudf::size_type num_rows_to_read, } // namespace -template -void BM_orc_read_data(nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_orc_read_data(nvbench::state& state, nvbench::type_list>) { auto const d_type = get_type_or_group(static_cast(DataType)); cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); - cuio_source_sink_pair source_sink(IOType); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + cuio_source_sink_pair source_sink(source_type); auto const num_rows_written = [&]() { auto const tbl = create_random_table( @@ -112,16 +112,18 @@ void BM_orc_read_data(nvbench::state& state, orc_read_common(num_rows_written, source_sink, state); } -template +template void orc_read_io_compression(nvbench::state& state) { - auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), - static_cast(data_type::FLOAT), - static_cast(data_type::DECIMAL), - static_cast(data_type::TIMESTAMP), - static_cast(data_type::STRING), - static_cast(data_type::LIST), - static_cast(data_type::STRUCT)}); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); + auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), + static_cast(data_type::FLOAT), + static_cast(data_type::DECIMAL), + static_cast(data_type::TIMESTAMP), + static_cast(data_type::STRING), + static_cast(data_type::LIST), + static_cast(data_type::STRUCT)}); auto const [cardinality, run_length] = [&]() -> std::pair { if constexpr (chunked_read) { @@ -131,7 +133,7 @@ void orc_read_io_compression(nvbench::state& state) static_cast(state.get_int64("run_length"))}; } }(); - cuio_source_sink_pair source_sink(IOType); + cuio_source_sink_pair source_sink(source_type); auto const num_rows_written = [&]() { auto const tbl = create_random_table( @@ -142,7 +144,7 @@ void orc_read_io_compression(nvbench::state& state) cudf::io::orc_writer_options opts = cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view) - .compression(Compression); + .compression(compression); cudf::io::write_orc(opts); return view.num_rows(); }(); @@ -150,20 +152,14 @@ void orc_read_io_compression(nvbench::state& state) orc_read_common(num_rows_written, source_sink, state); } -template -void BM_orc_read_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +void BM_orc_read_io_compression(nvbench::state& state) { - return orc_read_io_compression(state); + return orc_read_io_compression(state); } -template -void BM_orc_chunked_read_io_compression(nvbench::state& state, - nvbench::type_list>) +void BM_orc_chunked_read_io_compression(nvbench::state& state) { - // Only run benchmark using HOST_BUFFER IO. 
- return orc_read_io_compression(state); + return orc_read_io_compression(state); } using d_type_list = nvbench::enum_type_list; -using io_list = - nvbench::enum_type_list; - -using compression_list = - nvbench::enum_type_list; - -NVBENCH_BENCH_TYPES(BM_orc_read_data, - NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list)) +NVBENCH_BENCH_TYPES(BM_orc_read_data, NVBENCH_TYPE_AXES(d_type_list)) .set_name("orc_read_decode") - .set_type_axes_names({"data_type", "io"}) + .set_type_axes_names({"data_type"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_orc_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) +NVBENCH_BENCH(BM_orc_read_io_compression) .set_name("orc_read_io_compression") - .set_type_axes_names({"io", "compression"}) + .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"}) + .add_string_axis("compression_type", {"SNAPPY", "ZSTD", "ZLIB", "NONE"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); // Should have the same parameters as `BM_orc_read_io_compression` for comparison. -NVBENCH_BENCH_TYPES(BM_orc_chunked_read_io_compression, NVBENCH_TYPE_AXES(compression_list)) +NVBENCH_BENCH(BM_orc_chunked_read_io_compression) .set_name("orc_chunked_read_io_compression") - .set_type_axes_names({"compression"}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) + .add_string_axis("compression_type", {"SNAPPY", "ZSTD", "ZLIB", "NONE"}) .set_min_samples(4) // The input has approximately 520MB and 127K rows. // The limits below are given in MBs. diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index b795f3e3164..2021ed9e48d 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -82,10 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list -void BM_orc_write_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +void BM_orc_write_io_compression(nvbench::state& state) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), @@ -97,8 +94,8 @@ void BM_orc_write_io_compression( cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); - auto const compression = Compression; - auto const sink_type = IO; + auto const sink_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); auto const tbl = create_random_table(cycle_dtypes(d_type, num_cols), @@ -131,10 +128,9 @@ void BM_orc_write_io_compression( state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); } -template -void BM_orc_write_statistics( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_orc_write_statistics(nvbench::state& state, + nvbench::type_list>) { auto const d_type = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), static_cast(data_type::FLOAT), @@ -143,7 +139,7 @@ void BM_orc_write_statistics( static_cast(data_type::STRING), static_cast(data_type::LIST)}); - auto const compression = Compression; + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); auto const stats_freq = Statistics; auto const tbl = create_random_table(d_type, table_size_bytes{data_size}); @@ -183,11 +179,6 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; - -using compression_list = - nvbench::enum_type_list; - using stats_list = nvbench::enum_type_list; @@ -199,14 +190,16 @@ NVBENCH_BENCH_TYPES(BM_orc_write_encode, NVBENCH_TYPE_AXES(d_type_list)) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_orc_write_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) +NVBENCH_BENCH(BM_orc_write_io_compression) .set_name("orc_write_io_compression") - .set_type_axes_names({"io", "compression"}) + .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "VOID"}) + .add_string_axis("compression_type", {"SNAPPY", "ZSTD", "ZLIB", "NONE"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}); -NVBENCH_BENCH_TYPES(BM_orc_write_statistics, NVBENCH_TYPE_AXES(stats_list, compression_list)) +NVBENCH_BENCH_TYPES(BM_orc_write_statistics, NVBENCH_TYPE_AXES(stats_list)) .set_name("orc_write_statistics") - .set_type_axes_names({"statistics", "compression"}) + .set_type_axes_names({"statistics"}) + .add_string_axis("compression_type", {"SNAPPY", "ZSTD", "ZLIB", "NONE"}) .set_min_samples(4); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index d1699daff04..d6ae56aa2e4 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -420,7 +420,7 @@ NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list)) NVBENCH_BENCH(BM_parquet_read_io_compression) .set_name("parquet_read_io_compression") .add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"}) - .add_string_axis("compression_type", {"SNAPPY", "NONE"}) + .add_string_axis("compression_type", {"SNAPPY", 
"ZSTD", "NONE"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) .add_int64_axis("run_length", {1, 32}) diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index 84e4b8b93c0..a81ae82cae4 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -82,10 +82,7 @@ void BM_parq_write_encode(nvbench::state& state, nvbench::type_list -void BM_parq_write_io_compression( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +void BM_parq_write_io_compression(nvbench::state& state) { auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL), static_cast(data_type::FLOAT), @@ -99,8 +96,8 @@ void BM_parq_write_io_compression( cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); - auto const compression = Compression; - auto const sink_type = IO; + auto const sink_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); auto const tbl = create_random_table(cycle_dtypes(data_types, num_cols), @@ -133,13 +130,12 @@ void BM_parq_write_io_compression( state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size"); } -template -void BM_parq_write_varying_options( - nvbench::state& state, - nvbench::type_list, nvbench::enum_type>) +template +void BM_parq_write_varying_options(nvbench::state& state, + nvbench::type_list>) { auto const enable_stats = Statistics; - auto const compression = Compression; + auto const compression = retrieve_compression_type_enum(state.get_string("compression_type")); auto const file_path = state.get_string("file_path"); auto const data_types = get_type_or_group({static_cast(data_type::INTEGRAL_SIGNED), @@ -191,11 +187,6 @@ using d_type_list = nvbench::enum_type_list; -using io_list = nvbench::enum_type_list; - -using compression_list = - nvbench::enum_type_list; - using stats_list = nvbench::enum_type_list Date: Fri, 22 Aug 2025 16:45:53 -0400 Subject: [PATCH 199/366] Support rank expression in cudf-polars (#19340) - Contributes to https://github.com/rapidsai/cudf/issues/19200 - Depends on https://github.com/pola-rs/polars/pull/23512 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19340 --- .../cudf_polars/dsl/expressions/unary.py | 56 +++++++++++++++++-- .../cudf_polars/dsl/utils/aggregations.py | 5 ++ .../expressions/test_numeric_unaryops.py | 45 ++++++++++++++- python/cudf_polars/tests/test_groupby.py | 6 ++ python/cudf_polars/tests/test_rolling.py | 7 +++ 5 files changed, 113 insertions(+), 6 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index e763775e37c..9eeb7eae609 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -107,14 +107,15 @@ class UnaryFunction(Expr): "as_struct", "drop_nulls", "fill_null", + "fill_null_with_strategy", "mask_nans", + 
"null_count", + "rank", "round", "set_sorted", + "top_k", "unique", "value_counts", - "fill_null_with_strategy", - "null_count", - "top_k", } ) _supported_cum_aggs = frozenset( @@ -138,13 +139,14 @@ def __init__( self.children = children self.is_pointwise = self.name not in ( "as_struct", - "cum_min", "cum_max", + "cum_min", "cum_prod", "cum_sum", "drop_nulls", - "unique", + "rank", "top_k", + "unique", ) if self.name not in UnaryFunction._supported_fns: @@ -159,6 +161,12 @@ def __init__( raise NotImplementedError( "Filling null values with limit specified is not yet supported." ) + if self.name == "rank": + method, _, _ = self.options + if method not in {"average", "min", "max", "dense", "ordinal"}: + raise NotImplementedError( + f"ranking with {method=} is not yet supported" + ) def do_evaluate( self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME @@ -401,6 +409,44 @@ def do_evaluate( ), dtype=self.dtype, ) + elif self.name == "rank": + (column,) = (child.evaluate(df, context=context) for child in self.children) + method_str, descending, _ = self.options + + method = { + "average": plc.aggregation.RankMethod.AVERAGE, + "min": plc.aggregation.RankMethod.MIN, + "max": plc.aggregation.RankMethod.MAX, + "dense": plc.aggregation.RankMethod.DENSE, + "ordinal": plc.aggregation.RankMethod.FIRST, + }[method_str] + + order = ( + plc.types.Order.DESCENDING if descending else plc.types.Order.ASCENDING + ) + + ranked: plc.Column = plc.sorting.rank( + column.obj, + method, + order, + plc.types.NullPolicy.EXCLUDE, + plc.types.NullOrder.BEFORE if descending else plc.types.NullOrder.AFTER, + percentage=False, + ) + + # Min/Max/Dense/Ordinal -> IDX_DTYPE + # See https://github.com/pola-rs/polars/blob/main/crates/polars-ops/src/series/ops/rank.rs + if method_str in {"min", "max", "dense", "ordinal"}: + dest = self.dtype.plc.id() + src = ranked.type().id() + if dest == plc.TypeId.UINT32 and src != plc.TypeId.UINT32: + ranked = plc.unary.cast(ranked, plc.DataType(plc.TypeId.UINT32)) + elif ( + dest == plc.TypeId.UINT64 and src != plc.TypeId.UINT64 + ): # pragma: no cover + ranked = plc.unary.cast(ranked, plc.DataType(plc.TypeId.UINT64)) + + return Column(ranked, dtype=self.dtype) elif self.name == "top_k": (column, k) = ( child.evaluate(df, context=context) for child in self.children diff --git a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py index 2cd7cde44ef..5ddf060c41c 100644 --- a/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py +++ b/python/cudf_polars/cudf_polars/dsl/utils/aggregations.py @@ -89,6 +89,11 @@ def decompose_single_agg( """ agg = named_expr.value name = named_expr.name + if isinstance(agg, expr.UnaryFunction) and agg.name in {"rank"}: + name = agg.name + raise NotImplementedError( + f"UnaryFunction {name=} not supported in groupby context" + ) if isinstance(agg, expr.UnaryFunction) and agg.name == "null_count": (child,) = agg.children diff --git a/python/cudf_polars/tests/expressions/test_numeric_unaryops.py b/python/cudf_polars/tests/expressions/test_numeric_unaryops.py index dd3c443223c..8df62b18526 100644 --- a/python/cudf_polars/tests/expressions/test_numeric_unaryops.py +++ b/python/cudf_polars/tests/expressions/test_numeric_unaryops.py @@ -8,7 +8,11 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) +from cudf_polars.utils.versions import 
POLARS_VERSION_LT_132 @pytest.fixture( @@ -112,3 +116,42 @@ def test_null_count(): pl.col("baz").is_null().sum(), ) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("method", ["ordinal", "dense", "min", "max", "average"]) +@pytest.mark.parametrize("descending", [False, True]) +def test_rank_supported(request, ldf: pl.LazyFrame, method: str, *, descending: bool): + request.applymarker( + pytest.mark.xfail(condition=POLARS_VERSION_LT_132, reason="nested loop join") + ) + expr = pl.col("a").rank(method=method, descending=descending) + q = ldf.select(expr) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("method", ["ordinal", "dense", "min", "max", "average"]) +@pytest.mark.parametrize("descending", [False, True]) +@pytest.mark.parametrize("test", ["with_nulls", "with_ties"]) +def test_rank_methods_with_nulls_or_ties( + request, ldf: pl.LazyFrame, method: str, *, descending: bool, test: str +) -> None: + request.applymarker( + pytest.mark.xfail(condition=POLARS_VERSION_LT_132, reason="nested loop join") + ) + + base = pl.col("a") + if test == "with_nulls": + expr = pl.when((base % 2) == 0).then(None).otherwise(base) + else: + expr = pl.when((base % 2) == 0).then(pl.lit(-5)).otherwise(base) + + q = ldf.select(expr.rank(method=method, descending=descending)) + assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("seed", [42]) +@pytest.mark.parametrize("method", ["random"]) +def test_rank_unsupported(ldf: pl.LazyFrame, method: str, seed: int) -> None: + expr = pl.col("a").rank(method=method, seed=seed) + q = ldf.select(expr) + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index d900737c62b..3261afa7b7c 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -413,3 +413,9 @@ def test_groupby_fill_null_with_strategy(strategy): q = lf.group_by("key").agg(pl.col("val").fill_null(strategy=strategy)) assert_ir_translation_raises(q, NotImplementedError) + + +def test_groupby_rank_raises(df: pl.LazyFrame) -> None: + q = df.group_by("key1").agg(pl.col("int").rank()) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_rolling.py b/python/cudf_polars/tests/test_rolling.py index 9dbe75e14ba..53cae2180fe 100644 --- a/python/cudf_polars/tests/test_rolling.py +++ b/python/cudf_polars/tests/test_rolling.py @@ -272,3 +272,10 @@ def test_rolling_ternary_supported(df, expr): def test_rolling_ternary_unsupported(df, expr): q = df.rolling("dt", period="48h", closed="both").agg(expr.alias("out")) assert_ir_translation_raises(q, NotImplementedError) + + +def test_rolling_rank_unsupported(df): + q = df.rolling("dt", period="48h", closed="both").agg( + pl.col("values").rank(method="dense", descending=False) + ) + assert_ir_translation_raises(q, NotImplementedError) From 24403ca74808797e6043c14c4dceafff786a5ace Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 22 Aug 2025 14:30:22 -0700 Subject: [PATCH 200/366] Remove validation on import (#19775) This validation can be slow and adds unnecessary import overhead. It was added when cudf and cudf packaging was much less mature. These days if the install environment is not compatible for any reason the user should see comprehensible errors later from lower-level libraries without us doing an extra check ourselves. 
Contributes to #627 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19775 --- python/cudf/cudf/__init__.py | 5 - python/cudf/cudf/utils/gpu_utils.py | 162 ---------------------------- 2 files changed, 167 deletions(-) delete mode 100644 python/cudf/cudf/utils/gpu_utils.py diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index b56fe69568c..8f18c3609d0 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -10,11 +10,6 @@ libcudf.load_library() del libcudf -from cudf.utils.gpu_utils import validate_setup - -validate_setup() - -del validate_setup import cupy from numba import cuda diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py deleted file mode 100644 index 1ff638d8830..00000000000 --- a/python/cudf/cudf/utils/gpu_utils.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. - - -def validate_setup(): - import os - - # TODO: Remove the following check once we arrive at a solution for #4827 - # This is a temporary workaround to unblock internal testing - # related issue: https://github.com/rapidsai/cudf/issues/4827 - if ( - "RAPIDS_NO_INITIALIZE" in os.environ - or "CUDF_NO_INITIALIZE" in os.environ - ): - return - - import warnings - - from cuda.bindings.runtime import ( - cudaDeviceAttr, - cudaError_t, - ) - - from rmm._cuda.gpu import ( - CUDARuntimeError, - deviceGetName, - driverGetVersion, - getDeviceAttribute, - getDeviceCount, - runtimeGetVersion, - ) - - from cudf.errors import UnsupportedCUDAError - - notify_caller_errors = { - cudaError_t.cudaErrorInitializationError, - cudaError_t.cudaErrorInvalidDeviceFunction, - cudaError_t.cudaErrorInvalidDevice, - cudaError_t.cudaErrorStartupFailure, - cudaError_t.cudaErrorInvalidKernelImage, - cudaError_t.cudaErrorAlreadyAcquired, - cudaError_t.cudaErrorOperatingSystem, - cudaError_t.cudaErrorNotPermitted, - cudaError_t.cudaErrorNotSupported, - cudaError_t.cudaErrorSystemNotReady, - cudaError_t.cudaErrorSystemDriverMismatch, - cudaError_t.cudaErrorCompatNotSupportedOnDevice, - cudaError_t.cudaErrorDeviceUninitialized, - cudaError_t.cudaErrorTimeout, - cudaError_t.cudaErrorUnknown, - cudaError_t.cudaErrorApiFailureBase, - } - - try: - gpus_count = getDeviceCount() - except CUDARuntimeError as e: - if e.status in notify_caller_errors: - raise e - - # We must distinguish between "CPU only" and "the driver is - # insufficient for the runtime". - if e.status == cudaError_t.cudaErrorInsufficientDriver: - # cudaDriverGetVersion() returns 0 when ``libcuda.so`` is - # missing. Otherwise there is a CUDA driver but it is - # insufficient for the runtime, so we re-raise the original - # exception - if driverGetVersion() != 0: - raise e - - # If there is no GPU detected, set `gpus_count` to -1 - gpus_count = -1 - except RuntimeError as e: - # When using cuda-python < 12.9, getDeviceCount() can raise a - # RuntimeError if ``libcuda.so`` is missing. We don't want this to - # propagate up to the user. 
- warnings.warn(str(e)) - return - - if gpus_count > 0: - # Cupy throws RunTimeException to get GPU count, - # hence obtaining GPU count by in-house cpp api above - - major_version = getDeviceAttribute( - cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0 - ) - - if major_version < 7: - # A GPU with NVIDIA Volta™ architecture or newer is required. - # Reference: https://developer.nvidia.com/cuda-gpus - # Hardware Generation Compute Capability - # Hopper 9.x - # Ampere 8.x - # Turing 7.5 - # Volta 7.0, 7.2 - # Pascal 6.x - # Maxwell 5.x - # Kepler 3.x - # Fermi 2.x - device_name = deviceGetName(0) - minor_version = getDeviceAttribute( - cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0 - ) - raise UnsupportedCUDAError( - "A GPU with NVIDIA Volta™ (Compute Capability 7.0) " - "or newer architecture is required.\n" - f"Detected GPU 0: {device_name}\n" - f"Detected Compute Capability: {major_version}.{minor_version}" - ) - - cuda_runtime_version = runtimeGetVersion() - - if cuda_runtime_version < 12000: - # Require CUDA Runtime version 12.0 or greater. - major_version = cuda_runtime_version // 1000 - minor_version = (cuda_runtime_version % 1000) // 10 - raise UnsupportedCUDAError( - "Detected CUDA Runtime version is " - f"{major_version}.{minor_version}. " - "Please update your CUDA Runtime to 12.0 or above." - ) - - cuda_driver_supported_rt_version = driverGetVersion() - - # Though Yes, Externally driver version is represented like `418.39` - # and cuda runtime version like `10.1`. It is not the similar case - # at cuda api's level. Coming down to APIs they follow a uniform - # convention of an integer which corresponds to the versioning - # like (1000 major + 10 minor) for 10.1 Driver version API doesn't - # actually indicate driver version, it indicates only the latest - # CUDA version supported by the driver. - # For reference : - # https://docs.nvidia.com/deploy/cuda-compatibility/index.html - - if cuda_driver_supported_rt_version == 0: - raise UnsupportedCUDAError( - "We couldn't detect the GPU driver properly. Please follow " - "the installation guide to ensure your driver is properly " - "installed: " - "https://docs.nvidia.com/cuda/cuda-installation-guide-linux/" - ) - elif cuda_driver_supported_rt_version >= cuda_runtime_version: - # CUDA Driver Version Check: - # Driver Runtime version is >= Runtime version - pass - elif ( - cuda_driver_supported_rt_version >= 12000 - and cuda_runtime_version >= 12000 - ): - # With cuda enhanced compatibility any code compiled - # with 12.x version of cuda can now run on any - # driver >= 525.60.13. 12000 is the minimum cuda - # version 525.60.13 supports. - pass - else: - raise UnsupportedCUDAError( - "Please update your NVIDIA GPU Driver to support CUDA " - "Runtime.\n" - f"Detected CUDA Runtime version : {cuda_runtime_version}\n" - "Latest version of CUDA supported by current " - f"NVIDIA GPU Driver : {cuda_driver_supported_rt_version}" - ) - else: - warnings.warn("No NVIDIA GPU detected") From 3a4a9ff4e622da6849cfa19d0fc870cbb774a561 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 22 Aug 2025 17:28:23 -0500 Subject: [PATCH 201/366] Vendor libnvcomp in libcudf (#19743) Now that kvikio no longer uses nvcomp, we need to vendor libnvcomp in the libcudf wheels so that nvcomp can be fully removed as a dependency of kvikio. Merge this just before https://github.com/rapidsai/kvikio/pull/805. 
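As a quick post-install check (a hedged sketch, not part of the change
itself), the vendored library can be exercised through the loader updated
below: `libcudf.load_library()` now dlopens the bundled `libnvcomp.so.5`
before loading `libcudf.so`.

```python
# Minimal sanity check after installing the libcudf wheel.
import libcudf

# Loads libnvcomp.so.5 first (vendored under libcudf/lib64/), then returns
# the handle for libcudf.so, or None if loading is deferred to RPATHs.
handle = libcudf.load_library()
print(handle)
```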
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Mike Sarahan (https://github.com/msarahan) - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19743 --- build.sh | 6 ---- ci/build_wheel_cudf.sh | 2 +- ci/build_wheel_libcudf.sh | 6 +--- ci/build_wheel_pylibcudf.sh | 2 +- cpp/CMakeLists.txt | 1 - cpp/cmake/thirdparty/get_nvcomp.cmake | 4 +-- python/libcudf/CMakeLists.txt | 50 ++++++++++++++++----------- python/libcudf/libcudf/load.py | 22 +++++++----- 8 files changed, 47 insertions(+), 46 deletions(-) diff --git a/build.sh b/build.sh index d4443695347..28a3a30738c 100755 --- a/build.sh +++ b/build.sh @@ -73,7 +73,6 @@ BUILD_PER_THREAD_DEFAULT_STREAM=OFF BUILD_REPORT_METRICS=OFF BUILD_REPORT_INCL_CACHE_STATS=OFF BUILD_DISABLE_LARGE_STRINGS=OFF -USE_PROPRIETARY_NVCOMP=ON PYTHON_ARGS_FOR_INSTALL=("-m" "pip" "install" "--no-build-isolation" "--no-deps" "--config-settings" "rapidsai.disable-cuda=true") # Set defaults for vars that may not have been defined externally @@ -153,7 +152,6 @@ function buildLibCudfJniInDocker { -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ -DCMAKE_INSTALL_PREFIX=/usr/local/rapids \ -DUSE_NVTX=ON \ - -DCUDF_USE_PROPRIETARY_NVCOMP=ON \ -DCUDF_USE_ARROW_STATIC=ON \ -DCUDF_ENABLE_ARROW_S3=OFF \ -DBUILD_TESTS=OFF \ @@ -221,9 +219,6 @@ fi if hasArg --disable_nvtx; then BUILD_NVTX="OFF" fi -if hasArg --opensource_nvcomp; then - USE_PROPRIETARY_NVCOMP="OFF" -fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNINGS=OFF fi @@ -292,7 +287,6 @@ if buildAll || hasArg libcudf; then -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" \ -DCMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES}" \ -DUSE_NVTX=${BUILD_NVTX} \ - -DCUDF_USE_PROPRIETARY_NVCOMP=${USE_PROPRIETARY_NVCOMP} \ -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \ -DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \ diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index af289fe7229..8b75a01479f 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -24,7 +24,7 @@ echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${PYLIBCUDF_WHEELHOUSE} # repair wheels and write to the location that artifact-uploading code expects to find them python -m auditwheel repair \ --exclude libcudf.so \ - --exclude libnvcomp.so \ + --exclude libnvcomp.so.* \ --exclude libkvikio.so \ --exclude librapids_logger.so \ --exclude librmm.so \ diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index ae0ab29c7f8..ea2a818d97f 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -27,11 +27,7 @@ rapids-pip-retry install \ # 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) export PIP_NO_BUILD_ISOLATION=0 -# TODO(nvcomp): when `nvcomp` supports Python 3.13 and we de-vendor `nvcomp` from `kvikio` -# this should be switched back to using the nvcomp runtime wheel -# https://github.com/rapidsai/build-planning/issues/171 -# export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" -export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_FROM_LIBKVIKIO_WHEEL=ON" +export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=OFF" ./ci/build_wheel.sh "${package_name}" "${package_dir}" # repair wheels and write to the location that artifact-uploading code expects to find them diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 3f99f75ceb4..4c09752626f 100755 --- a/ci/build_wheel_pylibcudf.sh 
+++ b/ci/build_wheel_pylibcudf.sh @@ -22,7 +22,7 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${LIBCUDF_WHEELHOUSE}/lib # repair wheels and write to the location that artifact-uploading code expects to find them python -m auditwheel repair \ --exclude libcudf.so \ - --exclude libnvcomp.so \ + --exclude libnvcomp.so.* \ --exclude libkvikio.so \ --exclude librapids_logger.so \ --exclude librmm.so \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8ee45a34df7..5eb59323caa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -53,7 +53,6 @@ option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON) mark_as_advanced(CUDF_BUILD_TESTUTIL) -option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_EXPORT_NVCOMP "Export NVCOMP as a dependency" ON) option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF) mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED) diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake index 33b1b45fb44..bb5c0c3c215 100644 --- a/cpp/cmake/thirdparty/get_nvcomp.cmake +++ b/cpp/cmake/thirdparty/get_nvcomp.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -20,7 +20,7 @@ function(find_and_configure_nvcomp) if(CUDF_EXPORT_NVCOMP) set(export_args BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) endif() - rapids_cpm_nvcomp(${export_args} USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP}) + rapids_cpm_nvcomp(${export_args} USE_PROPRIETARY_BINARY ON) # Per-thread default stream if(TARGET nvcomp AND CUDF_USE_PER_THREAD_DEFAULT_STREAM) diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index ca737af08f3..6e4c525edbd 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -15,18 +15,18 @@ cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../cmake/rapids_config.cmake) +include(rapids-cpm) +include(rapids-cuda) +rapids_cuda_init_architectures(libcudf-python) +rapids_cpm_init() project( libcudf-python VERSION "${RAPIDS_VERSION}" - LANGUAGES CXX + LANGUAGES CXX CUDA ) -option(USE_NVCOMP_RUNTIME_WHEEL "Use the nvcomp wheel at runtime instead of the system library" OFF) - -# TODO(nvcomp): when `nvcomp` supports Python 3.13 and we de-vendor `nvcomp` from `kvikio` this -# option should be removed https://github.com/rapidsai/build-planning/issues/171 -option(USE_NVCOMP_FROM_LIBKVIKIO_WHEEL "Use nvcomp bundled with libkvikio" OFF) +option(USE_NVCOMP_RUNTIME_WHEEL "Use the nvcomp wheel at runtime instead of vendoring nvcomp" OFF) # Check if cudf is already available. If so, it is the user's responsibility to ensure that the # CMake package is also available at build time of the Python cudf package. 
@@ -42,12 +42,31 @@ set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) set(CUDF_BUILD_TESTUTIL OFF) set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) +set(CUDF_EXPORT_NVCOMP OFF) + +include(../../cpp/cmake/thirdparty/get_nvcomp.cmake) -# TODO(nvcomp): when `nvcomp` supports Python 3.13 and we de-vendor `nvcomp` from `kvikio` the -# libkvikio branch should be removed -if(USE_NVCOMP_RUNTIME_WHEEL OR USE_NVCOMP_FROM_LIBKVIKIO_WHEEL) - set(CUDF_EXPORT_NVCOMP OFF) +# Install only the specific libnvcomp.so.* library instead of all nvcomp targets +if(TARGET nvcomp::nvcomp) + get_target_property(is_imported nvcomp::nvcomp IMPORTED) + if(is_imported) + get_target_property(nvcomp_lib_path nvcomp::nvcomp IMPORTED_LOCATION_RELEASE) + # Compute the SOVERSION from the library path + get_filename_component(nvcomp_lib_dir ${nvcomp_lib_path} DIRECTORY) + get_filename_component(nvcomp_lib_name ${nvcomp_lib_path} NAME) + string(REGEX REPLACE "libnvcomp\\.so\\.([0-9]+)" "\\1" nvcomp_soversion ${nvcomp_lib_name}) + install( + FILES ${nvcomp_lib_path} + DESTINATION ${SKBUILD_PLATLIB_DIR}/libcudf/lib64/ + RENAME libnvcomp.so.${nvcomp_soversion} + ) + else() + message(FATAL_ERROR "nvcomp target must be imported") + endif() +else() + message(FATAL_ERROR "nvcomp target not found") endif() + set(CUDA_STATIC_RUNTIME ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) @@ -62,14 +81,3 @@ if(USE_NVCOMP_RUNTIME_WHEEL) APPEND ) endif() - -# TODO(nvcomp): when `nvcomp` supports Python 3.13 and we de-vendor `nvcomp` from `kvikio` this -# block should be removed -if(USE_NVCOMP_FROM_LIBKVIKIO_WHEEL) - set(rpaths "$ORIGIN/../../libkvikio/lib64") - set_property( - TARGET cudf - PROPERTY INSTALL_RPATH ${rpaths} - APPEND - ) -endif() diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py index 4198fcbe385..d32d139d945 100644 --- a/python/libcudf/libcudf/load.py +++ b/python/libcudf/libcudf/load.py @@ -62,29 +62,33 @@ def load_library(): # we assume the library is discoverable on system paths. pass + _load_library("libnvcomp.so.5") + return _load_library("libcudf.so") + + +def _load_library(soname): prefer_system_installation = ( os.getenv("RAPIDS_LIBCUDF_PREFER_SYSTEM_LIBRARY", "false").lower() != "false" ) - soname = "libcudf.so" - libcudf_lib = None + found_lib = None if prefer_system_installation: # Prefer a system library if one is present to # avoid clobbering symbols that other packages might expect, but if no # other library is present use the one in the wheel. try: - libcudf_lib = _load_system_installation(soname) + found_lib = _load_system_installation(soname) except OSError: - libcudf_lib = _load_wheel_installation(soname) + found_lib = _load_wheel_installation(soname) else: # Prefer the libraries bundled in this package. If they aren't found # (which might be the case in builds where the library was prebuilt before # packaging the wheel), look for a system installation. try: - libcudf_lib = _load_wheel_installation(soname) - if libcudf_lib is None: - libcudf_lib = _load_system_installation(soname) + found_lib = _load_wheel_installation(soname) + if found_lib is None: + found_lib = _load_system_installation(soname) except OSError: # If none of the searches above succeed, just silently return None # and rely on other mechanisms (like RPATHs on other DSOs) to @@ -93,5 +97,5 @@ def load_library(): # The caller almost never needs to do anything with this library, but no # harm in offering the option since this object at least provides a handle - # to inspect where libcudf was loaded from. 
- return libcudf_lib + # to inspect where the library was loaded from. + return found_lib From ac044f14ecf41de2c1957b5d78a8a79908984f2f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 22 Aug 2025 15:39:22 -0700 Subject: [PATCH 202/366] Moves test_options to cudf testing directory, clean up old, stubbed testing files in directory (#19698) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19698 --- python/cudf/cudf/tests/conftest.py | 33 ++ .../datetimeindex/methods/test_tz_convert.py | 6 + .../indexes/datetimeindex/test_attributes.py | 27 ++ .../cudf/tests/indexes/test_categorical.py | 1 - .../cudf/cudf/tests/indexes/test_combining.py | 1 - .../cudf/tests/indexes/test_computation.py | 1 - .../cudf/tests/indexes/test_constructing.py | 1 - .../cudf/tests/indexes/test_conversion.py | 1 - .../cudf/tests/indexes/test_memory_usage.py | 1 - .../cudf/cudf/tests/indexes/test_missing.py | 1 - .../cudf/cudf/tests/indexes/test_modifying.py | 1 - .../tests/indexes/test_multiindex_compat.py | 1 - .../cudf/cudf/tests/indexes/test_numeric.py | 1 - .../cudf/tests/indexes/test_properties.py | 1 - .../cudf/cudf/tests/indexes/test_selecting.py | 1 - .../cudf/cudf/tests/indexes/test_sorting.py | 1 - .../cudf/tests/indexes/test_time_specific.py | 1 - python/cudf/cudf/tests/lists/__init__.py | 0 .../cudf/tests/lists/test_list_methods.py | 1 - .../cudf/cudf/tests/options/test_options.py | 131 +++++++- .../cudf/tests/series/accessors/test_dt.py | 157 +++++++++ .../cudf/tests/series/methods/test_astype.py | 25 ++ .../test_convert_dtypes.py} | 6 +- .../tests/series/methods/test_to_pandas.py | 13 + .../cudf/tests/series/test_constructors.py | 37 +++ .../cudf/tests/series/test_datetimelike.py | 300 ------------------ python/cudf/cudf/tests/strings/__init__.py | 0 .../cudf/tests/strings/test_string_methods.py | 1 - python/cudf/cudf/tests/test_options.py | 129 -------- 29 files changed, 429 insertions(+), 451 deletions(-) delete mode 100644 python/cudf/cudf/tests/indexes/test_categorical.py delete mode 100644 python/cudf/cudf/tests/indexes/test_combining.py delete mode 100644 python/cudf/cudf/tests/indexes/test_computation.py delete mode 100644 python/cudf/cudf/tests/indexes/test_constructing.py delete mode 100644 python/cudf/cudf/tests/indexes/test_conversion.py delete mode 100644 python/cudf/cudf/tests/indexes/test_memory_usage.py delete mode 100644 python/cudf/cudf/tests/indexes/test_missing.py delete mode 100644 python/cudf/cudf/tests/indexes/test_modifying.py delete mode 100644 python/cudf/cudf/tests/indexes/test_multiindex_compat.py delete mode 100644 python/cudf/cudf/tests/indexes/test_numeric.py delete mode 100644 python/cudf/cudf/tests/indexes/test_properties.py delete mode 100644 python/cudf/cudf/tests/indexes/test_selecting.py delete mode 100644 python/cudf/cudf/tests/indexes/test_sorting.py delete mode 100644 python/cudf/cudf/tests/indexes/test_time_specific.py delete mode 100644 python/cudf/cudf/tests/lists/__init__.py delete mode 100644 python/cudf/cudf/tests/lists/test_list_methods.py rename python/cudf/cudf/tests/series/{test_conversion.py => methods/test_convert_dtypes.py} (90%) delete mode 100644 python/cudf/cudf/tests/series/test_datetimelike.py delete mode 100644 python/cudf/cudf/tests/strings/__init__.py delete mode 100644 
python/cudf/cudf/tests/strings/test_string_methods.py delete mode 100644 python/cudf/cudf/tests/test_options.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index b9c21a67c43..f4157430185 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -5,9 +5,11 @@ import operator import os import pathlib +import zoneinfo import cupy as cp import numpy as np +import pandas as pd import pytest import rmm # noqa: F401 @@ -176,6 +178,37 @@ def pytest_runtest_makereport(item, call): setattr(item, "report", {rep.when: rep}) +def _get_all_zones(): + zones = [] + for zone in zoneinfo.available_timezones(): + # TODO: pandas 3.0 defaults to zoneinfo, + # so all_zone_names can use zoneinfo.available_timezones() + try: + pd.DatetimeTZDtype("ns", zone) + except KeyError: + continue + else: + zones.append(zone) + return sorted(zones) + + +# NOTE: _get_all_zones is a very large list; we likely do NOT want to +# use it for more than a handful of tests +@pytest.fixture(params=_get_all_zones()) +def all_timezones(request): + return request.param + + +@pytest.fixture( + params=["America/New_York", "Asia/Tokyo", "CET", "Etc/GMT+1", "UTC"] +) +def limited_timezones(request): + """ + Small representative set of timezones for testing. + """ + return request.param + + @pytest.fixture( params=[ { diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py index 1c026224da3..816b2e108b1 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_tz_convert.py @@ -2,6 +2,7 @@ import zoneinfo import pandas as pd +import pytest import cudf from cudf.testing import assert_eq @@ -14,3 +15,8 @@ def test_tz_convert(): pidx = pidx.tz_localize("UTC") idx = idx.tz_localize("UTC") assert_eq(pidx.tz_convert(tz), idx.tz_convert(tz)) + + +def test_tz_convert_naive_typeerror(): + with pytest.raises(TypeError, match="Cannot convert tz-naive timestamps"): + cudf.date_range("2020", periods=2, freq="D").tz_convert(None) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py index 8d230451886..d5b24a6a88e 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py @@ -9,6 +9,33 @@ from cudf.testing import assert_eq +@pytest.mark.parametrize( + "item, expected", + [ + ["2020-01-01", False], + ["2020-01-01T00:00:00+00:00", True], + ["2020-01-01T00:00:00-08:00", False], + ["2019-12-31T16:00:00-08:00", True], + ], +) +def test_contains_tz_aware(item, expected): + dti = cudf.date_range("2020", periods=2, freq="D").tz_localize("UTC") + result = item in dti + assert result == expected + + +def test_tz_aware_attributes_local(): + data = [ + "2008-05-12 13:50:00", + "2008-12-12 14:50:35", + "2009-05-12 13:50:32", + ] + dti = cudf.DatetimeIndex(data).tz_localize("UTC").tz_convert("US/Eastern") + result = dti.hour + expected = cudf.Index([9, 9, 9], dtype="int16") + assert_eq(result, expected) + + @pytest.mark.parametrize( "field", [ diff --git a/python/cudf/cudf/tests/indexes/test_categorical.py b/python/cudf/cudf/tests/indexes/test_categorical.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_categorical.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA 
CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_combining.py b/python/cudf/cudf/tests/indexes/test_combining.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_combining.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_computation.py b/python/cudf/cudf/tests/indexes/test_computation.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_computation.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_constructing.py b/python/cudf/cudf/tests/indexes/test_constructing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_constructing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_conversion.py b/python/cudf/cudf/tests/indexes/test_conversion.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_conversion.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_memory_usage.py b/python/cudf/cudf/tests/indexes/test_memory_usage.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_memory_usage.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_missing.py b/python/cudf/cudf/tests/indexes/test_missing.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_missing.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_modifying.py b/python/cudf/cudf/tests/indexes/test_modifying.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_modifying.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_multiindex_compat.py b/python/cudf/cudf/tests/indexes/test_multiindex_compat.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_multiindex_compat.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_numeric.py b/python/cudf/cudf/tests/indexes/test_numeric.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_numeric.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_properties.py b/python/cudf/cudf/tests/indexes/test_properties.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_properties.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_selecting.py b/python/cudf/cudf/tests/indexes/test_selecting.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_selecting.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
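The conftest.py hunk earlier in this patch replaces walking /usr/share/zoneinfo with the stdlib zoneinfo module. A minimal standalone sketch of that filtering pattern, with an illustrative helper name that is not part of the patch:

import zoneinfo

import pandas as pd


def timezones_pandas_accepts() -> list[str]:
    # Keep only the zones pandas can build a DatetimeTZDtype for; pandas
    # (before 3.0) may not recognize every zone that zoneinfo ships.
    accepted = []
    for zone in zoneinfo.available_timezones():
        try:
            pd.DatetimeTZDtype("ns", zone)
        except KeyError:
            continue
        accepted.append(zone)
    return sorted(accepted)

Sorting the result keeps the pytest parametrization order deterministic across runs, which matters when the all_timezones fixture fans out into hundreds of test cases.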
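The new options/test_options.py below exercises cudf's public options API. For orientation, a short sketch that uses only calls appearing in those tests:

import cudf

# Print an option's description, default, and current value.
cudf.describe_option("default_integer_bitwidth")

# Temporarily override an option; the previous value is restored on exit.
with cudf.option_context("default_integer_bitwidth", 32):
    assert cudf.get_option("default_integer_bitwidth") == 32

# Or set it for the rest of the session; the tests show None is a valid value.
cudf.set_option("default_integer_bitwidth", None)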
diff --git a/python/cudf/cudf/tests/indexes/test_sorting.py b/python/cudf/cudf/tests/indexes/test_sorting.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_sorting.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/indexes/test_time_specific.py b/python/cudf/cudf/tests/indexes/test_time_specific.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/indexes/test_time_specific.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/lists/__init__.py b/python/cudf/cudf/tests/lists/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/tests/lists/test_list_methods.py b/python/cudf/cudf/tests/lists/test_list_methods.py deleted file mode 100644 index 06777c8e6af..00000000000 --- a/python/cudf/cudf/tests/lists/test_list_methods.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. diff --git a/python/cudf/cudf/tests/options/test_options.py b/python/cudf/cudf/tests/options/test_options.py index 06777c8e6af..3f994959ec9 100644 --- a/python/cudf/cudf/tests/options/test_options.py +++ b/python/cudf/cudf/tests/options/test_options.py @@ -1 +1,130 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. + +from contextlib import redirect_stdout +from io import StringIO + +import pytest + +import cudf + + +@pytest.fixture(scope="class", autouse=False) +def empty_option_environment(): + old_option_environment = cudf.options._OPTIONS + cudf.options._OPTIONS = {} + yield + cudf.options._OPTIONS = old_option_environment + + +@pytest.fixture(scope="function") +def odd_option(empty_option_environment): + def validator(x): + if not x % 2 == 1: + raise ValueError(f"Invalid option value {x}") + + cudf.options._register_option( + "odd_option", + 1, + "An odd option.", + validator, + ) + yield + del cudf.options._OPTIONS["odd_option"] + + +@pytest.fixture(scope="function") +def even_option(empty_option_environment): + def validator(x): + if not x % 2 == 0: + raise ValueError(f"Invalid option value {x}") + + cudf.options._register_option( + "even_option", 0, "An even option.", validator + ) + yield + del cudf.options._OPTIONS["even_option"] + + +@pytest.mark.usefixtures("odd_option", "even_option") +class TestCleanOptions: + def test_option_get_set(odd_option): + assert cudf.get_option("odd_option") == 1 + cudf.set_option("odd_option", 101) + assert cudf.get_option("odd_option") == 101 + + def test_option_set_invalid(odd_option): + with pytest.raises(ValueError, match="Invalid option value 0"): + cudf.set_option("odd_option", 0) + + def test_option_description(odd_option): + s = StringIO() + with redirect_stdout(s): + cudf.describe_option("odd_option") + s.seek(0) + expected = ( + "odd_option:\n\tAn odd option.\n\t[Default: 1] [Current: 1]\n" + ) + assert expected == s.read() + + def test_option_description_all(odd_option, even_option): + s = StringIO() + with redirect_stdout(s): + cudf.describe_option() + s.seek(0) + expected = ( + "odd_option:\n\tAn odd option.\n\t[Default: 1] [Current: 1]\n" + "even_option:\n\tAn even option.\n\t[Default: 0] [Current: 0]\n" + ) + assert expected == s.read() + + +@pytest.mark.parametrize("default_integer_bitwidth", [32, 64, None]) +def test_empty_option_context(default_integer_bitwidth): + with cudf.option_context( + "default_integer_bitwidth", default_integer_bitwidth + ): + with 
cudf.option_context(): + assert ( + cudf.get_option("default_integer_bitwidth") + == default_integer_bitwidth + ) + + assert ( + cudf.get_option("default_integer_bitwidth") + == default_integer_bitwidth + ) + + +@pytest.mark.parametrize("pandas_compatible", [True, False]) +@pytest.mark.parametrize("default_integer_bitwidth", [32, 64]) +def test_option_context(pandas_compatible, default_integer_bitwidth): + prev_pandas_compatible_setting = cudf.get_option("mode.pandas_compatible") + prev_width_setting = cudf.get_option("default_integer_bitwidth") + + with cudf.option_context( + "mode.pandas_compatible", + pandas_compatible, + "default_integer_bitwidth", + default_integer_bitwidth, + ): + assert cudf.get_option("mode.pandas_compatible") is pandas_compatible + assert ( + cudf.get_option("default_integer_bitwidth") + is default_integer_bitwidth + ) + + assert ( + cudf.get_option("mode.pandas_compatible") + is prev_pandas_compatible_setting + ) + assert cudf.get_option("default_integer_bitwidth") is prev_width_setting + + +def test_options_context_error(): + with pytest.raises(ValueError): + with cudf.option_context("mode.pandas_compatible"): + pass + + with pytest.raises(ValueError): + with cudf.option_context("mode.pandas_compatible", 1, 2): + pass diff --git a/python/cudf/cudf/tests/series/accessors/test_dt.py b/python/cudf/cudf/tests/series/accessors/test_dt.py index 401f89aed65..a3f30b8a180 100644 --- a/python/cudf/cudf/tests/series/accessors/test_dt.py +++ b/python/cudf/cudf/tests/series/accessors/test_dt.py @@ -1,4 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import datetime +import zoneinfo import cupy as cp import numpy as np @@ -571,3 +573,158 @@ def test_dt_series_datetime_fields(data, field): base = getattr(pd_data.dt, field) test = getattr(gdf_data.dt, field) assert_eq(base, test, check_dtype=False) + + +@pytest.mark.parametrize("fmt", ["%Y-%m-%dT%H:%M%z", "%Y-%m-%dT%H:%M"]) +def test_strftime_tz_aware_as_utc(fmt): + data = [datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)] + cudf_pacific = cudf.Series(data).dt.tz_convert("US/Pacific") + pd_utc = pd.Series(data) + assert cudf_pacific.dtype != pd_utc.dtype + result = cudf_pacific.dt.strftime(fmt) + expected = pd_utc.dt.strftime(fmt) + assert_eq(result, expected) + + +def test_tz_localize(datetime_types_as_str, all_timezones): + s = cudf.Series(cudf.date_range("2001-01-01", "2001-01-02", freq="1s")) + s = s.astype(datetime_types_as_str) + s = s.dt.tz_localize(all_timezones) + assert isinstance(s.dtype, pd.DatetimeTZDtype) + assert s.dtype.unit == datetime_types_as_str.removeprefix( + "datetime64[" + ).removesuffix("]") + assert str(s.dtype.tz) == all_timezones + + +def test_localize_ambiguous(request, datetime_types_as_str, all_timezones): + request.applymarker( + pytest.mark.xfail( + condition=(all_timezones == "America/Metlakatla"), + reason="https://www.timeanddate.com/news/time/metlakatla-quits-dst.html", + ) + ) + s = cudf.Series( + [ + "2018-11-04 00:30:00", + "2018-11-04 01:00:00", + "2018-11-04 01:30:00", + "2018-11-04 02:00:00", + None, + "2018-11-04 02:30:00", + ], + dtype=datetime_types_as_str, + ) + expect = s.to_pandas().dt.tz_localize( + zoneinfo.ZoneInfo(all_timezones), ambiguous="NaT", nonexistent="NaT" + ) + got = s.dt.tz_localize(all_timezones) + assert_eq(expect, got) + + +def test_localize_nonexistent(request, datetime_types_as_str, all_timezones): + request.applymarker( + pytest.mark.xfail( + condition=all_timezones == "America/Grand_Turk", + 
reason="https://www.worldtimezone.com/dst_news/dst_news_turkscaicos03.html", + ) + ) + s = cudf.Series( + [ + "2018-03-11 01:30:00", + "2018-03-11 02:00:00", + "2018-03-11 02:30:00", + "2018-03-11 03:00:00", + None, + "2018-03-11 03:30:00", + ], + dtype=datetime_types_as_str, + ) + expect = s.to_pandas().dt.tz_localize( + zoneinfo.ZoneInfo(all_timezones), ambiguous="NaT", nonexistent="NaT" + ) + got = s.dt.tz_localize(all_timezones) + assert_eq(expect, got) + + +def test_delocalize(datetime_types_as_str, limited_timezones): + psr = pd.Series( + pd.date_range("2001-01-01", "2001-01-02", freq="1s") + ).astype(datetime_types_as_str) + sr = cudf.from_pandas(psr) + + expect = psr.dt.tz_localize(limited_timezones).dt.tz_localize(None) + got = sr.dt.tz_localize(limited_timezones).dt.tz_localize(None) + assert_eq(expect, got) + + +def test_delocalize_naive(): + # delocalizing naive datetimes should be a no-op + psr = pd.Series(["2001-01-01"], dtype="datetime64[ns]") + sr = cudf.from_pandas(psr) + + expect = psr.dt.tz_localize(None) + got = sr.dt.tz_localize(None) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "from_tz", ["Europe/London", "America/Chicago", "UTC"] +) +@pytest.mark.parametrize( + "to_tz", ["Europe/London", "America/Chicago", "UTC", None] +) +def test_convert(from_tz, to_tz): + from_tz = zoneinfo.ZoneInfo(from_tz) + if to_tz is not None: + to_tz = zoneinfo.ZoneInfo(to_tz) + ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="h")) + gs = cudf.from_pandas(ps) + ps = ps.dt.tz_localize(from_tz) + gs = gs.dt.tz_localize(from_tz) + expect = ps.dt.tz_convert(to_tz) + got = gs.dt.tz_convert(to_tz) + assert_eq(expect, got) + + +def test_convert_from_naive(): + gs = cudf.Series(cudf.date_range("2023-01-01", periods=3, freq="h")) + with pytest.raises(TypeError): + gs.dt.tz_convert("America/New_York") + + +@pytest.mark.parametrize( + "data,original_timezone,target_timezone", + [ + # DST transition: + (["2023-03-12 01:30:00"], "America/New_York", "America/Los_Angeles"), + # crossing the international date line: + (["2023-05-17 23:30:00"], "Pacific/Auckland", "America/Los_Angeles"), + # timezone with non-integer offset: + (["2023-05-17 12:00:00"], "Asia/Kolkata", "Australia/Eucla"), + # timezone with negative offset: + (["2023-05-17 09:00:00"], "America/Los_Angeles", "Pacific/Auckland"), + # conversion across multiple days: + (["2023-05-16 23:30:00"], "America/New_York", "Asia/Kolkata"), + # timezone with half-hour offset: + (["2023-05-17 12:00:00"], "Asia/Kolkata", "Australia/Adelaide"), + # timezone conversion with a timestamp in the future: + (["2025-01-01 00:00:00"], "America/New_York", "Europe/London"), + # timezone conversion with a timestamp in the past: + (["2000-01-01 12:00:00"], "Europe/Paris", "America/Los_Angeles"), + # timezone conversion with a timestamp at midnight: + (["2023-05-17 00:00:00"], "Asia/Tokyo", "Europe/Paris"), + ], +) +def test_convert_edge_cases(data, original_timezone, target_timezone): + original_timezone = zoneinfo.ZoneInfo(original_timezone) + target_timezone = zoneinfo.ZoneInfo(target_timezone) + ps = pd.Series(data, dtype="datetime64[s]").dt.tz_localize( + original_timezone + ) + gs = cudf.Series(data, dtype="datetime64[s]").dt.tz_localize( + original_timezone + ) + expect = ps.dt.tz_convert(target_timezone) + got = gs.dt.tz_convert(target_timezone) + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 18b13f7c3d1..609ad8efedd 100644 --- 
a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -1,5 +1,8 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import datetime +import zoneinfo + import cupy as cp import numpy as np import pandas as pd @@ -531,6 +534,28 @@ def test_datetime_infer_format(data, timezone, datetime_types_as_str): sr.astype(datetime_types_as_str) +@pytest.mark.parametrize("unit", ["ns", "us"]) +def test_astype_aware_to_aware(unit): + ser = cudf.Series( + [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)] + ) + result = ser.astype(f"datetime64[{unit}, US/Pacific]") + expected = ser.to_pandas().astype(f"datetime64[{unit}, US/Pacific]") + zoneinfo_type = pd.DatetimeTZDtype( + expected.dtype.unit, zoneinfo.ZoneInfo(str(expected.dtype.tz)) + ) + expected = ser.astype(zoneinfo_type) + assert_eq(result, expected) + + +def test_astype_naive_to_aware_raises(): + ser = cudf.Series([datetime.datetime(2020, 1, 1)]) + with pytest.raises(TypeError): + ser.astype("datetime64[ns, UTC]") + with pytest.raises(TypeError): + ser.to_pandas().astype("datetime64[ns, UTC]") + + @pytest.mark.parametrize( "np_dtype,pd_dtype", [ diff --git a/python/cudf/cudf/tests/series/test_conversion.py b/python/cudf/cudf/tests/series/methods/test_convert_dtypes.py similarity index 90% rename from python/cudf/cudf/tests/series/test_conversion.py rename to python/cudf/cudf/tests/series/methods/test_convert_dtypes.py index 1d680d7860d..e3216d01e77 100644 --- a/python/cudf/cudf/tests/series/test_conversion.py +++ b/python/cudf/cudf/tests/series/methods/test_convert_dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. import pandas as pd import pytest @@ -42,7 +42,3 @@ def test_convert_integer_false_convert_floating_true(): .to_pandas(nullable=True) ) assert_eq(result, expected) - - -# Now write the same test, but construct a DataFrame -# as input instead of parametrizing: diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py index 1768d6ccc0e..c49a4bfc7f3 100644 --- a/python/cudf/cudf/tests/series/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -13,6 +13,19 @@ from cudf.testing import assert_eq +def test_to_pandas_index_true_timezone(): + data = [ + "2008-05-12", + "2008-12-12", + "2009-05-12", + ] + dti = cudf.DatetimeIndex(data).tz_localize("UTC") + ser = cudf.Series(dti, index=list("abc")) + result = ser.to_pandas(index=True) + expected = pd.Series(pd.to_datetime(data, utc=True), index=list("abc")) + assert_eq(result, expected) + + @pytest.mark.parametrize( "sr_data,expected_psr", [ diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index c0aaf5a1b22..b5b03f4955b 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -2,6 +2,7 @@ import datetime import decimal import types +import zoneinfo import cupy as cp import numba.cuda @@ -1337,3 +1338,39 @@ def test_timezone_pyarrow_array(): result = cudf.Series(pa_array) expected = pa_array.to_pandas() assert_eq(result, expected) + + +@pytest.mark.parametrize( + "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"] +) +def test_pandas_compatible_non_zoneinfo_raises(klass): + pytz = pytest.importorskip("pytz") + tz = pytz.timezone("US/Pacific") + tz_aware_data = [pd.Timestamp("2020-01-01", 
tz="UTC").tz_convert(tz)] + pandas_obj = getattr(pd, klass)(tz_aware_data) + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.from_pandas(pandas_obj) + + +@pytest.mark.parametrize( + "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"] +) +def test_from_pandas_obj_tz_aware(klass): + tz = zoneinfo.ZoneInfo("US/Pacific") + tz_aware_data = [pd.Timestamp("2020-01-01", tz="UTC").tz_convert(tz)] + pandas_obj = getattr(pd, klass)(tz_aware_data) + result = cudf.from_pandas(pandas_obj) + expected = getattr(cudf, klass)(tz_aware_data) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"] +) +def test_from_pandas_obj_tz_aware_unsupported(klass): + tz = datetime.timezone(datetime.timedelta(hours=1)) + tz_aware_data = [pd.Timestamp("2020-01-01", tz="UTC").tz_convert(tz)] + pandas_obj = getattr(pd, klass)(tz_aware_data) + with pytest.raises(NotImplementedError): + cudf.from_pandas(pandas_obj) diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py deleted file mode 100644 index 400777e46e1..00000000000 --- a/python/cudf/cudf/tests/series/test_datetimelike.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright (c) 2023-2025, NVIDIA CORPORATION. - -import datetime -import os -import zoneinfo - -import pandas as pd -import pytest - -import cudf -from cudf import date_range -from cudf.testing import assert_eq - - -def _get_all_zones(): - zones = [] - for root, dirs, files in os.walk("/usr/share/zoneinfo"): - for f in files: - zone_name = ("/".join([root, f])).lstrip("/usr/share/zoneinfo") - try: - _ = pd.DatetimeTZDtype("ns", zone_name) - except Exception: - continue - zones.append(zone_name) - return zones - - -# NOTE: _get_all_zones is a very large list; we likely do NOT want to -# use it for more than a handful of tests -@pytest.fixture(params=_get_all_zones()) -def zone_name(request): - return request.param - - -@pytest.fixture(params=["ns", "us", "ms", "s"]) -def unit(request): - return request.param - - -@pytest.fixture( - params=["America/New_York", "Asia/Tokyo", "CET", "Etc/GMT+1", "UTC"] -) -def tz(request): - return request.param - - -def test_tz_localize(unit, zone_name): - s = cudf.Series(date_range("2001-01-01", "2001-01-02", freq="1s")) - s = s.astype(f" Date: Fri, 22 Aug 2025 19:19:32 -0400 Subject: [PATCH 203/366] remove initial memset of values in parquet reader (#19643) In the parquet reader we pre-fill the output column data/offset buffers with zeroes to make sure we don't have any uninitialized reads. This is slow and seems to be unnecessary, because the parquet reader should write over that memory anyway when reading the data. However, the reader doesn't write to the value buffer's memory where there are nulls, and without zero-initialization, reading this memory leads to crashes. When do we read nulled values/offsets? You'd think it'd be never, but we do so when we: * check_non_empty_nulls() * Trying to debug-print a column to screen * Trying to compare/print any differences between expected/result in tests * element() reads AND USES invalid offsets * etc. So we still remove the pre-zeroing, but instead write zeroes in data_out in the parquet reader decode functions for the entries that are null. This makes it so we only spend time zeroing what we need to (which is probably sparse) rather than the entire column. 
Also, because we write the input int32 TIME_MILLIS to an output int64 DURATION_MILLISECONDS, we have to fill in the last 4 bytes with zeroes. The performance improvement on my machine is about 5% for most column types, though it is negligible for strings (because perf is dominated by the string copy, not the offset write). Further work can be done to try to remove the null mask zeroing as well. Authors: - Paul Mattione (https://github.com/pmattione-nvidia) Approvers: - https://github.com/nvdbaranec - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/19643 --- .../io/parquet/parquet_reader_input.cpp | 1 + .../cudf/detail/utilities/batched_memset.hpp | 3 + cpp/src/io/parquet/decode_fixed.cu | 27 ++++- cpp/src/io/parquet/decode_preprocess.cu | 3 + cpp/src/io/parquet/page_data.cu | 30 ++++- cpp/src/io/parquet/page_decode.cuh | 109 ++++++++++++++++++ cpp/src/io/parquet/page_delta_decode.cu | 39 +++++++ cpp/src/io/parquet/reader_impl.cpp | 3 + cpp/src/io/parquet/reader_impl_preprocess.cu | 12 +- 9 files changed, 214 insertions(+), 13 deletions(-) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index d6ae56aa2e4..5bcae5cab23 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -488,5 +488,6 @@ NVBENCH_BENCH(BM_parquet_read_long_strings) .add_string_axis("io_type", {"DEVICE_BUFFER"}) .set_min_samples(4) .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("data_size", {512 << 20}) .add_int64_power_of_two_axis("avg_string_length", nvbench::range(4, 16, 2)); // 16, 64, ... -> 64k diff --git a/cpp/include/cudf/detail/utilities/batched_memset.hpp b/cpp/include/cudf/detail/utilities/batched_memset.hpp index bba50d61274..55e8821b59f 100644 --- a/cpp/include/cudf/detail/utilities/batched_memset.hpp +++ b/cpp/include/cudf/detail/utilities/batched_memset.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -46,6 +47,8 @@ void batched_memset(cudf::host_span const> host_buffers, T const value, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); + // Copy buffer spans into device memory and then get sizes auto buffers = cudf::detail::make_device_uvector_async( host_buffers, stream, cudf::get_current_device_resource_ref()); diff --git a/cpp/src/io/parquet/decode_fixed.cu b/cpp/src/io/parquet/decode_fixed.cu index 051dd79b5db..71c44f915fa 100644 --- a/cpp/src/io/parquet/decode_fixed.cu +++ b/cpp/src/io/parquet/decode_fixed.cu @@ -164,7 +164,10 @@ __device__ void decode_fixed_width_values( // Reading INT32 TIME_MILLIS into 64-bit DURATION_MILLISECONDS // TIME_MILLIS is the only duration type stored as int32: // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#deprecated-time-convertedtype - read_fixed_width_value_fast(s, sb, src_pos, static_cast(dst)); + auto const dst_ptr = static_cast(dst); + read_fixed_width_value_fast(s, sb, src_pos, dst_ptr); + // zero out most significant bytes + cuda::std::memset(dst_ptr + 1, 0, sizeof(int32_t)); } else if (s->ts_scale) { read_int64_timestamp(s, sb, src_pos, static_cast(dst)); } else { @@ -1148,9 +1151,10 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) // - valid_count: number of non-null values we have decoded so far. In each iteration of the // loop below, we look at the number of valid items (which could be all for non-nullable), // and valid_count is that running count.
- int processed_count = 0; - int valid_count = 0; - size_t string_output_offset = 0; + int processed_count = 0; + int valid_count = 0; + size_t string_output_offset = 0; + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; // Skip ahead in the decoding so that we don't repeat work (skipped_leaf_values = 0 for non-lists) auto const skipped_leaf_values = s->page.skipped_leaf_values; @@ -1253,6 +1257,21 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8) valid_count = next_valid_count; } + // Zero-fill null positions after decoding valid values + if (should_process_nulls) { + uint32_t const dtype_len = has_strings_t ? sizeof(cudf::size_type) : s->dtype_len; + int const num_values = [&]() { + if constexpr (has_lists_t) { + auto const& ni = s->nesting_info[s->col.max_nesting_depth - 1]; + return ni.valid_map_offset - init_valid_map_offset; + } else { + return s->num_rows; + } + }(); + zero_fill_null_positions_shared( + s, dtype_len, init_valid_map_offset, num_values, t); + } + if constexpr (has_strings_t) { // For large strings, update the initial string buffer offset to be used during large string // column construction. Otherwise, convert string sizes to final offsets. diff --git a/cpp/src/io/parquet/decode_preprocess.cu b/cpp/src/io/parquet/decode_preprocess.cu index 756f334ebe5..d0da5006780 100644 --- a/cpp/src/io/parquet/decode_preprocess.cu +++ b/cpp/src/io/parquet/decode_preprocess.cu @@ -18,6 +18,7 @@ #include "io/utilities/column_buffer.hpp" #include "page_decode.cuh" +#include #include #include @@ -518,6 +519,8 @@ void compute_page_sizes(cudf::detail::hostdevice_span pages, int level_type_size, rmm::cuda_stream_view stream) { + CUDF_FUNC_RANGE(); + dim3 dim_block(preprocess_block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index cc215e272c0..9f391588c1b 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -105,6 +105,9 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) __shared__ level_t rep[rolling_buf_size]; // circular buffer of repetition level values __shared__ level_t def[rolling_buf_size]; // circular buffer of definition level values + // Capture initial valid_map_offset before any processing that might modify it + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; + // skipped_leaf_values will always be 0 for flat hierarchies. 
uint32_t skipped_leaf_values = s->page.skipped_leaf_values; while (s->error == 0 && @@ -216,6 +219,16 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) } block.sync(); } + + // Zero-fill null positions after decoding valid values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const& ni = s->nesting_info[leaf_level_index]; + if (ni.valid_map != nullptr) { + int const num_values = ni.valid_map_offset - init_valid_map_offset; + zero_fill_null_positions_shared( + s, s->dtype_len, init_valid_map_offset, num_values, static_cast(block.thread_rank())); + } + if (block.thread_rank() == 0 and s->error != 0) { set_error(s->error, error_code); } } @@ -308,6 +321,9 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) PageNestingDecodeInfo* nesting_info_base = s->nesting_info; + // Capture initial valid_map_offset before any processing that might modify it + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; + if (s->dict_base) { out_warp_id = (s->dict_bits > 0) ? 2 : 1; } else { @@ -447,7 +463,10 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) // Reading INT32 TIME_MILLIS into 64-bit DURATION_MILLISECONDS // TIME_MILLIS is the only duration type stored as int32: // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#deprecated-time-convertedtype - read_fixed_width_value_fast(s, sb, val_src_pos, static_cast(dst)); + auto const dst_ptr = static_cast(dst); + read_fixed_width_value_fast(s, sb, val_src_pos, dst_ptr); + // zero out most significant bytes + cuda::std::memset(dst_ptr + 1, 0, sizeof(int32_t)); } else if (s->ts_scale) { read_int64_timestamp(s, sb, val_src_pos, static_cast(dst)); } else { @@ -464,6 +483,15 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) } __syncthreads(); } + + // Zero-fill null positions after decoding valid values + auto const& ni = s->nesting_info[s->col.max_nesting_depth - 1]; + if (ni.valid_map != nullptr) { + int const num_values = ni.valid_map_offset - init_valid_map_offset; + zero_fill_null_positions_shared( + s, s->dtype_len, init_valid_map_offset, num_values, static_cast(block.thread_rank())); + } + if (block.thread_rank() == 0 and s->error != 0) { set_error(s->error, error_code); } } diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 6c987230709..0ab76a5fd4b 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -1506,4 +1506,113 @@ inline __device__ bool setup_local_page_info(page_state_s* const s, return true; } +/** + * @brief Zero-fill null positions in output data using parallel per-validity-block processing + * + * This function processes the validity bitmap and zero-fills all positions in the output + * data that correspond to null values. It uses a parallel approach where each thread + * handles one 32-bit validity block at a time, looping only over the zero bits (null positions) + * within that block. 
+ * + * @tparam block_size CUDA block size for the kernel + * @param s Page state containing all necessary information + * @param dtype_len Size of each data element in bytes + * @param valid_map_offset Starting bit offset in the validity map + * @param num_values Number of values to process + * @param t Thread index within the block + */ +template +__device__ void zero_fill_null_positions_shared( + page_state_s* s, uint32_t dtype_len, int valid_map_offset, int num_values, int t) +{ + auto const block = cg::this_thread_block(); + auto const warp = cg::tiled_partition(block); + + // nesting level that is storing actual leaf values + int const leaf_level_index = s->col.max_nesting_depth - 1; + auto const& ni = s->nesting_info[leaf_level_index]; + + // Check if we have nulls to fill + if ((ni.valid_map == nullptr) || (num_values == 0)) { return; } + + auto const data_out = ni.data_out; + + constexpr int bits_per_mask = cudf::detail::size_in_bits(); + using cudf::detail::warp_size; + constexpr int num_warps = block_size / warp_size; + + // Calculate the range of validity blocks we need to process + int const start_bit_idx = valid_map_offset; + int const end_bit_idx = valid_map_offset + num_values; + int const start_block = start_bit_idx / bits_per_mask; + int const end_block = cudf::util::div_rounding_up_safe(end_bit_idx, bits_per_mask); + int const num_blocks = end_block - start_block; + + int const warp_id = t / warp_size; + int const lane_id = warp.thread_rank(); + + // Helper lambda for warp-parallel bit processing + auto process_block_parallel = [&](int block_idx) { + static_assert(bits_per_mask == warp_size, "if 64bit mask, use 2 warps per mask"); + + cudf::bitmask_type validity_word = ni.valid_map[block_idx]; + int const block_start_bit = block_idx * bits_per_mask; + + // Each thread in the warp processes one bit + int const bit_idx = block_start_bit + lane_id; + int const dst_pos = bit_idx - valid_map_offset; + + // Check if this bit is within our range + bool in_range = (bit_idx >= start_bit_idx && bit_idx < end_bit_idx); + + // Check if this bit is null (0 in validity mask) + bool const is_null = not cudf::bit_is_set(&validity_word, lane_id); + + if (in_range && is_null) { + void* const dst = data_out + (static_cast(dst_pos) * dtype_len); + cuda::std::memset(dst, 0, dtype_len); + } + }; + + // Helper lambda for sequential bit processing (fallback for remaining blocks) + auto process_block_sequential = [&](int block_idx) { + cudf::bitmask_type validity_word = ni.valid_map[block_idx]; + cudf::bitmask_type null_positions = ~validity_word; + int const dst_pos_first_bit = block_idx * bits_per_mask - valid_map_offset; + + while (null_positions != 0) { + int const bit_pos = __ffs(null_positions) - 1; + int const dst_pos = dst_pos_first_bit + bit_pos; + + void* const dst = data_out + (static_cast(dst_pos) * dtype_len); + cuda::std::memset(dst, 0, dtype_len); + + null_positions &= (null_positions - 1); + } + }; + + // Phase 1: Assign specific blocks to warps for warp-parallel processing + if (warp_id == 0) { + // Warp 0: Process first block + process_block_parallel(start_block); + } else if (warp_id == 1 && num_blocks > 1) { + // Warp 1: Process last block (if different from first) + process_block_parallel(end_block - 1); + } else if (warp_id >= 2) { + // Warps 2+: Process additional blocks from the beginning + int const block_idx = start_block + (warp_id - 1); + if (block_idx < (end_block - 1)) { process_block_parallel(block_idx); } + } + + // Phase 2: All warps cooperatively process 
remaining middle blocks + auto const last_block_processed = static_cast(num_blocks > 1); + int const remaining_start = start_block + num_warps - last_block_processed; + int const remaining_end = end_block - last_block_processed; + for (int block_idx = remaining_start + t; block_idx < remaining_end; block_idx += block_size) { + process_block_sequential(block_idx); + } + + __syncthreads(); +} + } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index 49f21bb2a15..660f7f96876 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -342,6 +342,9 @@ CUDF_KERNEL void __launch_bounds__(decode_delta_binary_block_size) // Must be evaluated after setup_local_page_info bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; + // Capture initial valid_map_offset before any processing that might modify it + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; + // Write list offsets and exit if the page does not need to be decoded if (not page_mask[page_idx]) { auto& page = pages[page_idx]; @@ -434,6 +437,14 @@ CUDF_KERNEL void __launch_bounds__(decode_delta_binary_block_size) block.sync(); } + // Zero-fill null positions after decoding valid values + auto const& ni = s->nesting_info[s->col.max_nesting_depth - 1]; + if (ni.valid_map != nullptr) { + int const num_values = ni.valid_map_offset - init_valid_map_offset; + zero_fill_null_positions_shared( + s, s->dtype_len, init_valid_map_offset, num_values, static_cast(block.thread_rank())); + } + if (block.thread_rank() == 0 and s->error != 0) { set_error(s->error, error_code); } } @@ -488,6 +499,9 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; + // Capture initial valid_map_offset before any processing that might modify it + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; + // Write list/string offsets and exit if the page does not need to be decoded if (not page_mask[page_idx]) { auto page = &pages[page_idx]; @@ -615,6 +629,17 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) block.sync(); } + // Zero-fill null positions after decoding valid values + auto const& ni = s->nesting_info[leaf_level_index]; + if (ni.valid_map != nullptr) { + int const num_values = ni.valid_map_offset - init_valid_map_offset; + zero_fill_null_positions_shared(s, + sizeof(size_type), + init_valid_map_offset, + num_values, + static_cast(block.thread_rank())); + } + // For large strings, update the initial string buffer offset to be used during large string // column construction. Otherwise, convert string sizes to final offsets. 
if (s->col.is_large_string_col) { @@ -684,6 +709,9 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; + // Capture initial valid_map_offset before any processing that might modify it + int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset; + // Write list/string offsets and exit if the page does not need to be decoded if (not page_mask[page_idx]) { auto page = &pages[page_idx]; @@ -806,6 +834,17 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size) block.sync(); } + // Zero-fill null positions after decoding valid values + auto const& ni = nesting_info_base[leaf_level_index]; + if (ni.valid_map != nullptr) { + int const num_values = ni.valid_map_offset - init_valid_map_offset; + zero_fill_null_positions_shared(s, + sizeof(size_type), + init_valid_map_offset, + num_values, + static_cast(block.thread_rank())); + } + // For large strings, update the initial string buffer offset to be used during large string // column construction. Otherwise, convert string sizes to final offsets. if (s->col.is_large_string_col) { diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 96aa416d27b..ccc75fbd293 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -18,6 +18,7 @@ #include "error.hpp" +#include #include #include #include @@ -49,6 +50,8 @@ inline bool is_treat_fixed_length_as_string(std::optional const& lo void reader_impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num_rows) { + CUDF_FUNC_RANGE(); + auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 1c5c1f2641c..d617b13165d 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -497,6 +497,8 @@ void reader_impl::generate_list_column_row_counts(is_estimate_row_counts is_esti void reader_impl::preprocess_subpass_pages(read_mode mode, size_t chunk_read_limit) { + CUDF_FUNC_RANGE(); + auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; @@ -639,6 +641,8 @@ void reader_impl::preprocess_subpass_pages(read_mode mode, size_t chunk_read_lim void reader_impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num_rows) { + CUDF_FUNC_RANGE(); + auto& pass = *_pass_itm_data; auto& subpass = *pass.subpass; @@ -665,8 +669,6 @@ void reader_impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num_ // buffers if they are not part of a list hierarchy. mark down // if we have any list columns that need further processing. 
bool has_lists = false; - // Casting to std::byte since data buffer pointer is void * - std::vector> memset_bufs; // Validity Buffer is a uint32_t pointer std::vector> nullmask_bufs; @@ -694,8 +696,6 @@ void reader_impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num_ std::overflow_error); out_buf.create_with_mask( out_buf_size, cudf::mask_state::UNINITIALIZED, false, _stream, _mr); - memset_bufs.emplace_back(static_cast(out_buf.data()), - out_buf.data_size()); nullmask_bufs.emplace_back( out_buf.null_mask(), cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / @@ -813,8 +813,6 @@ void reader_impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num_ // we're going to start null mask as all valid and then turn bits off if necessary out_buf.create_with_mask( buffer_size, cudf::mask_state::UNINITIALIZED, false, _stream, _mr); - memset_bufs.emplace_back(static_cast(out_buf.data()), - out_buf.data_size()); nullmask_bufs.emplace_back( out_buf.null_mask(), cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / @@ -824,8 +822,6 @@ void reader_impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num_ } } - cudf::detail::batched_memset( - memset_bufs, static_cast(0), _stream); // Need to set null mask bufs to all high bits cudf::detail::batched_memset( nullmask_bufs, std::numeric_limits::max(), _stream); From 5be7bebf6d650d4606e545c9f90908fef050446a Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 25 Aug 2025 10:33:46 -0400 Subject: [PATCH 204/366] Add examples of null handling to doxygen for cudf::rank (#19774) Updates the doxygen for `cudf::rank` to illustrate how the null-policy, null-order, and order parameter combinations behave for a given input. Closes #19772 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Matthew Murray (https://github.com/Matt711) - Bradley Dice (https://github.com/bdice) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/19774 --- cpp/include/cudf/sorting.hpp | 47 ++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 532cec53e8f..432d8b6f332 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -177,28 +177,55 @@ std::unique_ptr
stable_sort_by_key( * value starts from 1. * * @code{.pseudo} - * input = { 3, 4, 5, 4, 1, 2} - * Result for different rank_method are + * Using default order::ASCENDING + * input = {3, 4, 5, 4, 1, 2} + * Results for different rank_methods * FIRST = {3, 4, 6, 5, 1, 2} * AVERAGE = {3, 4.5, 6, 4.5, 1, 2} * MIN = {3, 4, 6, 4, 1, 2} * MAX = {3, 5, 6, 5, 1, 2} * DENSE = {3, 4, 5, 4, 1, 2} + * + * For null_policy::INCLUDE, null_order::AFTER + * input = {3, 4, null, 4, 1, 2} + * The results are the same as above. + * + * For null_policy::INCLUDE, null_order::BEFORE + * input = {3, 4, null, 4, 1, 2} + * Results for different rank_methods + * FIRST = {4, 5, 1, 6, 2, 3} + * AVERAGE = {4, 5.5, 1, 5.5, 2, 3} + * MIN = {4, 5, 1, 5, 2, 3} + * MAX = {4, 6, 1, 6, 2, 3} + * DENSE = {4, 5, 1, 5, 2, 3} + * + * For null_policy::EXCLUDE (null_order::AFTER only) + * input = {3, 4, null, 4, 1, 2} + * Results for different rank_methods + * FIRST = {3, 4, null, 5, 1, 2} + * AVERAGE = {3, 4.5, null, 4.5, 1, 2} + * MIN = {3, 4, null, 4, 1, 2} + * MAX = {3, 5, null, 5, 1, 2} + * DENSE = {3, 4, null, 4, 1, 2} * @endcode * + * For null_policy::EXCLUDE with null_order::BEFORE, using column_order::ASCENDING + * will result in undefined behavior. Likewise for null_policy::EXCLUDE with + * null_order::AFTER and column_order::DESCENDING. + * + * The output column type will be `double` when `method=rank_method::AVERAGE` or `percentage=True` + * and `size_type` otherwise. + * * @param input The column to rank * @param method The ranking method used for tie breaking (same values) * @param column_order The desired sort order for ranking - * @param null_handling flag to include nulls during ranking. If nulls are not - * included, corresponding rank will be null. - * @param null_precedence The desired order of null compared to other elements - * for column - * @param percentage flag to convert ranks to percentage in range (0,1] + * @param null_handling Flag to include nulls during ranking. + * If nulls are excluded, the corresponding rank will be null. + * @param null_precedence The desired order of null rows compared to other elements + * @param percentage Flag to convert ranks to percentage in range (0,1] * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return A column of containing the rank of the each element of the column of `input`. 
The output - * column type will be `size_type`column by default or else `double` when - * `method=rank_method::AVERAGE` or `percentage=True` + * @return A column of containing the rank of the each element of the column of `input` */ std::unique_ptr rank( column_view const& input, From c79567ab24d2b53d84768a7b9ccb9edf47206e60 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Aug 2025 10:02:56 -0700 Subject: [PATCH 205/366] Move test_buffer/column/column_accesor/cuda_apply.py to new cudf classic testing directory (#19737) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19737 --- .../methods/test_apply_chunks.py} | 166 +++--------------- .../dataframe/methods/test_apply_rows.py | 135 ++++++++++++++ .../tests/{ => input_output}/test_dlpack.py | 0 .../cudf/tests/private_objects/__init__.py | 0 .../{ => private_objects}/test_buffer.py | 0 .../{ => private_objects}/test_column.py | 89 +++------- .../test_column_accessor.py | 0 .../{ => private_objects}/test_compile_udf.py | 5 +- .../test_extension_compilation.py | 0 .../test_memory_records_report.py} | 0 .../{ => private_objects}/test_nrt_stats.py | 0 .../cudf/tests/series/methods/test_astype.py | 23 +++ 12 files changed, 212 insertions(+), 206 deletions(-) rename python/cudf/cudf/tests/{test_cuda_apply.py => dataframe/methods/test_apply_chunks.py} (50%) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_apply_rows.py rename python/cudf/cudf/tests/{ => input_output}/test_dlpack.py (100%) create mode 100644 python/cudf/cudf/tests/private_objects/__init__.py rename python/cudf/cudf/tests/{ => private_objects}/test_buffer.py (100%) rename python/cudf/cudf/tests/{ => private_objects}/test_column.py (87%) rename python/cudf/cudf/tests/{ => private_objects}/test_column_accessor.py (100%) rename python/cudf/cudf/tests/{ => private_objects}/test_compile_udf.py (96%) rename python/cudf/cudf/tests/{ => private_objects}/test_extension_compilation.py (100%) rename python/cudf/cudf/tests/{test_performance_tracking.py => private_objects/test_memory_records_report.py} (100%) rename python/cudf/cudf/tests/{ => private_objects}/test_nrt_stats.py (100%) diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/dataframe/methods/test_apply_chunks.py similarity index 50% rename from python/cudf/cudf/tests/test_cuda_apply.py rename to python/cudf/cudf/tests/dataframe/methods/test_apply_chunks.py index f7b0af9e51a..f215a29de89 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_apply_chunks.py @@ -1,105 +1,11 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. - -""" -Test method that apply GPU kernel to a frame. -""" +# Copyright (c) 2025, NVIDIA CORPORATION. import numpy as np import pytest from numba import cuda -import cudf from cudf import DataFrame -from cudf.core.column import column from cudf.testing import assert_eq -from cudf.testing._utils import gen_rand_series - - -def _kernel_multiply(a, b, out): - # numba doesn't support zip(..., strict=True), so we must tell ruff to ignore it. 
- for i, (x, y) in enumerate(zip(a, b)): # noqa: B905 - out[i] = x * y - - -@pytest.mark.parametrize("dtype", [np.dtype("float32"), np.dtype("float64")]) -@pytest.mark.parametrize("has_nulls", [False, True]) -@pytest.mark.parametrize("pessimistic", [False, True]) -def test_dataframe_apply_rows(dtype, has_nulls, pessimistic): - count = 1000 - gdf_series_a = gen_rand_series(dtype, count, has_nulls=has_nulls) - gdf_series_b = gen_rand_series(dtype, count, has_nulls=has_nulls) - gdf_series_c = gen_rand_series(dtype, count, has_nulls=has_nulls) - - if pessimistic: - # pessimistically combine the null masks - gdf_series_expected = gdf_series_a * gdf_series_b - else: - # optimistically ignore the null masks - a = cudf.Series._from_column( - column.build_column(gdf_series_a.data, dtype) - ) - b = cudf.Series._from_column( - column.build_column(gdf_series_b.data, dtype) - ) - gdf_series_expected = a * b - - df_expected = cudf.DataFrame( - { - "a": gdf_series_a, - "b": gdf_series_b, - "c": gdf_series_c, - "out": gdf_series_expected, - } - ) - - df_original = cudf.DataFrame( - {"a": gdf_series_a, "b": gdf_series_b, "c": gdf_series_c} - ) - - with pytest.warns(FutureWarning): - df_actual = df_original.apply_rows( - _kernel_multiply, - ["a", "b"], - {"out": dtype}, - {}, - pessimistic_nulls=pessimistic, - ) - - assert_eq(df_expected, df_actual) - - -def test_df_apply_rows(): - nelem = 20 - - def kernel(in1, in2, in3, out1, out2, extra1, extra2): - for i, (x, y, z) in enumerate(zip(in1, in2, in3)): # noqa: B905 - out1[i] = extra2 * x - extra1 * y - out2[i] = y - extra1 * z - - df = DataFrame() - df["in1"] = in1 = np.arange(nelem) - df["in2"] = in2 = np.arange(nelem) - df["in3"] = in3 = np.arange(nelem) - - extra1 = 2.3 - extra2 = 3.4 - - expect_out1 = extra2 * in1 - extra1 * in2 - expect_out2 = in2 - extra1 * in3 - - with pytest.warns(FutureWarning): - outdf = df.apply_rows( - kernel, - incols=["in1", "in2", "in3"], - outcols=dict(out1=np.float64, out2=np.float64), - kwargs=dict(extra1=extra1, extra2=extra2), - ) - - got_out1 = outdf["out1"].to_numpy() - got_out2 = outdf["out2"].to_numpy() - - np.testing.assert_array_almost_equal(got_out1, expect_out1) - np.testing.assert_array_almost_equal(got_out2, expect_out2) @pytest.mark.parametrize("chunksize", [1, 4, 23]) @@ -111,10 +17,17 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): out1[i] = extra2 * x - extra1 * y + z out2[i] = i - df = DataFrame() - df["in1"] = in1 = np.arange(nelem) - df["in2"] = in2 = np.arange(nelem) - df["in3"] = in3 = np.arange(nelem) + in1 = np.arange(nelem) + in2 = np.arange(nelem) + in3 = np.arange(nelem) + + df = DataFrame( + { + "in1": in1, + "in2": in2, + "in3": in3, + } + ) extra1 = 2.3 extra2 = 3.4 @@ -228,37 +141,6 @@ def kernel(in1, in2, in3, out1, out2, extra1, extra2): np.testing.assert_array_almost_equal(got_out2.to_numpy(), expect_out2) -def test_df_apply_rows_incols_mapping(): - nelem = 20 - - def kernel(x, y, z, out1, out2, extra1, extra2): - for i, (a, b, c) in enumerate(zip(x, y, z)): # noqa: B905 - out1[i] = extra2 * a - extra1 * b - out2[i] = b - extra1 * c - - df = DataFrame() - df["in1"] = in1 = np.arange(nelem) - df["in2"] = in2 = np.arange(nelem) - df["in3"] = in3 = np.arange(nelem) - - extra1 = 2.3 - extra2 = 3.4 - - expected_out = DataFrame() - expected_out["out1"] = extra2 * in1 - extra1 * in2 - expected_out["out2"] = in2 - extra1 * in3 - - with pytest.warns(FutureWarning): - outdf = df.apply_rows( - kernel, - incols={"in1": "x", "in2": "y", "in3": "z"}, - outcols=dict(out1=np.float64, 
out2=np.float64), - kwargs=dict(extra1=extra1, extra2=extra2), - ) - - assert_eq(outdf[["out1", "out2"]], expected_out) - - @pytest.mark.parametrize("chunksize", [1, 4, 23]) def test_df_apply_chunks_incols_mapping(chunksize): nelem = 20 @@ -268,17 +150,27 @@ def kernel(q, p, r, out1, out2, extra1, extra2): out1[i] = extra2 * a - extra1 * b + c out2[i] = i - df = DataFrame() - df["in1"] = in1 = np.arange(nelem) - df["in2"] = in2 = np.arange(nelem) - df["in3"] = in3 = np.arange(nelem) + in1 = np.arange(nelem) + in2 = np.arange(nelem) + in3 = np.arange(nelem) + + df = DataFrame( + { + "in1": in1, + "in2": in2, + "in3": in3, + } + ) extra1 = 2.3 extra2 = 3.4 - expected_out = DataFrame() - expected_out["out1"] = extra2 * in1 - extra1 * in2 + in3 - expected_out["out2"] = np.arange(len(df)) % chunksize + expected_out = DataFrame( + { + "out1": extra2 * in1 - extra1 * in2 + in3, + "out2": np.arange(len(df)) % chunksize, + } + ) outdf = df.apply_chunks( kernel, diff --git a/python/cudf/cudf/tests/dataframe/methods/test_apply_rows.py b/python/cudf/cudf/tests/dataframe/methods/test_apply_rows.py new file mode 100644 index 00000000000..dbdec9e5e46 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_apply_rows.py @@ -0,0 +1,135 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf import DataFrame +from cudf.core.column import column +from cudf.testing import assert_eq +from cudf.testing._utils import gen_rand_series + + +def _kernel_multiply(a, b, out): + # numba doesn't support zip(..., strict=True), so we must tell ruff to ignore it. + for i, (x, y) in enumerate(zip(a, b)): # noqa: B905 + out[i] = x * y + + +@pytest.mark.parametrize("dtype", [np.dtype("float32"), np.dtype("float64")]) +@pytest.mark.parametrize("has_nulls", [False, True]) +@pytest.mark.parametrize("pessimistic", [False, True]) +def test_dataframe_apply_rows(dtype, has_nulls, pessimistic): + count = 1000 + gdf_series_a = gen_rand_series(dtype, count, has_nulls=has_nulls) + gdf_series_b = gen_rand_series(dtype, count, has_nulls=has_nulls) + gdf_series_c = gen_rand_series(dtype, count, has_nulls=has_nulls) + + if pessimistic: + # pessimistically combine the null masks + gdf_series_expected = gdf_series_a * gdf_series_b + else: + # optimistically ignore the null masks + a = cudf.Series._from_column( + column.build_column(gdf_series_a.data, dtype) + ) + b = cudf.Series._from_column( + column.build_column(gdf_series_b.data, dtype) + ) + gdf_series_expected = a * b + + df_expected = cudf.DataFrame( + { + "a": gdf_series_a, + "b": gdf_series_b, + "c": gdf_series_c, + "out": gdf_series_expected, + } + ) + + df_original = cudf.DataFrame( + {"a": gdf_series_a, "b": gdf_series_b, "c": gdf_series_c} + ) + + with pytest.warns(FutureWarning): + df_actual = df_original.apply_rows( + _kernel_multiply, + ["a", "b"], + {"out": dtype}, + {}, + pessimistic_nulls=pessimistic, + ) + + assert_eq(df_expected, df_actual) + + +def test_df_apply_rows(): + nelem = 20 + + def kernel(in1, in2, in3, out1, out2, extra1, extra2): + for i, (x, y, z) in enumerate(zip(in1, in2, in3)): # noqa: B905 + out1[i] = extra2 * x - extra1 * y + out2[i] = y - extra1 * z + + in1 = np.arange(nelem) + in2 = np.arange(nelem) + in3 = np.arange(nelem) + + df = DataFrame( + { + "in1": in1, + "in2": in2, + "in3": in3, + } + ) + + extra1 = 2.3 + extra2 = 3.4 + + expect_out1 = extra2 * in1 - extra1 * in2 + expect_out2 = in2 - extra1 * in3 + + with pytest.warns(FutureWarning): + outdf = df.apply_rows( + kernel, + 
incols=["in1", "in2", "in3"], + outcols=dict(out1=np.float64, out2=np.float64), + kwargs=dict(extra1=extra1, extra2=extra2), + ) + + got_out1 = outdf["out1"].to_numpy() + got_out2 = outdf["out2"].to_numpy() + + np.testing.assert_array_almost_equal(got_out1, expect_out1) + np.testing.assert_array_almost_equal(got_out2, expect_out2) + + +def test_df_apply_rows_incols_mapping(): + nelem = 20 + + def kernel(x, y, z, out1, out2, extra1, extra2): + for i, (a, b, c) in enumerate(zip(x, y, z)): # noqa: B905 + out1[i] = extra2 * a - extra1 * b + out2[i] = b - extra1 * c + + df = DataFrame() + df["in1"] = in1 = np.arange(nelem) + df["in2"] = in2 = np.arange(nelem) + df["in3"] = in3 = np.arange(nelem) + + extra1 = 2.3 + extra2 = 3.4 + + expected_out = DataFrame() + expected_out["out1"] = extra2 * in1 - extra1 * in2 + expected_out["out2"] = in2 - extra1 * in3 + + with pytest.warns(FutureWarning): + outdf = df.apply_rows( + kernel, + incols={"in1": "x", "in2": "y", "in3": "z"}, + outcols=dict(out1=np.float64, out2=np.float64), + kwargs=dict(extra1=extra1, extra2=extra2), + ) + + assert_eq(outdf[["out1", "out2"]], expected_out) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/input_output/test_dlpack.py similarity index 100% rename from python/cudf/cudf/tests/test_dlpack.py rename to python/cudf/cudf/tests/input_output/test_dlpack.py diff --git a/python/cudf/cudf/tests/private_objects/__init__.py b/python/cudf/cudf/tests/private_objects/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/private_objects/test_buffer.py similarity index 100% rename from python/cudf/cudf/tests/test_buffer.py rename to python/cudf/cudf/tests/private_objects/test_buffer.py diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/private_objects/test_column.py similarity index 87% rename from python/cudf/cudf/tests/test_column.py rename to python/cudf/cudf/tests/private_objects/test_column.py index 15988673bcd..6ed3111b587 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/private_objects/test_column.py @@ -5,31 +5,17 @@ import pandas as pd import pyarrow as pa import pytest +from numba import cuda import cudf from cudf.core.column.column import _can_values_be_equal, as_column from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal -from cudf.utils import dtypes as dtypeutils - -dtypes = sorted( - list( - dtypeutils.ALL_TYPES - - { - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "timedelta64[s]", - "timedelta64[ms]", - "timedelta64[us]", - } - ) -) -@pytest.fixture(params=dtypes, ids=dtypes) -def pandas_input(request): - dtype = request.param +@pytest.fixture +def pandas_input(all_supported_types_as_str): + dtype = all_supported_types_as_str rng = np.random.default_rng(seed=0) size = 100 @@ -72,7 +58,7 @@ def str_host_view(list_of_str, to_dtype): @pytest.mark.parametrize("offset", [0, 1, 15]) @pytest.mark.parametrize("size", [50, 10, 0]) def test_column_offset_and_size(pandas_input, offset, size): - col = cudf.core.column.as_column(pandas_input) + col = as_column(pandas_input) col = cudf.core.column.build_column( data=col.base_data, dtype=col.dtype, @@ -141,7 +127,7 @@ def column_slicing_test(col, offset, size, cast_to_float=False): @pytest.mark.parametrize("offset", [0, 1, 15]) @pytest.mark.parametrize("size", [50, 10, 0]) def test_column_slicing(pandas_input, offset, size): - col = 
cudf.core.column.as_column(pandas_input) + col = as_column(pandas_input) column_slicing_test(col, offset, size) @@ -154,9 +140,7 @@ def test_column_slicing(pandas_input, offset, size): [cudf.Decimal128Dtype, cudf.Decimal64Dtype, cudf.Decimal32Dtype], ) def test_decimal_column_slicing(offset, size, precision, scale, decimal_type): - col = cudf.core.column.as_column( - pd.Series(np.random.default_rng(seed=0).random(1000)) - ) + col = as_column(pd.Series(np.random.default_rng(seed=0).random(1000))) col = col.astype(decimal_type(precision, scale)) column_slicing_test(col, offset, size, True) @@ -173,7 +157,7 @@ def test_column_series_multi_dim(data): cudf.Series(data) with pytest.raises(ValueError): - cudf.core.column.as_column(data) + as_column(data) @pytest.mark.parametrize( @@ -195,14 +179,13 @@ def test_column_mixed_dtype(data, error): cudf.Series(data) -@pytest.mark.parametrize("nan_as_null", [True, False]) @pytest.mark.parametrize( "scalar", [np.nan, pd.Timedelta(days=1), pd.Timestamp(2020, 1, 1)], ids=repr, ) -@pytest.mark.parametrize("size", [1, 10]) -def test_as_column_scalar_with_nan(nan_as_null, scalar, size): +def test_as_column_scalar_with_nan(nan_as_null, scalar): + size = 5 expected = ( cudf.Series([scalar] * size, nan_as_null=nan_as_null) .dropna() @@ -221,23 +204,20 @@ def test_as_column_scalar_with_nan(nan_as_null, scalar, size): @pytest.mark.parametrize("data", [[1.1, 2.2, 3.3, 4.4], [1, 2, 3, 4]]) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -def test_column_series_cuda_array_dtype(data, dtype): - psr = pd.Series(np.asarray(data), dtype=dtype) - sr = cudf.Series(cp.asarray(data), dtype=dtype) +def test_column_series_cuda_array_dtype(data, float_types_as_str): + psr = pd.Series(np.asarray(data, dtype=float_types_as_str)) + sr = cudf.Series(cp.asarray(data, dtype=float_types_as_str)) assert_eq(psr, sr) - psr = pd.Series(data, dtype=dtype) - sr = cudf.Series(data, dtype=dtype) + psr = pd.Series(data, dtype=float_types_as_str) + sr = cudf.Series(data, dtype=float_types_as_str) assert_eq(psr, sr) def test_column_zero_length_slice(): # see https://github.com/rapidsai/cudf/pull/4777 - from numba import cuda - x = cudf.DataFrame({"a": [1]}) the_column = x[1:]["a"]._column @@ -251,20 +231,16 @@ def test_column_chunked_array_creation(): pyarrow_array = pa.array([1, 2, 3] * 1000) chunked_array = pa.chunked_array(pyarrow_array) - actual_column = cudf.core.column.as_column( - chunked_array, dtype=np.dtype(np.float64) - ) - expected_column = cudf.core.column.as_column( - pyarrow_array, dtype=np.dtype(np.float64) - ) + actual_column = as_column(chunked_array, dtype=np.dtype(np.float64)) + expected_column = as_column(pyarrow_array, dtype=np.dtype(np.float64)) assert_eq( cudf.Series._from_column(actual_column), cudf.Series._from_column(expected_column), ) - actual_column = cudf.core.column.as_column(chunked_array) - expected_column = cudf.core.column.as_column(pyarrow_array) + actual_column = as_column(chunked_array) + expected_column = as_column(pyarrow_array) assert_eq( cudf.Series._from_column(actual_column), @@ -418,8 +394,8 @@ def test_column_view_string_slice(slc): ], ) def test_as_column_buffer(box, data): - expected = cudf.core.column.as_column(data) - actual_column = cudf.core.column.as_column( + expected = as_column(data) + actual_column = as_column( cudf.core.buffer.as_buffer(box(data)), dtype=data.dtype ) assert_eq( @@ -530,29 +506,6 @@ def test_build_series_from_nullable_pandas_dtype(pd_dtype, expect_dtype): np.testing.assert_array_equal(expect_mask, got_mask) 
-@pytest.mark.parametrize( - "alias,expect_dtype", - [ - ("UInt8", "uint8"), - ("UInt16", "uint16"), - ("UInt32", "uint32"), - ("UInt64", "uint64"), - ("Int8", "int8"), - ("Int16", "int16"), - ("Int32", "int32"), - ("Int64", "int64"), - ("boolean", "bool"), - ("Float32", "float32"), - ("Float64", "float64"), - ], -) -def test_astype_with_aliases(alias, expect_dtype): - pd_data = pd.Series([1, 2, 0]) - gd_data = cudf.Series.from_pandas(pd_data) - - assert_eq(pd_data.astype(expect_dtype), gd_data.astype(alias)) - - @pytest.mark.parametrize( "left, right, expected", [ diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/private_objects/test_column_accessor.py similarity index 100% rename from python/cudf/cudf/tests/test_column_accessor.py rename to python/cudf/cudf/tests/private_objects/test_column_accessor.py diff --git a/python/cudf/cudf/tests/test_compile_udf.py b/python/cudf/cudf/tests/private_objects/test_compile_udf.py similarity index 96% rename from python/cudf/cudf/tests/test_compile_udf.py rename to python/cudf/cudf/tests/private_objects/test_compile_udf.py index b12200c92dd..8d87c323786 100644 --- a/python/cudf/cudf/tests/test_compile_udf.py +++ b/python/cudf/cudf/tests/private_objects/test_compile_udf.py @@ -1,12 +1,15 @@ # Copyright (c) 2021-2025, NVIDIA CORPORATION. +import pytest from numba import types from cudf.core.udf.utils import _udf_code_cache, compile_udf -def setup_function(): +@pytest.fixture(autouse=True) +def clear_udf_cache(): _udf_code_cache.clear() + return def assert_cache_size(size): diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/private_objects/test_extension_compilation.py similarity index 100% rename from python/cudf/cudf/tests/test_extension_compilation.py rename to python/cudf/cudf/tests/private_objects/test_extension_compilation.py diff --git a/python/cudf/cudf/tests/test_performance_tracking.py b/python/cudf/cudf/tests/private_objects/test_memory_records_report.py similarity index 100% rename from python/cudf/cudf/tests/test_performance_tracking.py rename to python/cudf/cudf/tests/private_objects/test_memory_records_report.py diff --git a/python/cudf/cudf/tests/test_nrt_stats.py b/python/cudf/cudf/tests/private_objects/test_nrt_stats.py similarity index 100% rename from python/cudf/cudf/tests/test_nrt_stats.py rename to python/cudf/cudf/tests/private_objects/test_nrt_stats.py diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 609ad8efedd..2a535478af4 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -198,6 +198,29 @@ def test_numeric_to_timedelta( assert_eq(expected, actual) +@pytest.mark.parametrize( + "alias,expect_dtype", + [ + ("UInt8", "uint8"), + ("UInt16", "uint16"), + ("UInt32", "uint32"), + ("UInt64", "uint64"), + ("Int8", "int8"), + ("Int16", "int16"), + ("Int32", "int32"), + ("Int64", "int64"), + ("boolean", "bool"), + ("Float32", "float32"), + ("Float64", "float64"), + ], +) +def test_astype_with_aliases(alias, expect_dtype): + pd_data = pd.Series([1, 2, 0]) + gd_data = cudf.Series.from_pandas(pd_data) + + assert_eq(pd_data.astype(expect_dtype), gd_data.astype(alias)) + + def test_timedelta_datetime_cast_invalid(): sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") psr = sr.to_pandas() From 9c63bcd05902a3f5058712652b626e32e48389a8 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 25 Aug 2025 15:52:19 -0400 
Subject: [PATCH 206/366] When bundling `libnvcomp.so.X` only append the major version value (#19786) We previously extracted the whole X.Y.Z.P version string of nvcomp and would therefore fail to load the correct version in our wheel packages. Authors: - Robert Maynard (https://github.com/robertmaynard) - Bradley Dice (https://github.com/bdice) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/19786 --- cpp/cmake/thirdparty/get_jitify.cmake | 4 ++-- python/libcudf/CMakeLists.txt | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/cmake/thirdparty/get_jitify.cmake b/cpp/cmake/thirdparty/get_jitify.cmake index b6f11e30d28..5db4e3e907f 100644 --- a/cpp/cmake/thirdparty/get_jitify.cmake +++ b/cpp/cmake/thirdparty/get_jitify.cmake @@ -19,8 +19,8 @@ function(find_and_configure_jitify) rapids_cpm_find( jitify 2.0.0 GIT_REPOSITORY https://github.com/NVIDIA/jitify.git - GIT_TAG 70783a3ad7b0cad2992a26a1ebf8fbe3d6b44e25 # jitify2 branch as of 5th Aug 2025 - GIT_SHALLOW TRUE + GIT_TAG 44e978b21fc8bdb6b2d7d8d179523c8350db72e5 # jitify2 branch as of 23rd Aug 2025 + GIT_SHALLOW FALSE DOWNLOAD_ONLY TRUE ) set(JITIFY_INCLUDE_DIR diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 6e4c525edbd..6722db592bb 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -54,11 +54,13 @@ if(TARGET nvcomp::nvcomp) # Compute the SOVERSION from the library path get_filename_component(nvcomp_lib_dir ${nvcomp_lib_path} DIRECTORY) get_filename_component(nvcomp_lib_name ${nvcomp_lib_path} NAME) - string(REGEX REPLACE "libnvcomp\\.so\\.([0-9]+)" "\\1" nvcomp_soversion ${nvcomp_lib_name}) + string(REPLACE [=[libnvcomp.so.]=] "" nvcomp_soversion ${nvcomp_lib_name}) + string(REPLACE [=[.]=] ";" nvcomp_soversion ${nvcomp_soversion}) + list(GET nvcomp_soversion 0 nvcomp_soversion_major) install( FILES ${nvcomp_lib_path} DESTINATION ${SKBUILD_PLATLIB_DIR}/libcudf/lib64/ - RENAME libnvcomp.so.${nvcomp_soversion} + RENAME libnvcomp.so.${nvcomp_soversion_major} ) else() message(FATAL_ERROR "nvcomp target must be imported") From 6e2aa185766353bb8517024e51eaf3f28e5a90a2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Aug 2025 15:02:03 -0700 Subject: [PATCH 207/366] Move test_numerical/{numpy|pandas}_interop/setitem.py to new cudf classic testing directory (#19725) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19725 --- .../cudf/tests/dataframe/indexing/test_loc.py | 76 +++ .../tests/dataframe/indexing/test_setitem.py | 110 +++- .../tests/dataframe/methods/test_to_pandas.py | 28 +- .../dataframe/methods/test_to_records.py | 39 ++ .../cudf/tests/dataframe/test_constructors.py | 90 ++++ .../test_to_numeric.py} | 174 +------ .../indexes/rangeindex/test_constructors.py | 22 + .../cudf/tests/private_objects/test_column.py | 116 +++++ .../tests/private_objects/test_compile_udf.py | 1 - .../cudf/tests/series/indexing/test_iloc.py | 11 + .../cudf/tests/series/indexing/test_loc.py | 36 ++ .../tests/series/indexing/test_setitem.py | 237 ++++++++- .../tests/series/methods/test_to_pandas.py | 22 + .../cudf/cudf/tests/series/test_attributes.py | 21 + .../cudf/tests/series/test_constructors.py | 23 +- 
python/cudf/cudf/tests/test_numpy_interop.py | 96 ---- python/cudf/cudf/tests/test_pandas_interop.py | 90 ---- python/cudf/cudf/tests/test_setitem.py | 477 ------------------ 18 files changed, 834 insertions(+), 835 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/indexing/test_loc.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_to_records.py rename python/cudf/cudf/tests/{test_numerical.py => general_functions/test_to_numeric.py} (56%) create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py create mode 100644 python/cudf/cudf/tests/series/indexing/test_loc.py delete mode 100644 python/cudf/cudf/tests/test_numpy_interop.py delete mode 100644 python/cudf/cudf/tests/test_pandas_interop.py delete mode 100644 python/cudf/cudf/tests/test_setitem.py diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_loc.py b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py new file mode 100644 index 00000000000..82277af12c5 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "value", + [ + "7", + pytest.param( + ["7", "8"], + marks=pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/11298" + ), + ), + ], +) +def test_loc_setitem_string_11298(value): + df = pd.DataFrame({"a": ["a", "b", "c"]}) + cdf = cudf.from_pandas(df) + + df.loc[:1, "a"] = value + + cdf.loc[:1, "a"] = value + + assert_eq(df, cdf) + + +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/11944") +def test_loc_setitem_list_11944(): + df = pd.DataFrame( + data={"a": ["yes", "no"], "b": [["l1", "l2"], ["c", "d"]]} + ) + cdf = cudf.from_pandas(df) + df.loc[df.a == "yes", "b"] = [["hello"]] + cdf.loc[cdf.a == "yes", "b"] = [["hello"]] + assert_eq(df, cdf) + + +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12504") +def test_loc_setitem_extend_empty_12504(): + df = pd.DataFrame(columns=["a"]) + cdf = cudf.from_pandas(df) + + df.loc[0] = [1] + + cdf.loc[0] = [1] + + assert_eq(df, cdf) + + +def test_loc_setitem_extend_existing_12505(): + df = pd.DataFrame({"a": [0]}) + cdf = cudf.from_pandas(df) + + df.loc[1] = 1 + + cdf.loc[1] = 1 + + assert_eq(df, cdf) + + +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12801") +def test_loc_setitem_add_column_partial_12801(): + df = pd.DataFrame({"a": [0, 1, 2]}) + cdf = cudf.from_pandas(df) + + df.loc[df.a < 2, "b"] = 1 + + cdf.loc[cdf.a < 2, "b"] = 1 + + assert_eq(df, cdf) diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py index 7d2dab7caa6..0df3cdddb1f 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py @@ -1,8 +1,113 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
+import numpy as np import pandas as pd +import pytest import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("arg", [[True, False, True], [True, True, True]]) +@pytest.mark.parametrize("value", [0, -1]) +def test_dataframe_setitem_bool_mask_scalar(arg, value): + df = pd.DataFrame({"a": [1, 2, 3]}) + gdf = cudf.from_pandas(df) + + df[arg] = value + gdf[arg] = value + assert_eq(df, gdf) + + +def test_dataframe_setitem_scalar_bool(): + df = pd.DataFrame({"a": [1, 2, 3]}) + df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]}) + + gdf = cudf.DataFrame({"a": [1, 2, 3]}) + gdf[[True, False, True]] = cudf.DataFrame({"a": [-1, -2]}) + assert_eq(df, gdf) + + +@pytest.mark.parametrize( + "df", + [pd.DataFrame({"a": [1, 2, 3]}), pd.DataFrame({"a": ["x", "y", "z"]})], +) +@pytest.mark.parametrize("arg", [["a"], "a", "b"]) +@pytest.mark.parametrize( + "value", [-10, pd.DataFrame({"a": [-1, -2, -3]}), "abc"] +) +def test_dataframe_setitem_columns(df, arg, value): + gdf = cudf.from_pandas(df) + cudf_replace_value = value + + if isinstance(cudf_replace_value, pd.DataFrame): + cudf_replace_value = cudf.from_pandas(value) + + df[arg] = value + gdf[arg] = cudf_replace_value + assert_eq(df, gdf, check_dtype=False) + + +@pytest.mark.parametrize( + "value", + [ + pd.DataFrame({"0": [-1, -2, -3], "1": [-0, -10, -1]}), + 10, + "rapids", + 0.32234, + np.datetime64(1324232423423342, "ns"), + np.timedelta64(34234324234324234, "ns"), + ], +) +def test_dataframe_setitem_new_columns(value): + df = pd.DataFrame({"a": [1, 2, 3]}) + arg = ["b", "c"] + gdf = cudf.from_pandas(df) + cudf_replace_value = value + + if isinstance(cudf_replace_value, pd.DataFrame): + cudf_replace_value = cudf.from_pandas(value) + + df[arg] = value + gdf[arg] = cudf_replace_value + assert_eq(df, gdf, check_dtype=True) + + +def test_series_setitem_index(): + df = pd.DataFrame( + data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3] + ) + + df["b"] = pd.Series(data=[12, 11, 10], index=[3, 2, 1]) + gdf = cudf.DataFrame( + data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3] + ) + gdf["b"] = cudf.Series(data=[12, 11, 10], index=[3, 2, 1]) + assert_eq(df, gdf, check_dtype=False) + + +@pytest.mark.xfail(reason="Copy-on-Write should make a copy") +@pytest.mark.parametrize( + "index", + [ + pd.MultiIndex.from_frame( + pd.DataFrame({"b": [3, 2, 1], "c": ["a", "b", "c"]}) + ), + ["a", "b", "c"], + ], +) +def test_setitem_dataframe_series_inplace(index): + gdf = cudf.DataFrame({"a": [1, 2, 3]}, index=index) + expected = gdf.copy() + with cudf.option_context("copy_on_write", True): + gdf["a"].replace(1, 500, inplace=True) + + assert_eq(expected, gdf) + + +def test_setitem_datetime(): + df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) + assert df.date.dtype.kind == "M" def test_listcol_setitem_retain_dtype(): @@ -18,8 +123,3 @@ def test_listcol_setitem_retain_dtype(): # prior to this fix: https://github.com/rapidsai/cudf/pull/10151/ df2 = df1.copy() assert df2["a"].dtype == df["a"].dtype - - -def test_setitem_datetime(): - df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) - assert df.date.dtype.kind == "M" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py index df549bbac5a..739d97c28a5 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py @@ -1,11 +1,37 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
-
+import numpy as np
 import pytest
 
 import cudf
 
 
+def test_to_pandas():
+    df = cudf.DataFrame(
+        {
+            "a": np.arange(5, dtype=np.int32),
+            "b": np.arange(10, 15, dtype=np.float64),
+            "c": np.array([True, False, None, True, True]),
+        }
+    )
+
+    pdf = df.to_pandas()
+
+    assert tuple(df.columns) == tuple(pdf.columns)
+
+    assert df["a"].dtype == pdf["a"].dtype
+    assert df["b"].dtype == pdf["b"].dtype
+
+    # Notice, the dtypes differ when Pandas and cudf boolean series
+    # contain None/NaN
+    assert df["c"].dtype == np.bool_
+    assert pdf["c"].dtype == np.object_
+
+    assert len(df["a"]) == len(pdf["a"])
+    assert len(df["b"]) == len(pdf["b"])
+    assert len(df["c"]) == len(pdf["c"])
+
+
 def test_list_to_pandas_nullable_true():
     df = cudf.DataFrame({"a": cudf.Series([[1, 2, 3]])})
     with pytest.raises(NotImplementedError):
diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_records.py b/python/cudf/cudf/tests/dataframe/methods/test_to_records.py
new file mode 100644
index 00000000000..eea58d2946d
--- /dev/null
+++ b/python/cudf/cudf/tests/dataframe/methods/test_to_records.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
+
+import numpy as np
+
+import cudf
+
+
+def test_to_records_noindex():
+    aa = np.arange(10, dtype=np.int32)
+    bb = np.arange(10, 20, dtype=np.float64)
+    df = cudf.DataFrame(
+        {
+            "a": aa,
+            "b": bb,
+        }
+    )
+
+    rec = df.to_records(index=False)
+    assert rec.dtype.names == ("a", "b")
+    np.testing.assert_array_equal(rec["a"], aa)
+    np.testing.assert_array_equal(rec["b"], bb)
+
+
+def test_to_records_withindex():
+    aa = np.arange(10, dtype=np.int32)
+    bb = np.arange(10, 20, dtype=np.float64)
+    df = cudf.DataFrame(
+        {
+            "a": aa,
+            "b": bb,
+        }
+    )
+
+    rec_indexed = df.to_records(index=True)
+    assert rec_indexed.size == len(aa)
+    assert rec_indexed.dtype.names == ("index", "a", "b")
+    np.testing.assert_array_equal(rec_indexed["a"], aa)
+    np.testing.assert_array_equal(rec_indexed["b"], bb)
+    np.testing.assert_array_equal(rec_indexed["index"], np.arange(10))
diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py
index 15926f4faf0..28c515757f0 100644
--- a/python/cudf/cudf/tests/dataframe/test_constructors.py
+++ b/python/cudf/cudf/tests/dataframe/test_constructors.py
@@ -1354,3 +1354,93 @@ def test_create_interval_df(data1, data2, data3, data4, interval_closed):
         dtype="interval",
     )
     assert_eq(expect_three, got_three)
+
+
+def test_from_pandas():
+    pdf = pd.DataFrame(
+        {
+            "a": np.arange(10, dtype=np.int32),
+            "b": np.arange(10, 20, dtype=np.float64),
+        }
+    )
+
+    df = cudf.DataFrame.from_pandas(pdf)
+
+    assert tuple(df.columns) == tuple(pdf.columns)
+
+    assert df["a"].dtype == pdf["a"].dtype
+    assert df["b"].dtype == pdf["b"].dtype
+
+    assert len(df["a"]) == len(pdf["a"])
+    assert len(df["b"]) == len(pdf["b"])
+
+
+def test_from_pandas_ex1():
+    pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
+    df = cudf.DataFrame.from_pandas(pdf)
+
+    assert tuple(df.columns) == tuple(pdf.columns)
+    assert np.all(df["a"].to_numpy() == pdf["a"])
+    matches = df["b"].to_numpy(na_value=np.nan) == pdf["b"]
+    # the 3rd element is False due to (nan == nan) == False
+    assert np.all(matches == [True, True, False, True])
+    assert np.isnan(df["b"].to_numpy(na_value=np.nan)[2])
+    assert np.isnan(pdf["b"][2])
+
+
+def test_from_pandas_with_index():
+    pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
+    pdf = pdf.set_index(np.asarray([4, 3, 2, 1]))
+    df = cudf.DataFrame.from_pandas(pdf)
+
+    # 
Check columns + assert_eq(df.a, pdf.a) + assert_eq(df.b, pdf.b) + # Check index + assert_eq(df.index.values, pdf.index.values) + # Check again using pandas testing tool on frames + assert_eq(df, pdf) + + +@pytest.mark.parametrize("columns", [None, ("a", "b"), ("a",), ("b",)]) +def test_from_records_noindex(columns): + recdtype = np.dtype([("a", np.int32), ("b", np.float64)]) + rec = np.recarray(10, dtype=recdtype) + rec.a = aa = np.arange(10, dtype=np.int32) + rec.b = bb = np.arange(10, 20, dtype=np.float64) + df = cudf.DataFrame.from_records(rec, columns=columns) + + if columns and "a" in columns: + assert_eq(aa, df["a"].values) + if columns and "b" in columns: + assert_eq(bb, df["b"].values) + assert_eq(np.arange(10), df.index.values) + + +@pytest.mark.parametrize("columns", [None, ("a", "b"), ("a",), ("b",)]) +def test_from_records_withindex(columns): + recdtype = np.dtype( + [("index", np.int64), ("a", np.int32), ("b", np.float64)] + ) + rec = np.recarray(10, dtype=recdtype) + rec.index = ii = np.arange(30, 40) + rec.a = aa = np.arange(10, dtype=np.int32) + rec.b = bb = np.arange(10, 20, dtype=np.float64) + df = cudf.DataFrame.from_records(rec, index="index") + + if columns and "a" in columns: + assert_eq(aa, df["a"].values) + if columns and "b" in columns: + assert_eq(bb, df["b"].values) + assert_eq(ii, df.index.values) + + +def test_numpy_non_contiguous(): + recdtype = np.dtype([("index", np.int64), ("a", np.int32)]) + rec = np.recarray(10, dtype=recdtype) + rec.index = np.arange(30, 40) + rec.a = aa = np.arange(20, dtype=np.int32)[::2] + assert rec.a.flags["C_CONTIGUOUS"] is False + + gdf = cudf.DataFrame.from_records(rec, index="index") + assert_eq(aa, gdf["a"].values) diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/general_functions/test_to_numeric.py similarity index 56% rename from python/cudf/cudf/tests/test_numerical.py rename to python/cudf/cudf/tests/general_functions/test_to_numeric.py index b1a2f081cd2..bb4ca6b24bf 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/general_functions/test_to_numeric.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2025, NVIDIA CORPORATION. 
import numpy as np import pandas as pd @@ -7,122 +7,12 @@ import cudf from cudf.core._compat import PANDAS_GE_220 from cudf.testing import assert_eq -from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if -from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes +from cudf.testing._utils import expect_warning_if -def test_can_cast_safely_same_kind(): - # 'i' -> 'i' - data = cudf.Series([1, 2, 3], dtype="int32")._column - to_dtype = np.dtype("int64") - - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 3], dtype="int64")._column - to_dtype = np.dtype("int32") - - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 2**31], dtype="int64")._column - assert not data.can_cast_safely(to_dtype) - - # 'u' -> 'u' - data = cudf.Series([1, 2, 3], dtype="uint32")._column - to_dtype = np.dtype("uint64") - - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 3], dtype="uint64")._column - to_dtype = np.dtype("uint32") - - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 2**33], dtype="uint64")._column - assert not data.can_cast_safely(to_dtype) - - # 'f' -> 'f' - data = cudf.Series([np.inf, 1.0], dtype="float64")._column - to_dtype = np.dtype("float32") - assert data.can_cast_safely(to_dtype) - - data = cudf.Series( - [float(np.finfo("float32").max) * 2, 1.0], dtype="float64" - )._column - to_dtype = np.dtype("float32") - assert not data.can_cast_safely(to_dtype) - - -def test_can_cast_safely_mixed_kind(): - data = cudf.Series([1, 2, 3], dtype="int32")._column - to_dtype = np.dtype("float32") - assert data.can_cast_safely(to_dtype) - - # too big to fit into f32 exactly - data = cudf.Series([1, 2, 2**24 + 1], dtype="int32")._column - assert not data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 3], dtype="uint32")._column - to_dtype = np.dtype("float32") - assert data.can_cast_safely(to_dtype) - - # too big to fit into f32 exactly - data = cudf.Series([1, 2, 2**24 + 1], dtype="uint32")._column - assert not data.can_cast_safely(to_dtype) - - to_dtype = np.dtype("float64") - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1.0, 2.0, 3.0], dtype="float32")._column - to_dtype = np.dtype("int32") - assert data.can_cast_safely(to_dtype) - - # not integer float - data = cudf.Series([1.0, 2.0, 3.5], dtype="float32")._column - assert not data.can_cast_safely(to_dtype) - - data = cudf.Series([10.0, 11.0, 2000.0], dtype="float64")._column - assert data.can_cast_safely(to_dtype) - - # float out of int range - data = cudf.Series([1.0, 2.0, 1.0 * (2**31)], dtype="float32")._column - assert not data.can_cast_safely(to_dtype) - - # negative signed integers casting to unsigned integers - data = cudf.Series([-1, 0, 1], dtype="int32")._column - to_dtype = np.dtype("uint32") - assert not data.can_cast_safely(to_dtype) - - -def test_to_pandas_nullable_integer(): - gsr_not_null = cudf.Series([1, 2, 3]) - gsr_has_null = cudf.Series([1, 2, None]) - - psr_not_null = pd.Series([1, 2, 3], dtype="int64") - psr_has_null = pd.Series([1, 2, None], dtype="Int64") - - assert_eq(gsr_not_null.to_pandas(), psr_not_null) - assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) - - -def test_to_pandas_nullable_bool(): - gsr_not_null = cudf.Series([True, False, True]) - gsr_has_null = cudf.Series([True, False, None]) - - psr_not_null = pd.Series([True, False, True], dtype="bool") - psr_has_null = pd.Series([True, False, None], dtype="boolean") - - assert_eq(gsr_not_null.to_pandas(), psr_not_null) - 
assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) - - -def test_can_cast_safely_has_nulls(): - data = cudf.Series([1, 2, 3, None], dtype="float32")._column - to_dtype = np.dtype("int64") - - assert data.can_cast_safely(to_dtype) - - data = cudf.Series([1, 2, 3.1, None], dtype="float32")._column - assert not data.can_cast_safely(to_dtype) +@pytest.fixture(params=["integer", "signed", "unsigned", "float"]) +def downcast(request): + return request.param @pytest.mark.parametrize( @@ -182,9 +72,6 @@ def test_to_numeric_basic_1d(data): [np.iinfo(np.int64).max, np.iinfo(np.int64).min], ], ) -@pytest.mark.parametrize( - "downcast", ["integer", "signed", "unsigned", "float"] -) def test_to_numeric_downcast_int(data, downcast): ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -212,9 +99,6 @@ def test_to_numeric_downcast_int(data, downcast): [1.0, 1.5, 2.6, 3.4], ], ) -@pytest.mark.parametrize( - "downcast", ["signed", "integer", "unsigned", "float"] -) def test_to_numeric_downcast_float(data, downcast): ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -237,8 +121,10 @@ def test_to_numeric_downcast_float(data, downcast): [-1.0, -1.79e308], ], ) -@pytest.mark.parametrize("downcast", ["signed", "integer", "unsigned"]) def test_to_numeric_downcast_large_float(data, downcast): + if downcast == "float": + pytest.skip(f"{downcast=} not applicable for test") + ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -260,8 +146,8 @@ def test_to_numeric_downcast_large_float(data, downcast): [-1.0, -1.79e308], ], ) -@pytest.mark.parametrize("downcast", ["float"]) -def test_to_numeric_downcast_large_float_pd_bug(data, downcast): +def test_to_numeric_downcast_large_float_pd_bug(data): + downcast = "float" ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -278,9 +164,6 @@ def test_to_numeric_downcast_large_float_pd_bug(data, downcast): [str(np.iinfo(np.int64).max), str(np.iinfo(np.int64).min)], ], ) -@pytest.mark.parametrize( - "downcast", ["signed", "integer", "unsigned", "float"] -) def test_to_numeric_downcast_string_int(data, downcast): ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -302,9 +185,6 @@ def test_to_numeric_downcast_string_int(data, downcast): ["1", "10", "1.0", "2e3", "", ""], # mixed empty strings ], ) -@pytest.mark.parametrize( - "downcast", ["signed", "integer", "unsigned", "float"] -) def test_to_numeric_downcast_string_float(data, downcast): ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -335,9 +215,6 @@ def test_to_numeric_downcast_string_float(data, downcast): ], # 2 digits relaxed from np.finfo(np.float64).min/max ], ) -@pytest.mark.parametrize( - "downcast", ["signed", "integer", "unsigned", "float"] -) def test_to_numeric_downcast_string_large_float(data, downcast): ps = pd.Series(data) gs = cudf.from_pandas(ps) @@ -382,35 +259,8 @@ def test_to_numeric_error(data, errors): assert_eq(expect, got) -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]]) -def test_series_construction_with_nulls(dtype, input_obj): - dtype = cudf.dtype(dtype) - # numpy case - - expect = pd.Series(input_obj, dtype=np_dtypes_to_pandas_dtypes[dtype]) - got = cudf.Series(input_obj, dtype=dtype).to_pandas(nullable=True) - - assert_eq(expect, got) - - # Test numpy array of objects case - np_data = [ - dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj - ] - - expect = pd.Series(np_data, dtype=np_dtypes_to_pandas_dtypes[dtype]) - got = cudf.Series(np_data, dtype=dtype).to_pandas(nullable=True) - assert_eq(expect, got) - - 
-@pytest.mark.parametrize( - "data", - [[True, False, True]], -) -@pytest.mark.parametrize( - "downcast", ["signed", "integer", "unsigned", "float"] -) -def test_series_to_numeric_bool(data, downcast): +def test_series_to_numeric_bool(downcast): + data = [True, False, True] ps = pd.Series(data) gs = cudf.from_pandas(ps) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py b/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py new file mode 100644 index 00000000000..9bbe35ed33b --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_from_pandas_rangeindex(): + idx1 = pd.RangeIndex(start=0, stop=4, step=1, name="myindex") + idx2 = cudf.from_pandas(idx1) + + # Check index + assert_eq(idx1.values, idx2.values) + assert idx1.name == idx2.name + + +def test_from_pandas_rangeindex_step(): + expected = pd.RangeIndex(start=0, stop=8, step=2, name="myindex") + actual = cudf.from_pandas(expected) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/private_objects/test_column.py b/python/cudf/cudf/tests/private_objects/test_column.py index 6ed3111b587..4d2a7eaea41 100644 --- a/python/cudf/cudf/tests/private_objects/test_column.py +++ b/python/cudf/cudf/tests/private_objects/test_column.py @@ -55,6 +55,30 @@ def str_host_view(list_of_str, to_dtype): ) +def test_column_set_equal_length_object_by_mask(): + # Series.__setitem__ might bypass some of the cases + # handled in column.__setitem__ so this test is needed + + data = cudf.Series([0, 0, 1, 1, 1])._column + replace_data = cudf.Series([100, 200, 300, 400, 500])._column + bool_col = cudf.Series([True, True, True, True, True])._column + + data[bool_col] = replace_data + assert_eq( + cudf.Series._from_column(data), + cudf.Series._from_column(replace_data), + ) + + data = cudf.Series([0, 0, 1, 1, 1])._column + bool_col = cudf.Series([True, False, True, False, True])._column + data[bool_col] = replace_data + + assert_eq( + cudf.Series._from_column(data), + cudf.Series([100, 0, 300, 1, 500]), + ) + + @pytest.mark.parametrize("offset", [0, 1, 15]) @pytest.mark.parametrize("size", [50, 10, 0]) def test_column_offset_and_size(pandas_input, offset, size): @@ -404,6 +428,98 @@ def test_as_column_buffer(box, data): ) +def test_can_cast_safely_same_kind(): + # 'i' -> 'i' + data = cudf.Series([1, 2, 3], dtype="int32")._column + to_dtype = np.dtype("int64") + + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 3], dtype="int64")._column + to_dtype = np.dtype("int32") + + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 2**31], dtype="int64")._column + assert not data.can_cast_safely(to_dtype) + + # 'u' -> 'u' + data = cudf.Series([1, 2, 3], dtype="uint32")._column + to_dtype = np.dtype("uint64") + + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 3], dtype="uint64")._column + to_dtype = np.dtype("uint32") + + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 2**33], dtype="uint64")._column + assert not data.can_cast_safely(to_dtype) + + # 'f' -> 'f' + data = cudf.Series([np.inf, 1.0], dtype="float64")._column + to_dtype = np.dtype("float32") + assert data.can_cast_safely(to_dtype) + + data = cudf.Series( + [float(np.finfo("float32").max) * 2, 1.0], dtype="float64" + )._column + to_dtype = np.dtype("float32") + assert not data.can_cast_safely(to_dtype) + + +def 
test_can_cast_safely_mixed_kind(): + data = cudf.Series([1, 2, 3], dtype="int32")._column + to_dtype = np.dtype("float32") + assert data.can_cast_safely(to_dtype) + + # too big to fit into f32 exactly + data = cudf.Series([1, 2, 2**24 + 1], dtype="int32")._column + assert not data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 3], dtype="uint32")._column + to_dtype = np.dtype("float32") + assert data.can_cast_safely(to_dtype) + + # too big to fit into f32 exactly + data = cudf.Series([1, 2, 2**24 + 1], dtype="uint32")._column + assert not data.can_cast_safely(to_dtype) + + to_dtype = np.dtype("float64") + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1.0, 2.0, 3.0], dtype="float32")._column + to_dtype = np.dtype("int32") + assert data.can_cast_safely(to_dtype) + + # not integer float + data = cudf.Series([1.0, 2.0, 3.5], dtype="float32")._column + assert not data.can_cast_safely(to_dtype) + + data = cudf.Series([10.0, 11.0, 2000.0], dtype="float64")._column + assert data.can_cast_safely(to_dtype) + + # float out of int range + data = cudf.Series([1.0, 2.0, 1.0 * (2**31)], dtype="float32")._column + assert not data.can_cast_safely(to_dtype) + + # negative signed integers casting to unsigned integers + data = cudf.Series([-1, 0, 1], dtype="int32")._column + to_dtype = np.dtype("uint32") + assert not data.can_cast_safely(to_dtype) + + +def test_can_cast_safely_has_nulls(): + data = cudf.Series([1, 2, 3, None], dtype="float32")._column + to_dtype = np.dtype("int64") + + assert data.can_cast_safely(to_dtype) + + data = cudf.Series([1, 2, 3.1, None], dtype="float32")._column + assert not data.can_cast_safely(to_dtype) + + @pytest.mark.parametrize( "data,pyarrow_kwargs,cudf_kwargs", [ diff --git a/python/cudf/cudf/tests/private_objects/test_compile_udf.py b/python/cudf/cudf/tests/private_objects/test_compile_udf.py index 8d87c323786..2eb12f1ae86 100644 --- a/python/cudf/cudf/tests/private_objects/test_compile_udf.py +++ b/python/cudf/cudf/tests/private_objects/test_compile_udf.py @@ -9,7 +9,6 @@ @pytest.fixture(autouse=True) def clear_udf_cache(): _udf_code_cache.clear() - return def assert_cache_size(size): diff --git a/python/cudf/cudf/tests/series/indexing/test_iloc.py b/python/cudf/cudf/tests/series/indexing/test_iloc.py index 0d460c11dc8..00bc0422c7d 100644 --- a/python/cudf/cudf/tests/series/indexing/test_iloc.py +++ b/python/cudf/cudf/tests/series/indexing/test_iloc.py @@ -1,11 +1,22 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import numpy as np import pytest import cudf from cudf.testing import assert_eq +def test_series_setitem_singleton_range(): + sr = cudf.Series([1, 2, 3], dtype=np.int64) + psr = sr.to_pandas() + value = np.asarray([7], dtype=np.int64) + sr.iloc[:1] = value + psr.iloc[:1] = value + assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64)) + assert_eq(sr, psr, check_dtype=True) + + @pytest.mark.parametrize( "indices", [slice(0, 3), slice(1, 4), slice(None, None, 2), slice(1, None, 2)], diff --git a/python/cudf/cudf/tests/series/indexing/test_loc.py b/python/cudf/cudf/tests/series/indexing/test_loc.py new file mode 100644 index 00000000000..da67da0699e --- /dev/null +++ b/python/cudf/cudf/tests/series/indexing/test_loc.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/13031") +@pytest.mark.parametrize("other_index", [["1", "3", "2"], [1, 2, 3]]) +def test_loc_setitem_series_index_alignment_13031(other_index): + s = pd.Series([1, 2, 3], index=["1", "2", "3"]) + other = pd.Series([5, 6, 7], index=other_index) + + cs = cudf.from_pandas(s) + cother = cudf.from_pandas(other) + + s.loc[["1", "3"]] = other + + cs.loc[["1", "3"]] = cother + + assert_eq(s, cs) + + +def test_series_set_item_index_reference(): + gs1 = cudf.Series([1], index=[7]) + gs2 = cudf.Series([2], index=gs1.index) + + gs1.loc[11] = 2 + ps1 = pd.Series([1], index=[7]) + ps2 = pd.Series([2], index=ps1.index) + ps1.loc[11] = 2 + + assert_eq(ps1, gs1) + assert_eq(ps2, gs2) diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py index 4d78d3f4698..aba9edcc6c7 100644 --- a/python/cudf/cudf/tests/series/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py @@ -6,10 +6,243 @@ import pytest import cudf +from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.testing import assert_eq -from cudf.testing._utils import ( - assert_exceptions_equal, +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if + + +@pytest.mark.parametrize( + "arg", ["b", ["a", "c"], slice(1, 2, 1), [True, False, True]] ) +def test_series_set_item(arg): + psr = pd.Series([1, 2, 3], index=["a", "b", "c"]) + gsr = cudf.from_pandas(psr) + + psr[arg] = 11 + gsr[arg] = 11 + + assert_eq(psr, gsr) + + +def test_column_set_unequal_length_object_by_mask(): + data = [1, 2, 3, 4, 5] + replace_data_1 = [8, 9] + replace_data_2 = [8, 9, 10, 11] + mask = [True, True, False, True, False] + + psr = pd.Series(data) + gsr = cudf.Series(data) + assert_exceptions_equal( + psr.__setitem__, + gsr.__setitem__, + ([mask, replace_data_1], {}), + ([mask, replace_data_1], {}), + ) + + psr = pd.Series(data) + gsr = cudf.Series(data) + assert_exceptions_equal( + psr.__setitem__, + gsr.__setitem__, + ([mask, replace_data_2], {}), + ([mask, replace_data_2], {}), + ) + + +def test_categorical_setitem_invalid(): + ps = pd.Series([1, 2, 3], dtype="category") + gs = cudf.Series([1, 2, 3], dtype="category") + + assert_exceptions_equal( + lfunc=ps.__setitem__, + rfunc=gs.__setitem__, + lfunc_args_and_kwargs=([0, 5], {}), + rfunc_args_and_kwargs=([0, 5], {}), + ) + + +def test_series_slice_setitem_list(): + actual = cudf.Series([[[1, 2], [2, 3]], [[3, 4]], [[4, 5]], [[6, 7]]]) + actual[slice(0, 3, 1)] = [[10, 11], [12, 23]] + expected = cudf.Series( + [ + [[10, 11], [12, 23]], + [[10, 11], [12, 23]], + [[10, 11], [12, 23]], + [[6, 7]], + ] + ) + assert_eq(actual, expected) + + +def test_series_slice_setitem_struct(): + actual = cudf.Series( + [ + {"a": {"b": 10}, "b": 11}, + {"a": {"b": 100}, "b": 5}, + {"a": {"b": 50}, "b": 2}, + {"a": {"b": 1000}, "b": 67}, + {"a": {"b": 4000}, "b": 1090}, + ] + ) + actual[slice(0, 3, 1)] = {"a": {"b": 5050}, "b": 101} + expected = cudf.Series( + [ + {"a": {"b": 5050}, "b": 101}, + {"a": {"b": 5050}, "b": 101}, + {"a": {"b": 5050}, "b": 101}, + {"a": {"b": 1000}, "b": 67}, + {"a": {"b": 4000}, "b": 1090}, + ] + ) + assert_eq(actual, expected) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize("dtype", 
[np.int32, np.int64, np.float32, np.float64]) +@pytest.mark.parametrize("indices", [0, [1, 2]]) +def test_series_setitem_upcasting(dtype, indices): + sr = pd.Series([0, 0, 0], dtype=dtype) + cr = cudf.from_pandas(sr) + assert_eq(sr, cr) + # Must be a non-integral floating point value that can't be losslessly + # converted to float32, otherwise pandas will try and match the source + # column dtype. + new_value = np.float64(np.pi) + col_ref = cr._column + with expect_warning_if(dtype != np.float64): + sr[indices] = new_value + with expect_warning_if(dtype != np.float64): + cr[indices] = new_value + assert_eq(sr, cr) + + if dtype == np.float64: + # no-op type cast should not modify backing column + assert col_ref == cr._column + + +@pytest.mark.parametrize( + "klass", + [ + list, + cudf.Series, + lambda x: cudf.Series(x, index=[2, 3, 4, 5, 6]), + ], +) +def test_series_set_equal_length_object_by_mask(klass): + replace_data = klass([100, 200, 300, 400, 500]) + psr = pd.Series([1, 2, 3, 4, 5], dtype="Int64") + gsr = cudf.from_pandas(psr) + + # Lengths match in trivial case + pd_bool_col = pd.Series([True] * len(psr), dtype="boolean") + gd_bool_col = cudf.from_pandas(pd_bool_col) + psr[pd_bool_col] = ( + replace_data.to_pandas(nullable=True) + if hasattr(replace_data, "to_pandas") + else pd.Series(replace_data) + ) + gsr[gd_bool_col] = replace_data + + assert_eq(psr.astype("float"), gsr.astype("float")) + + # Test partial masking + psr[psr > 1] = ( + replace_data.to_pandas() + if hasattr(replace_data, "to_pandas") + else pd.Series(replace_data) + ) + gsr[gsr > 1] = replace_data + + assert_eq(psr.astype("float"), gsr.astype("float")) + + +# TODO: these two tests could perhaps be changed once specifics of +# pandas compat wrt upcasting are decided on; this is just baking in +# status-quo. 
+def test_series_setitem_upcasting_string_column(): + sr = pd.Series([0, 0, 0], dtype=str) + cr = cudf.from_pandas(sr) + new_value = np.float64(10.5) + sr[0] = str(new_value) + cr[0] = str(new_value) + assert_eq(sr, cr) + + +def test_series_setitem_upcasting_string_value(): + sr = cudf.Series([0, 0, 0], dtype=int) + # This is a distinction with pandas, which lets you instead make an + # object column with ["10", 0, 0] + sr[0] = "10" + assert_eq(pd.Series([10, 0, 0], dtype=int), sr) + with pytest.raises(ValueError): + sr[0] = "non-integer" + + +def test_scatter_by_slice_with_start_and_step(): + source = pd.Series([1, 2, 3, 4, 5]) + csource = cudf.from_pandas(source) + target = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + ctarget = cudf.from_pandas(target) + target[1::2] = source + ctarget[1::2] = csource + assert_eq(target, ctarget) + + +@pytest.mark.parametrize("n", [1, 3]) +def test_setitem_str_trailing_null(n): + trailing_nulls = "\x00" * n + s = cudf.Series(["a", "b", "c" + trailing_nulls]) + assert s[2] == "c" + trailing_nulls + s[0] = "a" + trailing_nulls + assert s[0] == "a" + trailing_nulls + s[1] = trailing_nulls + assert s[1] == trailing_nulls + s[0] = "" + assert s[0] == "" + s[0] = "\x00" + assert s[0] == "\x00" + + +@pytest.mark.parametrize( + "ps", + [ + pd.Series([1, 2, 3], index=pd.RangeIndex(0, 3)), + pd.Series([1, 2, 3], index=pd.RangeIndex(start=2, stop=-1, step=-1)), + pd.Series([1, 2, 3], index=pd.RangeIndex(start=1, stop=6, step=2)), + pd.Series( + [1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-9, step=-2) + ), + pd.Series( + [1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-12, step=-3) + ), + pd.Series([1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=14, step=4)), + pd.Series( + [1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=-14, step=-4) + ), + ], +) +@pytest.mark.parametrize("arg", [[1], 5.6, 3.1]) +def test_series_set_item_range_index(ps, arg): + gsr = cudf.from_pandas(ps) + psr = ps.copy(deep=True) + psr[arg] = 11 + gsr[arg] = 11 + + assert_eq(psr, gsr, check_index_type=True) + + +@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/7448") +def test_iloc_setitem_7448(): + index = pd.MultiIndex.from_product([(1, 2), (3, 4)]) + expect = cudf.Series([1, 2, 3, 4], index=index) + actual = cudf.from_pandas(expect) + expect[(1, 3)] = 101 + actual[(1, 3)] = 101 + assert_eq(expect, actual) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py index c49a4bfc7f3..9013675c191 100644 --- a/python/cudf/cudf/tests/series/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -148,6 +148,28 @@ def test_series_to_pandas_arrow_type_nullable_raises(scalar): ser.to_pandas(nullable=True, arrow_type=True) +def test_to_pandas_nullable_integer(): + gsr_not_null = cudf.Series([1, 2, 3]) + gsr_has_null = cudf.Series([1, 2, None]) + + psr_not_null = pd.Series([1, 2, 3], dtype="int64") + psr_has_null = pd.Series([1, 2, None], dtype="Int64") + + assert_eq(gsr_not_null.to_pandas(), psr_not_null) + assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) + + +def test_to_pandas_nullable_bool(): + gsr_not_null = cudf.Series([True, False, True]) + gsr_has_null = cudf.Series([True, False, None]) + + psr_not_null = pd.Series([True, False, True], dtype="bool") + psr_has_null = pd.Series([True, False, None], dtype="boolean") + + assert_eq(gsr_not_null.to_pandas(), psr_not_null) + assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) 
+ + @pytest.mark.parametrize( "scalar", [ diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index 80475505bc4..65ba67dce90 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -149,6 +149,27 @@ def test_series_iter_error(): iter(gs._column) +@pytest.mark.parametrize( + "data", + [ + lambda: cudf.Series([1, 2, 3, -12, 12, 44]), + lambda: cudf.Series([1, 2, 3, -12, 12, 44], dtype="str"), + lambda: cudf.DataFrame( + {"a": [1, 2, 3, -1234], "b": [0.1, 0.2222, 0.4, -3.14]} + ), + ], +) +@pytest.mark.parametrize("dtype", [None, "float", "int", "str"]) +def test_series_dataframe__array__(data, dtype): + gs = data() + + with pytest.raises(TypeError): + gs.__array__(dtype=dtype) + + with pytest.raises(TypeError): + gs.index.__array__(dtype=dtype) + + @pytest.mark.parametrize("data", [[], [None, None], ["a", None]]) def test_series_size(data): psr = pd.Series(data) diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index b5b03f4955b..e9ccee3a3ae 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -22,6 +22,7 @@ from cudf.errors import MixedTypeError from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal +from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes @pytest.mark.parametrize( @@ -679,6 +680,26 @@ def test_construct_nonnative_array(arr): assert_eq(result, expected) +@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]]) +def test_series_construction_with_nulls(numeric_types_as_str, input_obj): + dtype = np.dtype(numeric_types_as_str) + # numpy case + + expect = pd.Series(input_obj, dtype=np_dtypes_to_pandas_dtypes[dtype]) + got = cudf.Series(input_obj, dtype=dtype).to_pandas(nullable=True) + + assert_eq(expect, got) + + # Test numpy array of objects case + np_data = [ + dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj + ] + + expect = pd.Series(np_data, dtype=np_dtypes_to_pandas_dtypes[dtype]) + got = cudf.Series(np_data, dtype=dtype).to_pandas(nullable=True) + assert_eq(expect, got) + + @pytest.mark.parametrize("nan_as_null", [True, False]) def test_construct_all_pd_NA_with_dtype(nan_as_null): result = cudf.Series( @@ -1216,7 +1237,7 @@ def test_roundtrip_series_plc_column(ps): assert_eq(expect, actual) -def test_series_construction_with_nulls(): +def test_series_structarray_construction_with_nulls(): fields = [ pa.array([1], type=pa.int64()), pa.array([None], type=pa.int64()), diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py deleted file mode 100644 index 0bdb806732b..00000000000 --- a/python/cudf/cudf/tests/test_numpy_interop.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
- -import numpy as np -import pytest - -from cudf import DataFrame, Series -from cudf.testing import assert_eq - - -def test_to_records_noindex(): - df = DataFrame() - df["a"] = aa = np.arange(10, dtype=np.int32) - df["b"] = bb = np.arange(10, 20, dtype=np.float64) - - rec = df.to_records(index=False) - assert rec.dtype.names == ("a", "b") - np.testing.assert_array_equal(rec["a"], aa) - np.testing.assert_array_equal(rec["b"], bb) - - -def test_to_records_withindex(): - df = DataFrame() - df["a"] = aa = np.arange(10, dtype=np.int32) - df["b"] = bb = np.arange(10, 20, dtype=np.float64) - - rec_indexed = df.to_records(index=True) - assert rec_indexed.size == len(aa) - assert rec_indexed.dtype.names == ("index", "a", "b") - np.testing.assert_array_equal(rec_indexed["a"], aa) - np.testing.assert_array_equal(rec_indexed["b"], bb) - np.testing.assert_array_equal(rec_indexed["index"], np.arange(10)) - - -@pytest.mark.parametrize("columns", [None, ("a", "b"), ("a",), ("b",)]) -def test_from_records_noindex(columns): - recdtype = np.dtype([("a", np.int32), ("b", np.float64)]) - rec = np.recarray(10, dtype=recdtype) - rec.a = aa = np.arange(10, dtype=np.int32) - rec.b = bb = np.arange(10, 20, dtype=np.float64) - df = DataFrame.from_records(rec, columns=columns) - - if columns and "a" in columns: - assert_eq(aa, df["a"].values) - if columns and "b" in columns: - assert_eq(bb, df["b"].values) - assert_eq(np.arange(10), df.index.values) - - -@pytest.mark.parametrize("columns", [None, ("a", "b"), ("a",), ("b",)]) -def test_from_records_withindex(columns): - recdtype = np.dtype( - [("index", np.int64), ("a", np.int32), ("b", np.float64)] - ) - rec = np.recarray(10, dtype=recdtype) - rec.index = ii = np.arange(30, 40) - rec.a = aa = np.arange(10, dtype=np.int32) - rec.b = bb = np.arange(10, 20, dtype=np.float64) - df = DataFrame.from_records(rec, index="index") - - if columns and "a" in columns: - assert_eq(aa, df["a"].values) - if columns and "b" in columns: - assert_eq(bb, df["b"].values) - assert_eq(ii, df.index.values) - - -def test_numpy_non_contiguious(): - recdtype = np.dtype([("index", np.int64), ("a", np.int32)]) - rec = np.recarray(10, dtype=recdtype) - rec.index = np.arange(30, 40) - rec.a = aa = np.arange(20, dtype=np.int32)[::2] - assert rec.a.flags["C_CONTIGUOUS"] is False - - gdf = DataFrame.from_records(rec, index="index") - assert_eq(aa, gdf["a"].values) - - -@pytest.mark.parametrize( - "data", - [ - lambda: Series([1, 2, 3, -12, 12, 44]), - lambda: Series([1, 2, 3, -12, 12, 44], dtype="str"), - lambda: DataFrame( - {"a": [1, 2, 3, -1234], "b": [0.1, 0.2222, 0.4, -3.14]} - ), - ], -) -@pytest.mark.parametrize("dtype", [None, "float", "int", "str"]) -def test_series_dataframe__array__(data, dtype): - gs = data() - - with pytest.raises(TypeError): - gs.__array__(dtype=dtype) - - with pytest.raises(TypeError): - gs.index.__array__(dtype=dtype) diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py deleted file mode 100644 index 5782437e394..00000000000 --- a/python/cudf/cudf/tests/test_pandas_interop.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
- -import numpy as np -import pandas as pd - -import cudf -from cudf import DataFrame -from cudf.testing import assert_eq - - -def test_to_pandas(): - df = DataFrame() - df["a"] = np.arange(5, dtype=np.int32) - df["b"] = np.arange(10, 15, dtype=np.float64) - df["c"] = np.array([True, False, None, True, True]) - - pdf = df.to_pandas() - - assert tuple(df.columns) == tuple(pdf.columns) - - assert df["a"].dtype == pdf["a"].dtype - assert df["b"].dtype == pdf["b"].dtype - - # Notice, the dtype differ when Pandas and cudf boolean series - # contains None/NaN - assert df["c"].dtype == np.bool_ - assert pdf["c"].dtype == np.object_ - - assert len(df["a"]) == len(pdf["a"]) - assert len(df["b"]) == len(pdf["b"]) - assert len(df["c"]) == len(pdf["c"]) - - -def test_from_pandas(): - pdf = pd.DataFrame() - pdf["a"] = np.arange(10, dtype=np.int32) - pdf["b"] = np.arange(10, 20, dtype=np.float64) - - df = DataFrame.from_pandas(pdf) - - assert tuple(df.columns) == tuple(pdf.columns) - - assert df["a"].dtype == pdf["a"].dtype - assert df["b"].dtype == pdf["b"].dtype - - assert len(df["a"]) == len(pdf["a"]) - assert len(df["b"]) == len(pdf["b"]) - - -def test_from_pandas_ex1(): - pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]}) - df = DataFrame.from_pandas(pdf) - - assert tuple(df.columns) == tuple(pdf.columns) - assert np.all(df["a"].to_numpy() == pdf["a"]) - matches = df["b"].to_numpy(na_value=np.nan) == pdf["b"] - # the 3d element is False due to (nan == nan) == False - assert np.all(matches == [True, True, False, True]) - assert np.isnan(df["b"].to_numpy(na_value=np.nan)[2]) - assert np.isnan(pdf["b"][2]) - - -def test_from_pandas_with_index(): - pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]}) - pdf = pdf.set_index(np.asarray([4, 3, 2, 1])) - df = DataFrame.from_pandas(pdf) - - # Check columns - assert_eq(df.a, pdf.a) - assert_eq(df.b, pdf.b) - # Check index - assert_eq(df.index.values, pdf.index.values) - # Check again using pandas testing tool on frames - assert_eq(df, pdf) - - -def test_from_pandas_rangeindex(): - idx1 = pd.RangeIndex(start=0, stop=4, step=1, name="myindex") - idx2 = cudf.from_pandas(idx1) - - # Check index - assert_eq(idx1.values, idx2.values) - assert idx1.name == idx2.name - - -def test_from_pandas_rangeindex_step(): - expected = pd.RangeIndex(start=0, stop=8, step=2, name="myindex") - actual = cudf.from_pandas(expected) - - assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py deleted file mode 100644 index ffbf21b5548..00000000000 --- a/python/cudf/cudf/tests/test_setitem.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal, expect_warning_if - - -@pytest.mark.parametrize("arg", [[True, False, True], [True, True, True]]) -@pytest.mark.parametrize("value", [0, -1]) -def test_dataframe_setitem_bool_mask_scaler(arg, value): - df = pd.DataFrame({"a": [1, 2, 3]}) - gdf = cudf.from_pandas(df) - - df[arg] = value - gdf[arg] = value - assert_eq(df, gdf) - - -def test_dataframe_setitem_scaler_bool(): - df = pd.DataFrame({"a": [1, 2, 3]}) - df[[True, False, True]] = pd.DataFrame({"a": [-1, -2]}) - - gdf = cudf.DataFrame({"a": [1, 2, 3]}) - gdf[[True, False, True]] = cudf.DataFrame({"a": [-1, -2]}) - assert_eq(df, gdf) - - -@pytest.mark.parametrize( - "df", - [pd.DataFrame({"a": [1, 2, 3]}), pd.DataFrame({"a": ["x", "y", "z"]})], -) -@pytest.mark.parametrize("arg", [["a"], "a", "b"]) -@pytest.mark.parametrize( - "value", [-10, pd.DataFrame({"a": [-1, -2, -3]}), "abc"] -) -def test_dataframe_setitem_columns(df, arg, value): - gdf = cudf.from_pandas(df) - cudf_replace_value = value - - if isinstance(cudf_replace_value, pd.DataFrame): - cudf_replace_value = cudf.from_pandas(value) - - df[arg] = value - gdf[arg] = cudf_replace_value - assert_eq(df, gdf, check_dtype=False) - - -@pytest.mark.parametrize( - "value", - [ - pd.DataFrame({"0": [-1, -2, -3], "1": [-0, -10, -1]}), - 10, - 20, - 30, - "rapids", - "ai", - 0.32234, - np.datetime64(1324232423423342, "ns"), - np.timedelta64(34234324234324234, "ns"), - ], -) -def test_dataframe_setitem_new_columns(value): - df = pd.DataFrame({"a": [1, 2, 3]}) - arg = ["b", "c"] - gdf = cudf.from_pandas(df) - cudf_replace_value = value - - if isinstance(cudf_replace_value, pd.DataFrame): - cudf_replace_value = cudf.from_pandas(value) - - df[arg] = value - gdf[arg] = cudf_replace_value - assert_eq(df, gdf, check_dtype=True) - - -# set_item_series inconsistency -def test_series_setitem_index(): - df = pd.DataFrame( - data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3] - ) - - df["b"] = pd.Series(data=[12, 11, 10], index=[3, 2, 1]) - gdf = cudf.DataFrame( - data={"b": [-1, -2, -3], "c": [1, 2, 3]}, index=[1, 2, 3] - ) - gdf["b"] = cudf.Series(data=[12, 11, 10], index=[3, 2, 1]) - assert_eq(df, gdf, check_dtype=False) - - -@pytest.mark.parametrize( - "arg", ["b", ["a", "c"], slice(1, 2, 1), [True, False, True]] -) -def test_series_set_item(arg): - psr = pd.Series([1, 2, 3], index=["a", "b", "c"]) - gsr = cudf.from_pandas(psr) - - psr[arg] = 11 - gsr[arg] = 11 - - assert_eq(psr, gsr) - - -def test_series_setitem_singleton_range(): - sr = cudf.Series([1, 2, 3], dtype=np.int64) - psr = sr.to_pandas() - value = np.asarray([7], dtype=np.int64) - sr.iloc[:1] = value - psr.iloc[:1] = value - assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64)) - assert_eq(sr, psr, check_dtype=True) - - -@pytest.mark.xfail(reason="Copy-on-Write should make a copy") -@pytest.mark.parametrize( - "index", - [ - pd.MultiIndex.from_frame( - pd.DataFrame({"b": [3, 2, 1], "c": ["a", "b", "c"]}) - ), - ["a", "b", "c"], - ], -) -def test_setitem_dataframe_series_inplace(index): - gdf = cudf.DataFrame({"a": [1, 2, 3]}, index=index) - expected = gdf.copy() - with cudf.option_context("copy_on_write", True): - gdf["a"].replace(1, 500, inplace=True) - - assert_eq(expected, gdf) - - -@pytest.mark.parametrize( - "klass", - [ - list, - cudf.Series, - lambda x: cudf.Series(x, index=[2, 
3, 4, 5, 6]), - ], -) -def test_series_set_equal_length_object_by_mask(klass): - replace_data = klass([100, 200, 300, 400, 500]) - psr = pd.Series([1, 2, 3, 4, 5], dtype="Int64") - gsr = cudf.from_pandas(psr) - - # Lengths match in trivial case - pd_bool_col = pd.Series([True] * len(psr), dtype="boolean") - gd_bool_col = cudf.from_pandas(pd_bool_col) - psr[pd_bool_col] = ( - replace_data.to_pandas(nullable=True) - if hasattr(replace_data, "to_pandas") - else pd.Series(replace_data) - ) - gsr[gd_bool_col] = replace_data - - assert_eq(psr.astype("float"), gsr.astype("float")) - - # Test partial masking - psr[psr > 1] = ( - replace_data.to_pandas() - if hasattr(replace_data, "to_pandas") - else pd.Series(replace_data) - ) - gsr[gsr > 1] = replace_data - - assert_eq(psr.astype("float"), gsr.astype("float")) - - -def test_column_set_equal_length_object_by_mask(): - # Series.__setitem__ might bypass some of the cases - # handled in column.__setitem__ so this test is needed - - data = cudf.Series([0, 0, 1, 1, 1])._column - replace_data = cudf.Series([100, 200, 300, 400, 500])._column - bool_col = cudf.Series([True, True, True, True, True])._column - - data[bool_col] = replace_data - assert_eq( - cudf.Series._from_column(data), - cudf.Series._from_column(replace_data), - ) - - data = cudf.Series([0, 0, 1, 1, 1])._column - bool_col = cudf.Series([True, False, True, False, True])._column - data[bool_col] = replace_data - - assert_eq( - cudf.Series._from_column(data), - cudf.Series([100, 0, 300, 1, 500]), - ) - - -def test_column_set_unequal_length_object_by_mask(): - data = [1, 2, 3, 4, 5] - replace_data_1 = [8, 9] - replace_data_2 = [8, 9, 10, 11] - mask = [True, True, False, True, False] - - psr = pd.Series(data) - gsr = cudf.Series(data) - assert_exceptions_equal( - psr.__setitem__, - gsr.__setitem__, - ([mask, replace_data_1], {}), - ([mask, replace_data_1], {}), - ) - - psr = pd.Series(data) - gsr = cudf.Series(data) - assert_exceptions_equal( - psr.__setitem__, - gsr.__setitem__, - ([mask, replace_data_2], {}), - ([mask, replace_data_2], {}), - ) - - -def test_categorical_setitem_invalid(): - ps = pd.Series([1, 2, 3], dtype="category") - gs = cudf.Series([1, 2, 3], dtype="category") - - assert_exceptions_equal( - lfunc=ps.__setitem__, - rfunc=gs.__setitem__, - lfunc_args_and_kwargs=([0, 5], {}), - rfunc_args_and_kwargs=([0, 5], {}), - ) - - -def test_series_slice_setitem_list(): - actual = cudf.Series([[[1, 2], [2, 3]], [[3, 4]], [[4, 5]], [[6, 7]]]) - actual[slice(0, 3, 1)] = [[10, 11], [12, 23]] - expected = cudf.Series( - [ - [[10, 11], [12, 23]], - [[10, 11], [12, 23]], - [[10, 11], [12, 23]], - [[6, 7]], - ] - ) - assert_eq(actual, expected) - - -def test_series_slice_setitem_struct(): - actual = cudf.Series( - [ - {"a": {"b": 10}, "b": 11}, - {"a": {"b": 100}, "b": 5}, - {"a": {"b": 50}, "b": 2}, - {"a": {"b": 1000}, "b": 67}, - {"a": {"b": 4000}, "b": 1090}, - ] - ) - actual[slice(0, 3, 1)] = {"a": {"b": 5050}, "b": 101} - expected = cudf.Series( - [ - {"a": {"b": 5050}, "b": 101}, - {"a": {"b": 5050}, "b": 101}, - {"a": {"b": 5050}, "b": 101}, - {"a": {"b": 1000}, "b": 67}, - {"a": {"b": 4000}, "b": 1090}, - ] - ) - assert_eq(actual, expected) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) -@pytest.mark.parametrize("indices", [0, [1, 2]]) -def test_series_setitem_upcasting(dtype, indices): - sr = pd.Series([0, 0, 
0], dtype=dtype) - cr = cudf.from_pandas(sr) - assert_eq(sr, cr) - # Must be a non-integral floating point value that can't be losslessly - # converted to float32, otherwise pandas will try and match the source - # column dtype. - new_value = np.float64(np.pi) - col_ref = cr._column - with expect_warning_if(dtype != np.float64): - sr[indices] = new_value - with expect_warning_if(dtype != np.float64): - cr[indices] = new_value - assert_eq(sr, cr) - - if dtype == np.float64: - # no-op type cast should not modify backing column - assert col_ref == cr._column - - -# TODO: these two tests could perhaps be changed once specifics of -# pandas compat wrt upcasting are decided on; this is just baking in -# status-quo. -def test_series_setitem_upcasting_string_column(): - sr = pd.Series([0, 0, 0], dtype=str) - cr = cudf.from_pandas(sr) - new_value = np.float64(10.5) - sr[0] = str(new_value) - cr[0] = str(new_value) - assert_eq(sr, cr) - - -def test_series_setitem_upcasting_string_value(): - sr = cudf.Series([0, 0, 0], dtype=int) - # This is a distinction with pandas, which lets you instead make an - # object column with ["10", 0, 0] - sr[0] = "10" - assert_eq(pd.Series([10, 0, 0], dtype=int), sr) - with pytest.raises(ValueError): - sr[0] = "non-integer" - - -def test_scatter_by_slice_with_start_and_step(): - source = pd.Series([1, 2, 3, 4, 5]) - csource = cudf.from_pandas(source) - target = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - ctarget = cudf.from_pandas(target) - target[1::2] = source - ctarget[1::2] = csource - assert_eq(target, ctarget) - - -@pytest.mark.parametrize("n", [1, 3]) -def test_setitem_str_trailing_null(n): - trailing_nulls = "\x00" * n - s = cudf.Series(["a", "b", "c" + trailing_nulls]) - assert s[2] == "c" + trailing_nulls - s[0] = "a" + trailing_nulls - assert s[0] == "a" + trailing_nulls - s[1] = trailing_nulls - assert s[1] == trailing_nulls - s[0] = "" - assert s[0] == "" - s[0] = "\x00" - assert s[0] == "\x00" - - -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/7448") -def test_iloc_setitem_7448(): - index = pd.MultiIndex.from_product([(1, 2), (3, 4)]) - expect = cudf.Series([1, 2, 3, 4], index=index) - actual = cudf.from_pandas(expect) - expect[(1, 3)] = 101 - actual[(1, 3)] = 101 - assert_eq(expect, actual) - - -@pytest.mark.parametrize( - "value", - [ - "7", - pytest.param( - ["7", "8"], - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/11298" - ), - ), - ], -) -def test_loc_setitem_string_11298(value): - df = pd.DataFrame({"a": ["a", "b", "c"]}) - cdf = cudf.from_pandas(df) - - df.loc[:1, "a"] = value - - cdf.loc[:1, "a"] = value - - assert_eq(df, cdf) - - -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/11944") -def test_loc_setitem_list_11944(): - df = pd.DataFrame( - data={"a": ["yes", "no"], "b": [["l1", "l2"], ["c", "d"]]} - ) - cdf = cudf.from_pandas(df) - df.loc[df.a == "yes", "b"] = [["hello"]] - cdf.loc[df.a == "yes", "b"] = [["hello"]] - assert_eq(df, cdf) - - -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12504") -def test_loc_setitem_extend_empty_12504(): - df = pd.DataFrame(columns=["a"]) - cdf = cudf.from_pandas(df) - - df.loc[0] = [1] - - cdf.loc[0] = [1] - - assert_eq(df, cdf) - - -def test_loc_setitem_extend_existing_12505(): - df = pd.DataFrame({"a": [0]}) - cdf = cudf.from_pandas(df) - - df.loc[1] = 1 - - cdf.loc[1] = 1 - - assert_eq(df, cdf) - - -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12801") -def 
test_loc_setitem_add_column_partial_12801(): - df = pd.DataFrame({"a": [0, 1, 2]}) - cdf = cudf.from_pandas(df) - - df.loc[df.a < 2, "b"] = 1 - - cdf.loc[cdf.a < 2, "b"] = 1 - - assert_eq(df, cdf) - - -@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/13031") -@pytest.mark.parametrize("other_index", [["1", "3", "2"], [1, 2, 3]]) -def test_loc_setitem_series_index_alignment_13031(other_index): - s = pd.Series([1, 2, 3], index=["1", "2", "3"]) - other = pd.Series([5, 6, 7], index=other_index) - - cs = cudf.from_pandas(s) - cother = cudf.from_pandas(other) - - s.loc[["1", "3"]] = other - - cs.loc[["1", "3"]] = cother - - assert_eq(s, cs) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series([1, 2, 3], index=pd.RangeIndex(0, 3)), - pd.Series([1, 2, 3], index=pd.RangeIndex(start=2, stop=-1, step=-1)), - pd.Series([1, 2, 3], index=pd.RangeIndex(start=1, stop=6, step=2)), - pd.Series( - [1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-9, step=-2) - ), - pd.Series( - [1, 2, 3, 4, 5], index=pd.RangeIndex(start=1, stop=-12, step=-3) - ), - pd.Series([1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=14, step=4)), - pd.Series( - [1, 2, 3, 4], index=pd.RangeIndex(start=1, stop=-14, step=-4) - ), - ], -) -@pytest.mark.parametrize("arg", [*list(range(-20, 20)), 5.6, 3.1]) -def test_series_set_item_range_index(ps, arg): - gsr = cudf.from_pandas(ps) - psr = ps.copy(deep=True) - psr[arg] = 11 - gsr[arg] = 11 - - assert_eq(psr, gsr, check_index_type=True) - - -def test_series_set_item_index_reference(): - gs1 = cudf.Series([1], index=[7]) - gs2 = cudf.Series([2], index=gs1.index) - - gs1.loc[11] = 2 - ps1 = pd.Series([1], index=[7]) - ps2 = pd.Series([2], index=ps1.index) - ps1.loc[11] = 2 - - assert_eq(ps1, gs1) - assert_eq(ps2, gs2) From 5fc994093f3a1686c6a37d144251e9d2fa4e024c Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Mon, 25 Aug 2025 18:34:12 -0400 Subject: [PATCH 208/366] Fix how nvcomp major version is extracted (#19791) Resolves https://github.com/rapidsai/cudf/issues/19790 Follow-up to https://github.com/rapidsai/cudf/pull/19786 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19791 --- python/libcudf/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 6722db592bb..580078b1509 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -54,9 +54,9 @@ if(TARGET nvcomp::nvcomp) # Compute the SOVERSION from the library path get_filename_component(nvcomp_lib_dir ${nvcomp_lib_path} DIRECTORY) get_filename_component(nvcomp_lib_name ${nvcomp_lib_path} NAME) - string(REPLACE [=[libnvcomp.so.]=] "" nvcomp_soversion ${nvcomp_lib_name}) - string(REPLACE [=[.]=] ";" nvcomp_soversion ${nvcomp_soversion}) - list(GET nvcomp_soversion 0 nvcomp_soversion_major) + string(REGEX REPLACE "^libnvcomp\\.so\\.([0-9]+).*$" "\\1" nvcomp_soversion_major + ${nvcomp_lib_name} + ) install( FILES ${nvcomp_lib_path} DESTINATION ${SKBUILD_PLATLIB_DIR}/libcudf/lib64/ From 6bb386441a057d2ec89e73304338d82ca0e1c0c3 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Mon, 25 Aug 2025 19:38:58 -0400 Subject: [PATCH 209/366] Use KvikIO's implementation of file-backed memory mapping (#19164) ### Background Libcudf 25.04 and earlier use `memory_mapped_source` by default. 
For host reads it uses memory-mapped I/O, and for device reads it
uses standard I/O.

Libcudf 25.06 uses standard I/O by default. For host reads,
`memory_mapped_source` still uses memory-mapped I/O, while device reads are no longer allowed
(a runtime exception is thrown if used). The Parquet/ORC readers fall back to a host read + H2D copy to
emulate a device read for the mapped source.

### This PR

Now that the file-backed memory mapping (C++) is supported by KvikIO
(https://github.com/rapidsai/kvikio/pull/740), this PR updates libcudf to reinvigorate
`memory_mapped_source` using KvikIO's implementation. This re-enables device reads and brings
performance improvements (e.g., through parallel prefault).

### Notes

`memory_mapped_source` is an implementation detail in `datasource.cpp`. So far, testing has been
conducted on C2C (arm) and PCIe (x86) systems by manually setting `LIBCUDF_MMAP_ENABLED=ON` and
running the tests. Refer to https://github.com/rapidsai/kvikio/issues/530#issuecomment-2994871923
for benchmark results.

### Dependency

This PR depends on KvikIO PR https://github.com/rapidsai/kvikio/pull/781 to fix unit test errors.

Authors:
  - Tianyu Liu (https://github.com/kingcrimsontianyu)
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/19164
---
 cpp/src/io/utilities/datasource.cpp | 152 ++++++----------------------
 1 file changed, 31 insertions(+), 121 deletions(-)

diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index 4b2ff497f33..1eeadb68c51 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -25,6 +25,8 @@
 #include
 #include
+#include
+#include
 
 #include
 
@@ -109,20 +111,8 @@ class kvikio_source : public datasource {
                                         rmm::cuda_stream_view stream) override
   {
     CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file.");
-
     auto const read_size = std::min(size, this->size() - offset);
-
-    if constexpr (std::is_same_v) {
-      return _kvikio_handle.pread(dst,
-                                  read_size,
-                                  offset,
-                                  kvikio::defaults::task_size(),
-                                  kvikio::defaults::gds_threshold(),
-                                  false /* not to sync_default_stream */);
-    } else {
-      // HandleT is kvikio::RemoteHandle
-      return _kvikio_handle.pread(dst, read_size, offset);
-    }
+    return _kvikio_handle.pread(dst, read_size, offset);
   }
 
   size_t device_read(size_t offset,
@@ -167,6 +157,21 @@ class file_source : public kvikio_source {
       "Reading a file using kvikIO, with compatibility mode %s.",
       _kvikio_handle.get_compat_mode_manager().is_compat_mode_preferred() ? "on" : "off");
   }
+
+  std::future device_read_async(size_t offset,
+                                size_t size,
+                                uint8_t* dst,
+                                rmm::cuda_stream_view stream) override
+  {
+    CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file.");
+    auto const read_size = std::min(size, this->size() - offset);
+    return _kvikio_handle.pread(dst,
+                                read_size,
+                                offset,
+                                kvikio::defaults::task_size(),
+                                kvikio::defaults::gds_threshold(),
+                                false /* not to sync_default_stream */);
+  }
 };
 
 /**
@@ -175,117 +180,22 @@
  * Unlike Arrow's memory mapped IO class, this implementation allows memory mapping a subset of the
  * file where the starting offset may not be zero.
*/ -class memory_mapped_source : public file_source { +class memory_mapped_source : public kvikio_source { public: - explicit memory_mapped_source(char const* filepath, size_t offset, size_t max_size_estimate) - : file_source(filepath) - { - if (this->size() != 0) { - // Memory mapping is not exclusive, so we can include the whole region we expect to read - map(_kvikio_handle.fd(), offset, max_size_estimate); - } - } - - ~memory_mapped_source() override - { - if (_map_addr != nullptr) { unmap(); } - } - - std::unique_ptr host_read(size_t offset, size_t size) override - { - // Clamp length to available data - auto const read_size = std::min(size, this->size() - offset); - - // If the requested range is outside of the mapped region, read from the file - if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) { - return file_source::host_read(offset, read_size); - } - - // If the requested range is only partially within the registered region, copy to a new - // host buffer to make the data safe to copy to the device - if (_reg_addr != nullptr and - (offset < _reg_offset or offset + read_size > (_reg_offset + _reg_size))) { - auto const src = static_cast(_map_addr) + (offset - _map_offset); - - return std::make_unique>>( - std::vector(src, src + read_size)); - } - - return std::make_unique( - static_cast(_map_addr) + offset - _map_offset, read_size); - } - - std::future> host_read_async(size_t offset, - size_t size) override - { - // Use the default implementation instead of the file_source's implementation - return datasource::host_read_async(offset, size); - } - - size_t host_read(size_t offset, size_t size, uint8_t* dst) override - { - // Clamp length to available data - auto const read_size = std::min(size, this->size() - offset); - - // If the requested range is outside of the mapped region, read from the file - if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) { - return file_source::host_read(offset, read_size, dst); + explicit memory_mapped_source(char const* filepath, + size_t offset, + [[maybe_unused]] size_t max_size_estimate) + : kvikio_source{kvikio::MmapHandle()} + { + // Since the superclass kvikio_source is initialized with an empty mmap handle, `this->size()` + // returns 0 at this point. Use `kvikio::get_file_size()` instead. 
+ auto const file_size = kvikio::get_file_size(filepath); + if (file_size != 0) { + CUDF_EXPECTS(offset < file_size, "Offset is past end of file", std::overflow_error); + _kvikio_handle = + kvikio::MmapHandle(filepath, "r", std::nullopt, 0, kvikio::FileHandle::m644, MAP_SHARED); } - - auto const src = static_cast(_map_addr) + (offset - _map_offset); - std::memcpy(dst, src, read_size); - return read_size; } - - std::future host_read_async(size_t offset, size_t size, uint8_t* dst) override - { - // Use the default implementation instead of the file_source's implementation - return datasource::host_read_async(offset, size, dst); - } - - [[nodiscard]] bool supports_device_read() const override { return false; } - - [[nodiscard]] bool is_device_read_preferred(size_t size) const override - { - return supports_device_read(); - } - - private: - void map(int fd, size_t offset, size_t size) - { - CUDF_EXPECTS(offset < this->size(), "Offset is past end of file", std::overflow_error); - - // Offset for `mmap()` must be page aligned - _map_offset = offset & ~(sysconf(_SC_PAGESIZE) - 1); - - if (size == 0 || (offset + size) > this->size()) { size = this->size() - offset; } - - // Size for `mmap()` needs to include the page padding - _map_size = size + (offset - _map_offset); - if (_map_size == 0) { return; } - - // Check if accessing a region within already mapped area - _map_addr = mmap(nullptr, _map_size, PROT_READ, MAP_PRIVATE, fd, _map_offset); - CUDF_EXPECTS(_map_addr != MAP_FAILED, "Cannot create memory mapping"); - } - - void unmap() - { - if (_map_addr != nullptr) { - auto const result = munmap(_map_addr, _map_size); - if (result != 0) { CUDF_LOG_WARN("munmap failed with %d", result); } - _map_addr = nullptr; - } - } - - private: - size_t _map_offset = 0; - size_t _map_size = 0; - void* _map_addr = nullptr; - - size_t _reg_offset = 0; - size_t _reg_size = 0; - void* _reg_addr = nullptr; }; /** From 6ddf5670bb681a8357c0708aa3cdd8b6c6bd5d11 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 26 Aug 2025 10:47:00 -0400 Subject: [PATCH 210/366] Add nvbench benchmark for cudf::encode API (#19777) Adds a benchmark for the libcudf `cudf::encode` API. Looking to improve the performance of this API in follow on PR(s). 
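For reference, a minimal sketch of the call being measured (illustrative only; the
header path and the usual `cudf::encode` contract of returning a table of unique keys
plus an index column mapping each input row back to its key row are assumed here,
not taken verbatim from this patch):

```cpp
#include <cudf/table/table_view.hpp>
#include <cudf/transform.hpp>

// `input` stands in for any table, e.g. the single random column the new
// benchmark builds below; the function name is a placeholder.
void encode_example(cudf::table_view const& input)
{
  // keys:    table holding the deduplicated rows of `input`
  // indices: for each input row, the position of its matching row in `keys`
  auto [keys, indices] = cudf::encode(input);
}
```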
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19777 --- cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/transform/encode.cpp | 64 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 cpp/benchmarks/transform/encode.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 4f07dd72ed4..a6d070ce2d3 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -368,7 +368,9 @@ ConfigureNVBench( # ################################################################################################## # * transform benchmark # --------------------------------------------------------------------------------- -ConfigureNVBench(TRANSFORM_NVBENCH transform/polynomials.cpp transform/transform.cpp) +ConfigureNVBench( + TRANSFORM_NVBENCH transform/encode.cpp transform/polynomials.cpp transform/transform.cpp +) # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/transform/encode.cpp b/cpp/benchmarks/transform/encode.cpp new file mode 100644 index 00000000000..dea40a0865d --- /dev/null +++ b/cpp/benchmarks/transform/encode.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +template +static void bench_encode(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const width = static_cast(state.get_int64("width")); + auto const nulls = state.get_float64("nulls"); + auto const data_type = cudf::type_to_id(); + + auto range = data_type == cudf::type_id::STRING ? 
(width / 10) : width;
+  data_profile const profile =
+    data_profile_builder().cardinality(0).null_probability(nulls).distribution(
+      data_type, distribution_id::UNIFORM, 0, range);
+  auto input = create_random_column(data_type, row_count{num_rows}, profile);
+  auto tv    = cudf::table_view({input->view()});
+
+  auto alloc_size = input->alloc_size();
+  state.add_global_memory_reads(alloc_size);
+  state.add_global_memory_writes(num_rows);
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { auto result = cudf::encode(tv, stream); });
+}
+
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::string_view, "string_view", "string_view");
+
+using Types = nvbench::type_list;
+
+NVBENCH_BENCH_TYPES(bench_encode, NVBENCH_TYPE_AXES(Types))
+  .set_name("encode")
+  .add_int64_axis("width", {10, 100})
+  .add_int64_axis("num_rows", {262144, 2097152, 16777216, 67108864})
+  .add_float64_axis("nulls", {0, 0.1});

From a2ec340718db95c639a2692789e961193f6d2ffc Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 26 Aug 2025 10:47:17 -0400
Subject: [PATCH 211/366] Fix ndsh benchmarks nvtx range usage (#19753)

Updates the NVTX scoped-range usage in the ndsh benchmarks to use the
`benchmarks` prefix instead of `libcudf`. This makes the nsys tracing clearer
by identifying benchmark utilities separately from libcudf APIs.

Also introduces a new `CUDF_BENCHMARK_RANGE()` macro to be used in benchmarks
instead of the internal libcudf macro `CUDF_FUNC_RANGE()`.

Example nsys trace with this change:
```
 16.3  657,460,069  2  328,730,034.5  328,730,034.5  12,374,462   645,085,607  447,394,341.2  PushPop  libcudf:write_parquet
 16.0  646,282,446  1  646,282,446.0  646,282,446.0  646,282,446  646,282,446            0.0  PushPop  benchmarks:write_to_parquet_device_buffer
  4.0  162,550,464  1  162,550,464.0  162,550,464.0  162,550,464  162,550,464            0.0  PushPop  benchmarks:generate_orders_lineitem_part
  2.2   87,655,406  1   87,655,406.0   87,655,406.0   87,655,406   87,655,406            0.0  PushPop  benchmarks:generate_lineitem_partial
...
```
It is now easier to separate the functions by name.
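A sketch of the intended usage, mirroring the doc comment added in
`nvtx_ranges.hpp` (the helper function and include path below are hypothetical;
only the macro and the "benchmarks" domain come from this patch):

```cpp
#include "common/nvtx_ranges.hpp"  // assumed include path for the new header

// Hypothetical benchmark helper; any benchmark utility works the same way.
void generate_some_benchmark_table()
{
  CUDF_BENCHMARK_RANGE();  // opens an NVTX range in the "benchmarks" domain,
                           // named after the enclosing function via __func__
  // ... benchmark-utility work ...
}  // the range closes automatically when the function returns
```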
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19753 --- .../ndsh_data_generator.cpp | 23 +++++----- .../random_column_generator.cu | 17 +++---- .../ndsh_data_generator/table_helpers.cpp | 21 +++++---- cpp/benchmarks/common/nvtx_ranges.hpp | 46 +++++++++++++++++++ cpp/benchmarks/ndsh/q09.cpp | 10 ++-- cpp/benchmarks/ndsh/utilities.cpp | 31 ++++++------- cpp/benchmarks/ndsh/utilities.hpp | 1 - 7 files changed, 99 insertions(+), 50 deletions(-) create mode 100644 cpp/benchmarks/common/nvtx_ranges.hpp diff --git a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp index 8a1df9c97dd..f03cbc5d125 100644 --- a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp +++ b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp @@ -19,12 +19,13 @@ #include "random_column_generator.hpp" #include "table_helpers.hpp" +#include + #include #include #include #include -#include #include #include #include @@ -156,7 +157,7 @@ std::unique_ptr generate_orders_independent(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); cudf::size_type const o_num_rows = scale_factor * 1'500'000; // Generate the `o_orderkey` column @@ -280,7 +281,7 @@ std::unique_ptr generate_lineitem_partial(cudf::table_view const& o rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const o_num_rows = orders_independent.num_rows(); // Generate the `lineitem` table. For each row in the `orders` table, // we have a random number (between 1 and 7) of rows in the `lineitem` table @@ -450,7 +451,7 @@ std::unique_ptr generate_orders_dependent(cudf::table_view const& l rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const l_linestatus_mask = lineitem_partial.column(0); auto const l_orderkey = lineitem_partial.column(1); auto const l_extendedprice = lineitem_partial.column(6); @@ -543,7 +544,7 @@ std::unique_ptr generate_partsupp(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Define the number of rows in the `part` and `partsupp` tables cudf::size_type const p_num_rows = scale_factor * 200'000; cudf::size_type const ps_num_rows = scale_factor * 800'000; @@ -591,7 +592,7 @@ std::unique_ptr generate_part(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); cudf::size_type const num_rows = scale_factor * 200'000; // Generate the `p_partkey` column @@ -717,7 +718,7 @@ generate_orders_lineitem_part(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Generate a table with the independent columns of the `orders` table auto orders_independent = generate_orders_independent(scale_factor, stream, mr); @@ -784,7 +785,7 @@ std::unique_ptr generate_supplier(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Calculate the number of rows based on the scale factor cudf::size_type const num_rows = scale_factor * 10'000; @@ -845,7 +846,7 @@ std::unique_ptr 
generate_customer(double scale_factor, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Calculate the number of rows based on the scale factor cudf::size_type const num_rows = scale_factor * 150'000; @@ -912,7 +913,7 @@ std::unique_ptr generate_customer(double scale_factor, std::unique_ptr generate_nation(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Define the number of rows constexpr cudf::size_type num_rows = 25; @@ -952,7 +953,7 @@ std::unique_ptr generate_nation(rmm::cuda_stream_view stream, std::unique_ptr generate_region(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Define the number of rows constexpr cudf::size_type num_rows = 5; diff --git a/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu index 4246bd1a83b..aac50aeecb2 100644 --- a/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu +++ b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,11 +16,12 @@ #include "random_column_generator.hpp" +#include + #include #include #include -#include #include #include @@ -91,7 +92,7 @@ std::unique_ptr generate_random_string_column(cudf::size_type lowe rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto offsets_begin = cudf::detail::make_counting_transform_iterator( 0, random_number_generator(lower, upper)); auto [offsets_column, computed_bytes] = cudf::strings::detail::make_offsets_child_column( @@ -119,7 +120,7 @@ std::unique_ptr generate_random_numeric_column(T lower, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto col = cudf::make_numeric_column( cudf::data_type{cudf::type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream, mr); cudf::size_type begin = 0; @@ -165,7 +166,7 @@ std::unique_ptr generate_primary_key_column(cudf::scalar const& st rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); return cudf::sequence(num_rows, start, stream, mr); } @@ -174,7 +175,7 @@ std::unique_ptr generate_repeat_string_column(std::string const& v rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const scalar = cudf::string_scalar(value); return cudf::make_column_from_scalar(scalar, num_rows, stream, mr); } @@ -185,7 +186,7 @@ std::unique_ptr generate_random_string_column_from_set( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Build a gather map of random strings to choose from // The size of the string sets always fits within 16-bit integers auto const indices = @@ -211,7 +212,7 @@ std::unique_ptr generate_repeat_sequence_column(T seq_length, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto pkey = generate_primary_key_column(cudf::numeric_scalar(0), num_rows, stream, mr); auto repeat_seq_zero_indexed = 
cudf::binary_operation(pkey->view(), diff --git a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp index 7095c227649..4185bcf60e5 100644 --- a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp +++ b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp @@ -18,13 +18,14 @@ #include "random_column_generator.hpp" +#include + #include #include #include #include #include #include -#include #include #include #include @@ -55,7 +56,7 @@ std::unique_ptr add_calendrical_days(cudf::column_view const& time rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const days_duration_type = cudf::cast(days, cudf::data_type{cudf::type_id::DURATION_DAYS}); auto const data_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; return cudf::binary_operation( @@ -80,7 +81,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); constexpr auto oob_policy = cudf::out_of_bounds_policy::NULLIFY; auto const left_selected = left_input.select(left_on); auto const right_selected = right_input.select(right_on); @@ -116,7 +117,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Expression: (90000 + ((p_partkey/10) modulo 20001) + 100 * (p_partkey modulo 1000)) / 100 auto table = cudf::table_view({p_partkey}); auto p_partkey_col_ref = cudf::ast::column_reference(0); @@ -160,7 +161,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Expression: (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + 1 // Generate the `s` col @@ -232,7 +233,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Expression: ps_suppkey = (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 // Generate the `s` col @@ -299,7 +300,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const sum_agg = cudf::make_sum_aggregation(); auto const l_num_rows_scalar = cudf::reduce(o_rep_freqs, *sum_agg, cudf::data_type{cudf::type_id::INT32}, stream, mr); @@ -322,7 +323,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const one = cudf::numeric_scalar(1); auto const one_minus_discount = cudf::binary_operation( one, discount, cudf::binary_operator::SUB, cudf::data_type{cudf::type_id::FLOAT64}, stream, mr); @@ -352,7 +353,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu [[nodiscard]] std::unique_ptr generate_address_column( cudf::size_type num_rows, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); return generate_random_string_column(10, 40, num_rows, stream, mr); } @@ -367,7 +368,7 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu rmm::cuda_stream_view stream, 
rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const part_a = cudf::strings::from_integers( generate_random_numeric_column(10, 34, num_rows, stream, mr)->view()); auto const part_b = cudf::strings::from_integers( diff --git a/cpp/benchmarks/common/nvtx_ranges.hpp b/cpp/benchmarks/common/nvtx_ranges.hpp new file mode 100644 index 00000000000..edcb0ab358c --- /dev/null +++ b/cpp/benchmarks/common/nvtx_ranges.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::benchmark { +/** + * @brief Tag type for the NVTX domain + */ +struct benchmark_domain { + static constexpr char const* name{"benchmarks"}; ///< Name of the domain +}; + +} // namespace cudf::benchmark + +/** + * @brief Convenience macro for generating an NVTX range in the `benchmarks` domain + * from the lifetime of a function. + * + * Uses the name of the immediately enclosing function returned by `__func__` to + * name the range. + * + * Example: + * ``` + * void some_function(){ + * CUDF_BENCHMARK_RANGE(); + * ... + * } + * ``` + */ +#define CUDF_BENCHMARK_RANGE() NVTX3_FUNC_RANGE_IN(cudf::benchmark::benchmark_domain) diff --git a/cpp/benchmarks/ndsh/q09.cpp b/cpp/benchmarks/ndsh/q09.cpp index 28063533f16..7bb333a4956 100644 --- a/cpp/benchmarks/ndsh/q09.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -16,6 +16,8 @@ #include "utilities.hpp" +#include + #include #include #include @@ -115,7 +117,7 @@ struct q9_data { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const one = cudf::numeric_scalar(1); auto const one_minus_discount = @@ -147,7 +149,7 @@ struct q9_data { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); std::string udf = R"***( @@ -174,7 +176,7 @@ struct q9_data { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); cudf::ast::tree tree; cudf::table_view table{std::vector{discount, extendedprice, supplycost, quantity}}; @@ -245,7 +247,7 @@ q9_data load_data(std::unordered_map& source std::unique_ptr join_data(q9_data const& data) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` diff --git a/cpp/benchmarks/ndsh/utilities.cpp b/cpp/benchmarks/ndsh/utilities.cpp index 766d30bdfeb..074bc2d75f8 100644 --- a/cpp/benchmarks/ndsh/utilities.cpp +++ b/cpp/benchmarks/ndsh/utilities.cpp @@ -16,13 +16,12 @@ #include "utilities.hpp" -#include "common/ndsh_data_generator/ndsh_data_generator.hpp" -#include "common/table_utilities.hpp" -#include "cudf/detail/utilities/integer_utils.hpp" 
+#include +#include +#include #include #include -#include #include #include #include @@ -137,7 +136,7 @@ table_with_names& table_with_names::append(std::unique_ptr& col, cudf::table_view table_with_names::select(std::vector const& col_names) const { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); std::vector col_indices; for (auto const& col_name : col_names) { col_indices.push_back(column_id(col_name)); @@ -147,7 +146,7 @@ cudf::table_view table_with_names::select(std::vector const& col_na void table_with_names::to_parquet(std::string const& filepath) const { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const sink_info = cudf::io::sink_info(filepath); cudf::io::table_metadata metadata; metadata.schema_info = @@ -165,7 +164,7 @@ std::unique_ptr join_and_gather(cudf::table_view const& left_input, std::vector const& right_on, cudf::null_equality compare_nulls) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; auto const left_selected = left_input.select(left_on); auto const right_selected = right_input.select(right_on); @@ -200,7 +199,7 @@ std::unique_ptr apply_inner_join( std::vector const& right_on, cudf::null_equality compare_nulls) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); std::vector left_on_indices; std::vector right_on_indices; std::transform( @@ -230,7 +229,7 @@ std::unique_ptr apply_inner_join( std::unique_ptr apply_filter(std::unique_ptr const& table, cudf::ast::operation const& predicate) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const boolean_mask = cudf::compute_column(table->table(), predicate); auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); return std::make_unique(std::move(result_table), table->column_names()); @@ -239,7 +238,7 @@ std::unique_ptr apply_filter(std::unique_ptr std::unique_ptr apply_mask(std::unique_ptr const& table, std::unique_ptr const& mask) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); return std::make_unique(std::move(result_table), table->column_names()); } @@ -247,7 +246,7 @@ std::unique_ptr apply_mask(std::unique_ptr c std::unique_ptr apply_groupby(std::unique_ptr const& table, groupby_context_t const& ctx) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const keys = table->select(ctx.keys); cudf::groupby::groupby groupby_obj(keys); std::vector result_column_names; @@ -291,7 +290,7 @@ std::unique_ptr apply_orderby(std::unique_ptr const& sort_keys, std::vector const& sort_key_orders) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); std::vector column_views; for (auto& key : sort_keys) { column_views.push_back(table->column(key)); @@ -305,7 +304,7 @@ std::unique_ptr apply_reduction(cudf::column_view const& colum cudf::aggregation::Kind const& agg_kind, std::string const& col_name) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const agg = cudf::make_sum_aggregation(); auto const result = cudf::reduce(column, *agg, column.type()); cudf::size_type const len = 1; @@ -322,7 +321,7 @@ std::unique_ptr read_parquet( std::vector const& columns, std::unique_ptr const& predicate) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto builder = cudf::io::parquet_reader_options_builder(source_info); if (!columns.empty()) { builder.columns(columns); } if (predicate) { builder.filter(*predicate); } @@ -358,7 +357,7 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, std::vector const& col_names, cuio_source_sink_pair& source) { - 
CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); auto const stream = cudf::get_default_stream(); // Prepare the table metadata @@ -410,7 +409,7 @@ void generate_parquet_data_sources(double scale_factor, std::vector const& table_names, std::unordered_map& sources) { - CUDF_FUNC_RANGE(); + CUDF_BENCHMARK_RANGE(); // Set the memory resource to the managed pool auto old_mr = cudf::get_current_device_resource(); diff --git a/cpp/benchmarks/ndsh/utilities.hpp b/cpp/benchmarks/ndsh/utilities.hpp index 82c43d8a5ee..70288bb1d2f 100644 --- a/cpp/benchmarks/ndsh/utilities.hpp +++ b/cpp/benchmarks/ndsh/utilities.hpp @@ -18,7 +18,6 @@ #include "io/cuio_common.hpp" -#include #include #include From 0c7588751fd332b52ba7b3ded0fc3f7d0c4f145f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 26 Aug 2025 15:18:40 -0500 Subject: [PATCH 212/366] rearrange dependencies.yaml, other small changes (#19794) Splitting some changes off of the CUDA 13 support PR (#19768) ... that has gotten too large to review. Contributes to https://github.com/rapidsai/build-planning/issues/208 * uses the new `[cu12, cu13]` extras added to `dask-cuda` for wheels: https://github.com/rapidsai/dask-cuda/pull/1536 * replaces hard-coding of CUDA major version in `pandas` diff script * moves `numba-cuda` floor from `>=0.19.0` to `>=0.19.1` * consolidates some dependency lists with unnecessary `cuda: "12.*"` filters Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/19794 --- ci/cudf_pandas_scripts/pandas-tests/diff.sh | 3 +- .../all_cuda-129_arch-aarch64.yaml | 2 +- .../all_cuda-129_arch-x86_64.yaml | 2 +- conda/recipes/cudf/recipe.yaml | 4 +- dependencies.yaml | 144 ++++++++---------- python/cudf/pyproject.toml | 4 +- python/pylibcudf/pyproject.toml | 2 +- 7 files changed, 73 insertions(+), 88 deletions(-) diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh index aec1acd1539..f84776ad173 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/diff.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -13,7 +13,8 @@ rapids-logger "Github job name: ${GH_JOB_NAME}" rapids-logger "Rapids version: ${RAPIDS_FULL_VERSION}" PY_VER="313" -PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +PR_ARTIFACT=$(rapids-s3-path)cuda${RAPIDS_CUDA_MAJOR}_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json rapids-logger "Fetching latest available results from nightly" aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/cudf/" --query "sort_by(Contents[?ends_with(Key, '_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json')], &LastModified)[::].[Key]" --output text | tee s3_output.txt diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index ccf44072677..0140f536cc1 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -55,7 +55,7 @@ dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.19.0,<0.20.0a0 +- numba-cuda>=0.19.1,<0.20.0a0 - numba>=0.61.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index af6ca634e15..36d858a4957 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -56,7 +56,7 @@ 
dependencies: - nbsphinx - ninja - notebook -- numba-cuda>=0.19.0,<0.20.0a0 +- numba-cuda>=0.19.1,<0.20.0a0 - numba>=0.61.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 4b787a8178a..b89151fb266 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -55,7 +55,7 @@ requirements: - rapids-build-backend >=0.4.0,<0.5.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - - numba-cuda >=0.19.0,<0.20.0a0 + - numba-cuda >=0.19.1,<0.20.0a0 - libcudf =${{ version }} - pylibcudf =${{ version }} - rmm =${{ minor_version }} @@ -70,7 +70,7 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.4.0dev0 - cupy >=12.0.0 - - numba-cuda >=0.19.0,<0.20.0a0 + - numba-cuda >=0.19.1,<0.20.0a0 # TODO: Revert to numba>=0.60.0,<0.62.0a0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. - numba >=0.61.0,<0.62.0a0 - numpy >=1.23,<3.0a0 diff --git a/dependencies.yaml b/dependencies.yaml index 6398698546d..614c4fbc395 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -14,9 +14,11 @@ files: - cuda - cuda_version - depends_on_cupy + - depends_on_dask_cuda - depends_on_libkvikio - depends_on_librmm - depends_on_libnvcomp + - depends_on_numba_cuda - depends_on_rapids_logger - depends_on_rmm - develop @@ -38,7 +40,6 @@ files: - test_python_common - test_python_cudf - test_python_cudf_common - - test_python_dask_cudf - test_python_pylibcudf - test_python_cudf_pandas - test_python_cudf_polars @@ -75,6 +76,7 @@ files: - depends_on_cudf - depends_on_pylibcudf - depends_on_libcudf + - depends_on_numba_cuda test_python_pylibcudf: output: none includes: @@ -85,6 +87,7 @@ files: - test_python_pylibcudf - depends_on_pylibcudf - depends_on_libcudf + - depends_on_numba_cuda test_python_other: output: none includes: @@ -93,11 +96,12 @@ files: - py_version - test_python_common - test_python_cudf_common - - test_python_dask_cudf - test_python_pylibcudf - depends_on_cudf + - depends_on_dask_cuda - depends_on_pylibcudf - depends_on_libcudf + - depends_on_numba_cuda - depends_on_dask_cudf - depends_on_cudf_kafka - depends_on_custreamz @@ -140,6 +144,7 @@ files: - cuda - cuda_version - depends_on_cudf + - depends_on_dask_cuda - depends_on_dask_cudf - depends_on_pylibcudf - depends_on_libcudf @@ -162,7 +167,7 @@ files: includes: - build_base - build_python_common - - build_python_cudf + - depends_on_numba_cuda - depends_on_pylibcudf - depends_on_libcudf - depends_on_librmm @@ -179,6 +184,7 @@ files: - pyarrow_run - depends_on_cupy - depends_on_libcudf + - depends_on_numba_cuda - depends_on_pylibcudf_pyarrow - depends_on_rmm py_test_cudf: @@ -275,6 +281,7 @@ files: key: test includes: - depends_on_cupy + - depends_on_numba_cuda - pyarrow_run - test_python_common - test_python_cudf_common @@ -325,6 +332,7 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_dask_cuda - numpy_run - test_python_common - test_python_cudf_polars @@ -353,9 +361,9 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_dask_cuda - test_python_common - test_python_cudf_common - - test_python_dask_cudf py_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka @@ -441,6 +449,7 @@ dependencies: - output_types: conda packages: - c-compiler + - cuda-nvcc - cxx-compiler - dlpack>=0.8,<1.0 - zlib>=1.2.13 @@ -449,22 +458,14 @@ dependencies: matrices: - matrix: arch: x86_64 - cuda: "12.*" packages: - gcc_linux-64=14.* - sysroot_linux-64==2.28 - matrix: arch: aarch64 - 
cuda: "12.*" packages: - gcc_linux-aarch64=14.* - sysroot_linux-aarch64==2.28 - - output_types: conda - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-nvcc build_cpp: common: - output_types: conda @@ -500,21 +501,6 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cython>=3.0.3 - build_python_cudf: - common: - - output_types: [conda] - packages: - - &numba_cuda numba-cuda>=0.19.0,<0.20.0a0 - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - packages: - - &numba_cuda_cu12 numba-cuda[cu12]>=0.19.0,<0.20.0a0 - - matrix: # Fallback for no matrix - packages: - - *numba_cuda_cu12 numpy_run: common: - output_types: [conda, requirements, pyproject] @@ -559,24 +545,21 @@ dependencies: packages: - cuda-version=12.9 cuda: + common: + - output_types: [conda] + packages: + - cuda-cudart-dev + - cuda-nvrtc-dev + - cuda-nvtx-dev + - libcurand-dev + - libnvjitlink-dev specific: - - output_types: conda - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-cudart-dev - - cuda-nvrtc-dev - - cuda-nvtx-dev - - libcurand-dev - - libnvjitlink-dev - output_types: conda matrices: - matrix: arch: aarch64 packages: - matrix: - cuda: "12.*" arch: x86_64 packages: - libcufile-dev @@ -607,7 +590,6 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==25.10.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -690,9 +672,6 @@ dependencies: - packaging - rich - typing_extensions>=4.0.0 - - output_types: [conda] - packages: - - *numba_cuda - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -705,14 +684,6 @@ dependencies: packages: &run_cudf_packages_all_cu12 - cuda-python>=12.9.1,<13.0a0 - {matrix: null, packages: *run_cudf_packages_all_cu12} - - output_types: [requirements, pyproject] - matrices: - - matrix: {cuda: "12.*"} - packages: - - *numba_cuda_cu12 - - matrix: # Fallback for no matrix - packages: - - *numba_cuda_cu12 - output_types: [requirements, pyproject] matrices: - matrix: @@ -763,15 +734,7 @@ dependencies: - output_types: conda packages: - *cmake_ver - specific: - - output_types: conda - matrices: - - matrix: - cuda: "12.*" - packages: - - cuda-sanitizer-api - - matrix: # Fallback for no matrix - packages: + - cuda-sanitizer-api # packages we want in the 'test_cpp' group in 'files', for CI, but which # shouldn't be added to 'all' for building a development environment test_cpp_cudf: @@ -807,7 +770,7 @@ dependencies: # TODO: Revert to numba==0.60.0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. - numba==0.61.0 - pandas==2.0.* - - numba-cuda==0.19.0 + - numba-cuda==0.19.1 - matrix: {dependencies: "latest"} packages: - pandas==2.3.1 @@ -844,11 +807,9 @@ dependencies: - output_types: conda packages: - python-xxhash - - *numba_cuda - output_types: [pyproject, requirements] packages: - xxhash - - *numba_cuda_cu12 test_python_cudf: common: - output_types: [conda, requirements, pyproject] @@ -895,28 +856,10 @@ dependencies: - pytorch>=2.4.0 - matrix: packages: - test_python_dask_cudf: - common: - - output_types: [conda, requirements, pyproject] - packages: - - dask-cuda==25.10.*,>=0.0.0a0 - specific: - - output_types: [conda, requirements] - matrices: - - matrix: {dependencies: "oldest"} - packages: - - numpy==1.24.* - # pyarrow 14 is fine in some circumstances but we require pyarrow - # 15 in our CI tests in order to get a lz4-c that is compatible - # with cudf_kafka's dependencies. 
- - pyarrow==15.* - - matrix: - packages: test_python_cudf_polars: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==25.10.*,>=0.0.0a0 - rich test_python_narwhals: common: @@ -945,6 +888,21 @@ dependencies: packages: - libcudf-cu12==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*libcudf_unsuffixed]} + depends_on_numba_cuda: + common: + - output_types: [conda] + packages: + - numba-cuda>=0.19.1,<0.20.0a0 + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - &numba_cuda_cu12 numba-cuda[cu12]>=0.19.1,<0.20.0a0 + - matrix: + packages: + - *numba_cuda_cu12 depends_on_pylibcudf: common: - output_types: conda @@ -1128,6 +1086,32 @@ dependencies: - nbformat - openpyxl - pytest-rerunfailures + depends_on_dask_cuda: + common: + - output_types: conda + packages: + - &dask_cuda_unsuffixed dask-cuda==25.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - dask-cuda[cu12]==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - dask-cuda[cu13]==25.10.*,>=0.0.0a0 + - matrix: + packages: + - *dask_cuda_unsuffixed depends_on_dask_cudf: common: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 56915721e37..ece2dd3f07a 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "cupy-cuda12x>=12.0.0", "fsspec>=0.6.0", "libcudf==25.10.*,>=0.0.0a0", - "numba-cuda[cu12]>=0.19.0,<0.20.0a0", + "numba-cuda[cu12]>=0.19.1,<0.20.0a0", "numba>=0.61.0,<0.62.0a0", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", @@ -125,7 +125,7 @@ requires = [ "libcudf==25.10.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "ninja", - "numba-cuda[cu12]>=0.19.0,<0.20.0a0", + "numba-cuda[cu12]>=0.19.1,<0.20.0a0", "pylibcudf==25.10.*,>=0.0.0a0", "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index dd871c4bbdb..85fabaa14bf 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -44,7 +44,7 @@ test = [ "hypothesis>=6.131.7", "mmh3", "nanoarrow", - "numba-cuda[cu12]>=0.19.0,<0.20.0a0", + "numba-cuda[cu12]>=0.19.1,<0.20.0a0", "numba>=0.61.0,<0.62.0a0", "pandas", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", From b5c29e079542f972b066ce0c02825ad81aa65577 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 26 Aug 2025 16:49:39 -0400 Subject: [PATCH 213/366] Update rapids-dependency-file-generator (#19796) This PR updates the rapids-dependency-file-generator hook to get https://github.com/rapidsai/dependency-file-generator/pull/163. 
Authors:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cudf/pull/19796
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9850bce2c95..e66e866c4f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -158,10 +158,10 @@ repos:
       - id: verify-codeowners
         args: [--fix, --project-prefix=cudf]
   - repo: https://github.com/rapidsai/dependency-file-generator
-    rev: v1.19.1
+    rev: v1.20.0
     hooks:
       - id: rapids-dependency-file-generator
-        args: ["--clean"]
+        args: ["--clean", "--warn-all", "--strict"]
   - repo: https://github.com/shellcheck-py/shellcheck-py
     rev: v0.10.0.1
     hooks:

From aecf57636299731f47b46a6148d469dcc4ce6af7 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 27 Aug 2025 10:47:20 -0400
Subject: [PATCH 214/366] Some clarifications, improvements to
 GroupedRollingWindows in cudf-polars (#19776)

Follows up #19684. Adds clarifying comments to the implementation and
replaces the unnecessary sorting with a scatter, because we're doing a
`left_join`.

- Contributes to #18633

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/19776
---
 .../cudf_polars/dsl/expressions/rolling.py    | 67 +++++++-------
 .../tests/expressions/test_rolling.py         | 26 +++++++
 2 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py
index f89d0e4133f..c01043d2d18 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/rolling.py
@@ -223,12 +223,11 @@ def do_evaluate(  # noqa: D102
         )  # pragma: no cover; translation raises first
 
         by_exprs = self.children[: self.by_count]
-        by_cols = list(
-            broadcast(
-                *(b.evaluate(df, context=ExecutionContext.FRAME) for b in by_exprs),
-                target_length=df.num_rows,
-            )
+        by_cols = broadcast(
+            *(b.evaluate(df) for b in by_exprs),
+            target_length=df.num_rows,
         )
+
         by_tbl = plc.Table([c.obj for c in by_cols])
 
         sorted_flag = (
@@ -253,13 +252,9 @@ def do_evaluate(  # noqa: D102
                 out_dtypes.append(val.dtype)
 
                 if isinstance(val, expr.Len):
-                    # Count rows per group via sum(1).
-                    ones = plc.Column.from_scalar(
-                        plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT8)), df.num_rows
-                    )
-                    gb_requests.append(
-                        plc.groupby.GroupByRequest(ones, [plc.aggregation.sum()])
-                    )
+                    # A count aggregation, we need a column so use a key column
+                    col = by_cols[0].obj
+                    gb_requests.append(plc.groupby.GroupByRequest(col, [val.agg_request]))
                 elif isinstance(val, expr.Agg):
                     (child,) = (
                         val.children if val.name != "quantile" else (val.children[0],)
@@ -270,42 +265,32 @@ def do_evaluate(  # noqa: D102
         group_keys_tbl, value_tables = grouper.aggregate(gb_requests)
         out_cols = (t.columns()[0] for t in value_tables)
 
-        # Build gather maps to broadcast per-group results to all rows.
-        # Also left-join input keys to group-keys so every input row appears exactly once.
-        lg, rg = plc.join.left_join(
+        # We do a left-join from the input keys to the group keys
+        # so every input row appears exactly once. left_order is
+        # returned un-ordered by libcudf.
+        left_order, right_order = plc.join.left_join(
             by_tbl, group_keys_tbl, plc.types.NullEquality.EQUAL
         )
-        # Reorder the gather maps to preserve left/input order
-        left_rows, right_rows = by_tbl.num_rows(), group_keys_tbl.num_rows()
-        init = plc.Scalar.from_py(0, plc.types.SIZE_TYPE)
-        step = plc.Scalar.from_py(1, plc.types.SIZE_TYPE)
-        left_order = plc.copying.gather(
-            plc.Table([plc.filling.sequence(left_rows, init, step)]),
-            lg,
-            plc.copying.OutOfBoundsPolicy.DONT_CHECK,
-        )
-        right_order = plc.copying.gather(
-            plc.Table([plc.filling.sequence(right_rows, init, step)]),
-            rg,
-            plc.copying.OutOfBoundsPolicy.NULLIFY,
+        # Scatter the right order indices into an all-null table
+        # at the positions given by the left order indices. Now we
+        # have the map between rows and groups with the correct ordering
+        left_rows = left_order.size()
+        target = plc.Column.from_scalar(
+            plc.Scalar.from_py(None, plc.types.SIZE_TYPE), left_rows
         )
-        # Sort both maps by (left_order, right_order), then use the reordered right map
-        # to gather group aggregates in the original row order.
-        _, rg = plc.sorting.stable_sort_by_key(
-            plc.Table([lg, rg]),
-            plc.Table([*left_order.columns(), *right_order.columns()]),
-            [plc.types.Order.ASCENDING, plc.types.Order.ASCENDING],
-            [plc.types.NullOrder.AFTER, plc.types.NullOrder.AFTER],
-        ).columns()
-
-        # Broadcast each aggregated result back to row-shape using the right map.
+        aligned_map = plc.copying.scatter(
+            plc.Table([right_order]),
+            left_order,
+            plc.Table([target]),
+        ).columns()[0]
+
+        # Broadcast each aggregated result back to row-shape using
+        # the aligned mapping between row indices and group indices
        broadcasted_cols = [
             Column(
                 plc.copying.gather(
-                    plc.Table([col]),
-                    rg,
-                    plc.copying.OutOfBoundsPolicy.NULLIFY,
+                    plc.Table([col]), aligned_map, plc.copying.OutOfBoundsPolicy.NULLIFY
                 ).columns()[0],
                 name=named_expr.name,
                 dtype=dtype,
diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py
index 6cf3c956358..2ec3abefc59 100644
--- a/python/cudf_polars/tests/expressions/test_rolling.py
+++ b/python/cudf_polars/tests/expressions/test_rolling.py
@@ -217,3 +217,29 @@ def test_over_with_mapping_strategy_unsupported(df, strategy):
 def test_over_boolean_function_unsupported(df):
     q = df.select(pl.col("x").not_().over("g"))
     assert_ir_translation_raises(q, NotImplementedError)
+
+
+def test_over_ternary(df):
+    q = df.select(
+        pl.when(pl.col("g") == 1)
+        .then(pl.lit(None, dtype=pl.Int64))
+        .otherwise(pl.col("x"))
+        .sum()
+        .over("g")
+    )
+
+    assert_gpu_result_equal(q)
+
+
+def test_over_broadcast_input_row_group_indices_aligned():
+    num_rows, num_groups = 512, 64
+
+    df = pl.LazyFrame(
+        {
+            "g": [(i * 31) % num_groups for i in range(num_rows)],
+            "x": list(range(num_rows)),
+        }
+    )
+    q = df.select(pl.col("x").sum().over("g"))
+
+    assert_gpu_result_equal(q)

From 9fa50b9858748761763e29feacd312d85d869d2c Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 27 Aug 2025 12:58:31 -0400
Subject: [PATCH 215/366] Improve support for sliced input on from_arrow_host
 APIs (#19491)

Improves support for accepting sliced ArrowArray input where the range
is beyond the 2 billion row limit. The length (number of rows) of the
sliced input must still be less than 2 billion, but the offset/length
range may reference indices above 2 billion.
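As a rough illustration of that contract (a hedged sketch, not code from
this patch; `input` stands for any Arrow C data interface array):

```cpp
// An Arrow slice is just an (offset, length) window over shared buffers.
// After this change the window may start past 2^31 rows, as long as the
// row count itself still fits in cudf::size_type. Release-callback
// ownership is ignored here for brevity.
ArrowArray sliced = *input;        // shallow copy; buffers are shared
sliced.offset += 3'000'000'000LL;  // offset/length range above 2^31 is OK
sliced.length = 1'000'000;         // but the length must stay below 2^31
```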
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Lawrence Mitchell (https://github.com/wence-) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/19491 --- cpp/src/interop/from_arrow_host.cu | 480 +++++++++++------- cpp/src/interop/from_arrow_host.hpp | 25 +- cpp/src/interop/from_arrow_host_strings.cu | 106 +--- cpp/tests/interop/from_arrow_host_test.cpp | 15 +- cpp/tests/interop/from_arrow_test.cpp | 2 +- .../cudf_polars/cudf_polars/testing/plugin.py | 1 - 6 files changed, 338 insertions(+), 291 deletions(-) diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index 78a7bf0e864..b5584b80854 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -20,17 +20,16 @@ #include #include #include -#include -#include #include #include #include -#include -#include +#include +#include #include #include #include #include +#include #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include @@ -53,67 +53,129 @@ namespace detail { namespace { +/** + * @brief Return bitmask word at the given index in the source + * + * This is a 64-bit version of cudf::detail::get_mask_offset_word + * since the source may have a range more than max(int) bits. + */ +__device__ inline bitmask_type get_mask_word(bitmask_type const* __restrict__ source, + int64_t destination_word_index, + int64_t source_begin_bit, + int64_t source_end_bit) +{ + constexpr auto bitmask_bits = size_in_bits(); + auto const word_index = destination_word_index + (source_begin_bit / bitmask_bits); + auto const curr_word = source[word_index]; + auto const end_index = (source_end_bit - 1) / bitmask_bits; + auto const next_word = (end_index > word_index) ? source[word_index + 1] : bitmask_type{0}; + auto const shift = static_cast(source_begin_bit % bitmask_bits); + return __funnelshift_r(curr_word, next_word, shift); +} + +/** + * @brief Copy a shifted bitmask in device memory + * + * Called by get_mask_buffer below when a bit-shift within a bitmask_type is required. + * + * @param destination The destination bitmask. + * @param source The source bitmask. + * @param source_begin_bit The beginning bit of the source bitmask. + * @param source_end_bit The end bit of the source bitmask. + * @param number_of_mask_words The number of mask words. 
+ */ +CUDF_KERNEL void copy_shifted_bitmask(bitmask_type* __restrict__ destination, + bitmask_type const* __restrict__ source, + int64_t source_begin_bit, + int64_t source_end_bit, + size_type number_of_mask_words) +{ + auto const stride = cudf::detail::grid_1d::grid_stride(); + for (thread_index_type destination_word_index = grid_1d::global_thread_id(); + destination_word_index < number_of_mask_words; + destination_word_index += stride) { + destination[destination_word_index] = + detail::get_mask_word(source, destination_word_index, source_begin_bit, source_end_bit); + } +} + +// copies the bitmask to device and automatically applies the offset +std::pair, size_type> get_mask_buffer( + ArrowArray const* input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + auto bitmap = static_cast(input->buffers[validity_buffer_idx]); + if (bitmap == nullptr || input->null_count == 0) { + return {std::make_unique(0, stream, mr), 0}; + } + + constexpr auto bits_in_byte = static_cast(size_in_bits()); + + auto const num_rows = static_cast(input->length); + auto const offset_index = input->offset / bits_in_byte; + auto const mask_words = num_bitmask_words(num_rows); + auto const padded_words = bitmask_allocation_size_bytes(num_rows) / sizeof(bitmask_type); + auto const bit_index = input->offset % bits_in_byte; + auto const copy_size = cudf::util::div_rounding_up_safe(num_rows + bit_index, bits_in_byte); + + auto mask = rmm::device_uvector(padded_words, stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync( + mask.data(), bitmap + offset_index, copy_size, cudaMemcpyDefault, stream.value())); + + if (mask_words > 0 && bit_index > 0) { + auto dest_mask = rmm::device_uvector(padded_words, stream, mr); + cudf::detail::grid_1d config(mask_words, 256); + copy_shifted_bitmask<<>>( + dest_mask.data(), mask.data(), bit_index, bit_index + num_rows, mask_words); + CUDF_CHECK_CUDA(stream.value()); + mask = std::move(dest_mask); + } + + auto const null_count = + mask_words > 0 ? 
cudf::detail::count_unset_bits(mask.data(), 0, num_rows, stream) : 0; + + return {std::make_unique(std::move(mask.release())), null_count}; +} + +std::unique_ptr get_column_copy(ArrowSchemaView const* schema, + ArrowArray const* input, + data_type type, + bool skip_mask, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + struct dispatch_copy_from_arrow_host { rmm::cuda_stream_view stream; rmm::device_async_resource_ref mr; - std::unique_ptr get_mask_buffer(ArrowArray const* array) - { - auto* bitmap = array->buffers[validity_buffer_idx]; - if (bitmap == nullptr) { return std::make_unique(0, stream, mr); } - - auto const bitmask_size = array->length + array->offset; - auto const allocation_size = - bitmask_allocation_size_bytes(static_cast(bitmask_size)); - auto mask = std::make_unique(allocation_size, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(mask->data(), - reinterpret_cast(bitmap), - allocation_size, - cudaMemcpyDefault, - stream.value())); - return mask; - } - template () && !is_fixed_point())> - std::unique_ptr operator()(ArrowSchemaView*, ArrowArray const*, data_type, bool) + std::unique_ptr operator()(ArrowSchemaView const*, ArrowArray const*, data_type, bool) { CUDF_FAIL("Unsupported type in copy_from_arrow_host."); } template () || is_fixed_point())> - std::unique_ptr operator()(ArrowSchemaView* schema, + std::unique_ptr operator()(ArrowSchemaView const*, ArrowArray const* input, data_type type, bool skip_mask) { using DeviceType = device_storage_type_t; - size_type const num_rows = input->length; - size_type const offset = input->offset; - size_type const null_count = input->null_count; - auto data_buffer = input->buffers[fixed_width_data_buffer_idx]; + auto const num_rows = static_cast(input->length); + auto const data_buffer = + static_cast(input->buffers[fixed_width_data_buffer_idx]); - auto const has_nulls = skip_mask ? false : input->buffers[validity_buffer_idx] != nullptr; auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); auto mutable_column_view = col->mutable_view(); - CUDF_CUDA_TRY( - cudaMemcpyAsync(mutable_column_view.data(), - reinterpret_cast(data_buffer) + offset * sizeof(DeviceType), - sizeof(DeviceType) * num_rows, - cudaMemcpyDefault, - stream.value())); - - if (has_nulls) { - auto tmp_mask = get_mask_buffer(input); - - // if array is sliced, we have to copy the whole mask and then take copy - auto out_mask = - (offset == 0) - ? 
std::move(*tmp_mask) - : cudf::detail::copy_bitmask( - static_cast(tmp_mask->data()), offset, offset + num_rows, stream, mr); - - col->set_null_mask(std::move(out_mask), null_count); + CUDF_CUDA_TRY(cudaMemcpyAsync(mutable_column_view.data(), + data_buffer + input->offset, + sizeof(DeviceType) * num_rows, + cudaMemcpyDefault, + stream.value())); + + if (!skip_mask) { + auto [mask, null_count] = get_mask_buffer(input, stream, mr); + col->set_null_mask(std::move(*mask), null_count); } return col; @@ -121,35 +183,39 @@ struct dispatch_copy_from_arrow_host { }; template <> -std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSchemaView* schema, +std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSchemaView const*, ArrowArray const* input, data_type type, bool skip_mask) { - auto data_buffer = input->buffers[fixed_width_data_buffer_idx]; - auto const buffer_length = bitmask_allocation_size_bytes(input->length + input->offset); - - auto data = rmm::device_buffer(buffer_length, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), - reinterpret_cast(data_buffer), - buffer_length, - cudaMemcpyDefault, - stream.value())); - auto out_col = mask_to_bools(static_cast(data.data()), - input->offset, - input->offset + input->length, - stream, - mr); - - auto const has_nulls = skip_mask ? false : input->buffers[validity_buffer_idx] != nullptr; - if (has_nulls) { - auto out_mask = detail::copy_bitmask(static_cast(get_mask_buffer(input)->data()), - input->offset, - input->offset + input->length, - stream, - mr); - - out_col->set_null_mask(std::move(out_mask), input->null_count); + auto data_buffer = static_cast(input->buffers[fixed_width_data_buffer_idx]); + + constexpr auto bits_in_byte = static_cast(size_in_bits()); + + auto const num_rows = static_cast(input->length); + auto const offset_index = input->offset / bits_in_byte; + auto const data_words = num_bitmask_words(num_rows); + auto const bit_index = input->offset % bits_in_byte; + auto const copy_size = cudf::util::div_rounding_up_safe(num_rows + bit_index, bits_in_byte); + + auto data = rmm::device_uvector(data_words, stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync( + data.data(), data_buffer + offset_index, copy_size, cudaMemcpyDefault, stream.value())); + + if (data_words > 0 && bit_index > 0) { + auto dest_data = rmm::device_uvector(data_words, stream, mr); + cudf::detail::grid_1d config(data_words, 256); + copy_shifted_bitmask<<>>( + dest_data.data(), data.data(), bit_index, bit_index + num_rows, data_words); + CUDF_CHECK_CUDA(stream.value()); + data = std::move(dest_data); + } + + auto out_col = mask_to_bools(static_cast(data.data()), 0, num_rows, stream, mr); + + if (!skip_mask) { + auto [out_mask, null_count] = get_mask_buffer(input, stream, mr); + out_col->set_null_mask(std::move(*out_mask), null_count); } return out_col; @@ -157,15 +223,23 @@ std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSch template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()( - ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) + ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask) { + CUDF_EXPECTS( + input->length + 1 <= static_cast(std::numeric_limits::max()), + "number of rows in Arrow column exceeds the column size limit", + std::overflow_error); + if (input->length == 0) { return make_empty_column(type_id::STRING); } - return string_column_from_arrow_host(schema, input, get_mask_buffer(input), stream, mr); + auto [mask, null_count] = !skip_mask 
+ ? get_mask_buffer(input, stream, mr) + : std::pair{std::make_unique(0, stream, mr), 0}; + return string_column_from_arrow_host(schema, input, std::move(mask), null_count, stream, mr); } template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()( - ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) + ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask) { ArrowSchemaView keys_schema_view; NANOARROW_THROW_NOT_OK( @@ -186,7 +260,7 @@ std::unique_ptr dispatch_copy_from_arrow_host::operator()release(); indices_column = std::make_unique(dict_indices_type, @@ -203,133 +277,94 @@ std::unique_ptr dispatch_copy_from_arrow_host::operator() std::unique_ptr dispatch_copy_from_arrow_host::operator()( - ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) + ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask) { std::vector> child_columns; - std::transform( - input->children, - input->children + input->n_children, - schema->schema->children, - std::back_inserter(child_columns), - [this, input](ArrowArray const* child, ArrowSchema const* child_schema) { - ArrowSchemaView view; - NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, child_schema, nullptr)); - auto type = arrow_to_cudf_type(&view); - - auto out = get_column_copy(&view, child, type, false, stream, mr); - return input->offset == 0 && input->length == out->size() - ? std::move(out) - : std::make_unique( - cudf::detail::slice(out->view(), - static_cast(input->offset), - static_cast(input->offset + input->length), - stream), - stream, - mr); - }); - - auto out_mask = std::move(*(get_mask_buffer(input))); - if (input->buffers[validity_buffer_idx] != nullptr) { - out_mask = detail::copy_bitmask(static_cast(out_mask.data()), - input->offset, - input->offset + input->length, - stream, - mr); - } + std::transform(input->children, + input->children + input->n_children, + schema->schema->children, + std::back_inserter(child_columns), + [this, input](ArrowArray const* child, ArrowSchema const* child_schema) { + ArrowSchemaView view; + NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, child_schema, nullptr)); + auto child_type = arrow_to_cudf_type(&view); + + ArrowArray child_array(*child); + child_array.offset += input->offset; + child_array.length = std::min(input->length, child_array.length); + + return get_column_copy(&view, &child_array, child_type, false, stream, mr); + }); + + auto [out_mask, null_count] = + !skip_mask ? 
get_mask_buffer(input, stream, mr) + : std::pair{std::make_unique(0, stream, mr), 0}; return make_structs_column( - input->length, std::move(child_columns), input->null_count, std::move(out_mask), stream, mr); + input->length, std::move(child_columns), null_count, std::move(*out_mask), stream, mr); } template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()( - ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) + ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask) { - // Initialize schema for 32-bit ints regardless of list type - nanoarrow::UniqueSchema offset_schema; - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(offset_schema.get(), NANOARROW_TYPE_INT32)); - - ArrowSchemaView view; - NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, offset_schema.get(), nullptr)); - - CUDF_EXPECTS(input->length + input->offset + 1 <= - static_cast(std::numeric_limits::max()), - "Total number of rows in Arrow column exceeds the column size limit.", - std::overflow_error); - size_type const physical_length = input->length + input->offset + 1; - - auto offsets_column = [&] { - void const* offsets_buffers[2] = {nullptr, input->buffers[fixed_width_data_buffer_idx]}; - ArrowArray offsets_array = { - .length = physical_length, - .null_count = 0, - .offset = 0, - .n_buffers = 2, - .n_children = 0, - .buffers = offsets_buffers, - }; - - if (schema->type != NANOARROW_TYPE_LARGE_LIST) { - return this->operator()(&view, &offsets_array, data_type(type_id::INT32), true); - } + CUDF_EXPECTS( + input->length + 1 <= static_cast(std::numeric_limits::max()), + "number of rows in Arrow column exceeds the column size limit", + std::overflow_error); - // For large lists, convert 64-bit offsets to 32-bit on host with bounds checking - int64_t const* large_offsets = - reinterpret_cast(input->buffers[fixed_width_data_buffer_idx]); + auto [offsets_column, offset, length] = get_offsets_column(schema, input, stream, mr); - std::vector int32_offsets(physical_length); - constexpr auto max_offset = static_cast(std::numeric_limits::max()); - CUDF_EXPECTS(large_offsets[physical_length - 1] <= max_offset, - "Large list offsets exceed 32-bit integer bounds", - std::overflow_error); + ArrowSchemaView view; + NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, schema->schema->children[0], nullptr)); + auto child_type = arrow_to_cudf_type(&view); - std::transform( - large_offsets, large_offsets + physical_length, int32_offsets.begin(), [](int64_t offset) { - return static_cast(offset); - }); + ArrowArray child_array(*input->children[0]); + child_array.offset += offset; + child_array.length = std::min(length, child_array.length); - offsets_buffers[1] = int32_offsets.data(); + auto child_column = get_column_copy(&view, &child_array, child_type, skip_mask, stream, mr); - return this->operator()(&view, &offsets_array, data_type(type_id::INT32), true); - }(); + auto [out_mask, null_count] = + !skip_mask ? 
get_mask_buffer(input, stream, mr) + : std::pair{std::make_unique(0, stream, mr), 0}; - NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, schema->schema->children[0], nullptr)); - auto child_type = arrow_to_cudf_type(&view); - auto child_column = get_column_copy(&view, input->children[0], child_type, false, stream, mr); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_lists_column(num_rows, - std::move(offsets_column), - std::move(child_column), - input->null_count, - std::move(*get_mask_buffer(input)), - stream, - mr); - - return num_rows == input->length - ? std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(input->offset), - static_cast(input->offset + input->length), - stream), - stream, - mr); + return make_lists_column(static_cast(input->length), + std::move(offsets_column), + std::move(child_column), + null_count, + std::move(*out_mask), + stream, + mr); } -} // namespace - -std::unique_ptr get_column_copy(ArrowSchemaView* schema, +/** + * @brief Convert ArrowArray to cudf column utility + * + * This function is simply a convenience wrapper around the dispatch functor with + * some extra handling to avoid having to reproduce it for all of the nested types. + * It also allows us to centralize the location where the recursive calls happen + * so that we only need to forward declare this one function, rather than multiple + * functions which handle the overloads for nested types (list, struct, etc.) + * + * @param schema Arrow schema includes the column type + * @param input Column data, nulls, offset + * @param type The cudf column type to map input to + * @param skip_mask True if the mask is handled by the caller + * @param stream CUDA stream used for device memory operations + * @param mr Device memory resource to use for all device memory allocations + */ +std::unique_ptr get_column_copy(ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS((input->length + input->offset) <= - static_cast(std::numeric_limits::max()), - "Total number of rows in Arrow column exceeds the column size limit.", - std::overflow_error); + CUDF_EXPECTS( + input->length <= static_cast(std::numeric_limits::max()), + "number of rows in Arrow column exceeds the column size limit", + std::overflow_error); return type.id() != type_id::EMPTY ? 
std::move(type_dispatcher( @@ -341,6 +376,91 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, input->length); } +/** + * @brief Utility to copy and normalize the offsets in the given array + */ +template +std::tuple, int64_t, int64_t> copy_offsets_column( + ArrowSchemaView const* schema, + ArrowArray const* offsets, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto offsets_buffer = + static_cast(offsets->buffers[fixed_width_data_buffer_idx]); + auto const offset = offsets_buffer[offsets->offset]; + auto const length = offsets_buffer[offsets->offset + offsets->length - 1] - offset; + + // dispatch directly since we know the type + auto result = dispatch_copy_from_arrow_host{stream, mr}.template operator()( + schema, offsets, data_type{type_to_id()}, true); + if (offset != 0) { + auto begin = result->mutable_view().template begin(); + auto end = begin + offsets->length; + thrust::transform( + rmm::exec_policy_nosync(stream), begin, end, begin, [offset] __device__(auto o) { + return o - offset; + }); + } + return std::tuple{std::move(result), offset, length}; +} + +} // namespace + +/** + * @brief Utility to copy the offsets from the given input (strings or list) to a + * cudf column + */ +std::tuple, int64_t, int64_t> get_offsets_column( + ArrowSchemaView const* schema, + ArrowArray const* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + void const* offsets_buffer = input->buffers[fixed_width_data_buffer_idx]; + void const* offsets_buffers[2] = {nullptr, offsets_buffer}; + ArrowArray offsets_array = { + .length = input->length + 1, + .null_count = 0, + .offset = input->offset, + .n_buffers = 2, + .n_children = 0, + .buffers = offsets_buffers, + }; + + if (schema->type == NANOARROW_TYPE_STRING || schema->type == NANOARROW_TYPE_LIST) { + return copy_offsets_column(schema, &offsets_array, stream, mr); + } + if (schema->type == NANOARROW_TYPE_LARGE_STRING) { + return copy_offsets_column(schema, &offsets_array, stream, mr); + } + + CUDF_EXPECTS(schema->type == NANOARROW_TYPE_LARGE_LIST, "Unknown offsets parent type"); + + // Large-lists must be copied to int32 column + auto int32_offsets = std::vector(); + int32_offsets.reserve(input->length + 1); + auto int64_offsets = static_cast(offsets_buffer); + auto const offset = int64_offsets[input->offset]; + auto const length = int64_offsets[input->offset + input->length] - offset; + + constexpr auto max_offset = static_cast(std::numeric_limits::max()); + CUDF_EXPECTS( + length <= max_offset, "large list offsets exceed 32-bit integer bounds", std::overflow_error); + + // normalize the offsets while copying from int64 to int32 + std::transform(int64_offsets + input->offset, + int64_offsets + input->offset + input->length + 1, + std::back_inserter(int32_offsets), + [offset](int64_t o) { return static_cast(o - offset); }); + + offsets_buffers[fixed_width_data_buffer_idx] = int32_offsets.data(); + offsets_array.offset = 0; // already accounted for by the above transform + auto result = dispatch_copy_from_arrow_host{stream, mr}.template operator()( + schema, &offsets_array, data_type(type_id::INT32), true); + return std::tuple{std::move(result), offset, length}; +} + std::unique_ptr
from_arrow_host(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, @@ -396,7 +516,7 @@ std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, return get_column_copy(&view, &input->array, type, false, stream, mr); } -std::unique_ptr get_column_from_host_copy(ArrowSchemaView* schema, +std::unique_ptr get_column_from_host_copy(ArrowSchemaView const* schema, ArrowArray const* input, data_type type, bool skip_mask, diff --git a/cpp/src/interop/from_arrow_host.hpp b/cpp/src/interop/from_arrow_host.hpp index 9307bf8c58d..7b88ae3818a 100644 --- a/cpp/src/interop/from_arrow_host.hpp +++ b/cpp/src/interop/from_arrow_host.hpp @@ -32,37 +32,32 @@ namespace detail { * @param schema Arrow schema includes the column type * @param input Column data, nulls, offset * @param mask Mask to apply to the output column + * @param null_count Number of nulls in mask * @param stream CUDA stream used for device memory operations * @param mr Device memory resource to use for all device memory allocations */ -std::unique_ptr string_column_from_arrow_host(ArrowSchemaView* schema, +std::unique_ptr string_column_from_arrow_host(ArrowSchemaView const* schema, ArrowArray const* input, std::unique_ptr&& mask, + size_type null_count, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @brief Convert ArrowArray to cudf column utility + * @brief Create offsets column for list or strings column * - * This function is simply a convenience wrapper around the dispatch functor with - * some extra handling to avoid having to reproduce it for all of the nested types. - * It also allows us to centralize the location where the recursive calls happen - * so that we only need to forward declare this one function, rather than multiple - * functions which handle the overloads for nested types (list, struct, etc.) 
* * @param schema Arrow schema includes the column type * @param input Column data, nulls, offset - * @param type The cudf column type to map input to - * @param skip_mask True if the mask is handled by the caller * @param stream CUDA stream used for device memory operations * @param mr Device memory resource to use for all device memory allocations + * @return Column plus offset and size bounds for copying data column */ -std::unique_ptr get_column_copy(ArrowSchemaView* schema, - ArrowArray const* input, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); +std::tuple, int64_t, int64_t> get_offsets_column( + ArrowSchemaView const* schema, + ArrowArray const* input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/interop/from_arrow_host_strings.cu b/cpp/src/interop/from_arrow_host_strings.cu index d11197f243a..03810c1398e 100644 --- a/cpp/src/interop/from_arrow_host_strings.cu +++ b/cpp/src/interop/from_arrow_host_strings.cu @@ -47,99 +47,34 @@ namespace cudf { namespace detail { - namespace { constexpr int chars_buffer_idx = 2; -std::unique_ptr from_arrow_string(ArrowSchemaView* schema, +std::unique_ptr from_arrow_string(ArrowSchemaView const* schema, ArrowArray const* input, std::unique_ptr&& mask, + size_type null_count, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - // offsets column should contain no nulls so we can put nullptr for the bitmask - // nulls are tracked in the parent string column itself, not in the offsets - void const* offset_buffers[] = {nullptr, input->buffers[fixed_width_data_buffer_idx]}; - ArrowArray offsets_array = { - .length = input->offset + input->length + 1, - .null_count = 0, - .offset = 0, - .n_buffers = 2, - .n_children = 0, - .buffers = offset_buffers, - }; - - // chars_column does not contain any nulls, they are tracked by the parent string column - // itself instead. So we pass nullptr for the validity bitmask. - int64_t const char_data_length = [&]() { - if (schema->type == NANOARROW_TYPE_LARGE_STRING) { - return reinterpret_cast(offset_buffers[1])[input->length + input->offset]; - } else if (schema->type == NANOARROW_TYPE_STRING) { - return static_cast( - reinterpret_cast(offset_buffers[1])[input->length + input->offset]); - } else { - CUDF_FAIL("Unsupported string type", cudf::data_type_error); - } - }(); - void const* char_buffers[] = {nullptr, input->buffers[chars_buffer_idx]}; - ArrowArray char_array = { - .length = char_data_length, - .null_count = 0, - .offset = 0, - .n_buffers = 2, - .n_children = 0, - .buffers = char_buffers, - }; - - nanoarrow::UniqueSchema offset_schema; - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(offset_schema.get(), NANOARROW_TYPE_INT32)); - - nanoarrow::UniqueSchema char_data_schema; - NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(char_data_schema.get(), NANOARROW_TYPE_INT8)); - - // leverage the dispatch overloads for int32 and char(int8) to generate the child - // offset and char data columns for us. 
- ArrowSchemaView view; - NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, offset_schema.get(), nullptr)); - auto offsets_column = [&]() { - if (schema->type == NANOARROW_TYPE_LARGE_STRING) { - return get_column_copy(&view, &offsets_array, data_type(type_id::INT64), true, stream, mr); - } else if (schema->type == NANOARROW_TYPE_STRING) { - return get_column_copy(&view, &offsets_array, data_type(type_id::INT32), true, stream, mr); - } else { - CUDF_FAIL("Unsupported string type", cudf::data_type_error); - } - }(); - NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&view, char_data_schema.get(), nullptr)); + auto [offsets_column, offset, char_data_length] = get_offsets_column(schema, input, stream, mr); rmm::device_buffer chars(char_data_length, stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(chars.data(), - reinterpret_cast(char_array.buffers[1]), - chars.size(), - cudaMemcpyDefault, - stream.value())); - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_strings_column(num_rows, - std::move(offsets_column), - std::move(chars), - input->null_count, - std::move(*mask.release())); - - return input->offset == 0 - ? std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(input->offset), - static_cast(input->offset + input->length), - stream), - stream, - mr); + auto const* chars_data = static_cast(input->buffers[chars_buffer_idx]) + offset; + CUDF_CUDA_TRY( + cudaMemcpyAsync(chars.data(), chars_data, chars.size(), cudaMemcpyDefault, stream.value())); + + return make_strings_column(static_cast(input->length), + std::move(offsets_column), + std::move(chars), + null_count, + std::move(*mask.release())); } constexpr int stringview_vector_idx = 1; -std::unique_ptr from_arrow_stringview(ArrowSchemaView* schema, +std::unique_ptr from_arrow_stringview(ArrowSchemaView const* schema, ArrowArray const* input, std::unique_ptr&& mask, rmm::cuda_stream_view stream, @@ -152,10 +87,8 @@ std::unique_ptr from_arrow_stringview(ArrowSchemaView* schema, // first copy stringview array to device auto items = view.buffer_views[stringview_vector_idx].data.as_binary_view; auto d_items = rmm::device_uvector(input->length, stream, mr); - // caller ensures that input->offset is < max size_type - auto const offset = static_cast(input->offset); CUDF_CUDA_TRY(cudaMemcpyAsync(d_items.data(), - items + offset, + items + input->offset, input->length * sizeof(ArrowBinaryView), cudaMemcpyDefault, stream.value())); @@ -183,10 +116,8 @@ std::unique_ptr from_arrow_stringview(ArrowSchemaView* schema, thrust::counting_iterator(0), thrust::counting_iterator(input->length), d_indices.begin(), - [d_items = d_items.data(), d_ptrs, d_mask, offset] __device__(auto idx) -> string_index_pair { - if (d_mask && !cudf::bit_is_set(d_mask, idx + offset)) { - return string_index_pair{nullptr, 0}; - } + [d_items = d_items.data(), d_ptrs, d_mask] __device__(auto idx) -> string_index_pair { + if (d_mask && !bit_is_set(d_mask, idx)) { return string_index_pair{nullptr, 0}; } auto const& item = d_items[idx]; auto const size = static_cast(item.inlined.size); auto const data = (size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) @@ -200,15 +131,16 @@ std::unique_ptr from_arrow_stringview(ArrowSchemaView* schema, } // namespace -std::unique_ptr string_column_from_arrow_host(ArrowSchemaView* schema, +std::unique_ptr string_column_from_arrow_host(ArrowSchemaView const* schema, ArrowArray const* input, std::unique_ptr&& mask, + size_type null_count, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { 
return schema->type == NANOARROW_TYPE_STRING_VIEW ? from_arrow_stringview(schema, input, std::move(mask), stream, mr) - : from_arrow_string(schema, input, std::move(mask), stream, mr); + : from_arrow_string(schema, input, std::move(mask), null_count, stream, mr); } } // namespace detail diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index e4f168f9c82..f94006517cb 100644 --- a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -363,7 +363,7 @@ TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableNulls) // converting arrow host memory to cudf table gives us the expected table auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, got_cudf_table->view()); // converting to a cudf table with a single struct column gives us the expected // result column @@ -421,7 +421,7 @@ TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLargeNulls) // converting arrow host memory to cudf table gives us the expected table auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, got_cudf_table->view()); // converting to a cudf table with a single struct column gives us the expected // result column @@ -430,7 +430,7 @@ TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLargeNulls) auto got_cudf_col_view = got_cudf_col->view(); cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(), got_cudf_col_view.child_end())}; - CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(got_cudf_table->view(), from_struct); } } @@ -499,7 +499,7 @@ TEST_F(FromArrowHostDeviceTest, NestedList) // converting from arrow host memory to cudf gives us the expected table auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_table_view, got_cudf_table->view()); // converting to a single column cudf table gives us the expected struct column auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); @@ -663,7 +663,7 @@ TEST_F(FromArrowHostDeviceTest, StructColumn) // test we get the expected cudf::table from the arrow host memory data auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_table_view, got_cudf_table->view()); // test we get the expected cudf struct column auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); @@ -851,6 +851,7 @@ TEST_P(FromArrowHostDeviceTestSlice, SliceTest) input.device_type = ARROW_DEVICE_CPU; auto got_cudf_table = cudf::from_arrow_host(schema.get(), &input); + if (got_cudf_table->num_rows() == 0 and sliced_cudf_table.num_rows() == 0) { CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view()); @@ -861,14 +862,14 @@ TEST_P(FromArrowHostDeviceTestSlice, SliceTest) got_cudf_col_view.child_end())}; CUDF_TEST_EXPECT_TABLES_EQUIVALENT(got_cudf_table->view(), from_struct); } else { - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table->view()); + 
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view());
     auto got_cudf_col = cudf::from_arrow_host_column(schema.get(), &input);
     EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT});
     auto got_cudf_col_view = got_cudf_col->view();
     cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(),
                                              got_cudf_col_view.child_end())};
-    CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(got_cudf_table->view(), from_struct);
   }
 }
 
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 400d3852d5a..0fcf4b53b8a 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -444,7 +444,7 @@ TEST_P(FromArrowTestSlice, SliceTest)
   if (got_cudf_table.value()->num_rows() == 0 and expected_cudf_table.num_rows() == 0) {
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view());
   } else {
-    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table.value()->view());
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view());
   }
 }
 
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index 84ef9d9234f..1815f9dffea 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -163,7 +163,6 @@ def pytest_configure(config: pytest.Config) -> None:
     "tests/unit/datatypes/test_struct.py::test_struct_agg_all": "Needs nested list[struct] support",
     "tests/unit/constructors/test_structs.py::test_constructor_non_strict_schema_17956": "Needs nested list[struct] support",
     "tests/unit/io/test_delta.py::test_read_delta_arrow_map_type": "Needs nested list[struct] support",
-    "tests/unit/lazyframe/test_collect_schema.py::test_collect_schema_parametric": "https://github.com/pola-rs/polars/issues/23214",
     "tests/unit/datatypes/test_struct.py::test_struct_null_cast": "pylibcudf.Scalar does not support struct scalars",
     "tests/unit/datatypes/test_struct.py::test_struct_outer_nullability_zip_18119": "pylibcudf.Scalar does not support struct scalars",
     "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[True-columns]": "allow_missing_columns argument in read_parquet not translated in IR",

From 6dc52ce33384de79456744e189d68ef618b907ae Mon Sep 17 00:00:00 2001
From: Robert Maynard
Date: Wed, 27 Aug 2025 14:32:45 -0400
Subject: [PATCH 216/366] Update identify_stream_usage CUDA runtime hooks to
 CUDA 13 (#19807)

The kernel-launch and memcpyAsync APIs have changed, and the hooks need
updates to work properly.
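For context, a hedged sketch (not part of this patch) of the signature
change the hooks have to accommodate, using cudaMemPrefetchAsync as the
example: CUDA 13 replaces the destination device ordinal with a
cudaMemLocation plus a flags argument.

```cpp
#include <cuda_runtime.h>

// Prefetch `bytes` of managed memory to `device`, handling both ABIs.
// The CUDA 13 branch mirrors the signature hooked by this patch.
void prefetch(void const* ptr, size_t bytes, int device, cudaStream_t stream)
{
#if CUDART_VERSION >= 13000
  cudaMemLocation loc{};
  loc.type = cudaMemLocationTypeDevice;
  loc.id   = device;
  cudaMemPrefetchAsync(ptr, bytes, loc, /*flags=*/0, stream);
#else
  cudaMemPrefetchAsync(ptr, bytes, device, stream);
#endif
}
```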
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19807 --- cpp/tests/utilities/identify_stream_usage.cpp | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/cpp/tests/utilities/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp index 93f4ee3e4a4..69dc1c4430b 100644 --- a/cpp/tests/utilities/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -209,6 +209,40 @@ DEFINE_OVERLOAD(cudaLaunchKernel, size_t sharedMem, cudaStream_t stream), ARG(func, gridDim, blockDim, args, sharedMem, stream)); + +#if CUDART_VERSION >= 13000 +// We need to define the __cudaLaunchKernel ABI as +// it isn't part of cuda_runtime.h when compiling as a C++ source +extern "C" cudaError_t CUDARTAPI __cudaLaunchKernel(cudaKernel_t kernel, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream); +extern "C" cudaError_t CUDARTAPI __cudaLaunchKernel_ptsz(cudaKernel_t kernel, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream); +DEFINE_OVERLOAD(__cudaLaunchKernel, + ARG(cudaKernel_t kernel, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream), + ARG(kernel, gridDim, blockDim, args, sharedMem, stream)); +DEFINE_OVERLOAD(__cudaLaunchKernel_ptsz, + ARG(cudaKernel_t kernel, + dim3 gridDim, + dim3 blockDim, + void** args, + size_t sharedMem, + cudaStream_t stream), + ARG(kernel, gridDim, blockDim, args, sharedMem, stream)); +#endif + DEFINE_OVERLOAD(cudaLaunchCooperativeKernel, ARG(void const* func, dim3 gridDim, @@ -223,9 +257,16 @@ DEFINE_OVERLOAD(cudaLaunchHostFunc, // Memory transfer APIS: // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY +#if CUDART_VERSION >= 13000 +DEFINE_OVERLOAD( + cudaMemPrefetchAsync, + ARG(void const* devPtr, size_t count, cudaMemLocation loc, int flags, cudaStream_t stream), + ARG(devPtr, count, loc, flags, stream)); +#else DEFINE_OVERLOAD(cudaMemPrefetchAsync, ARG(void const* devPtr, size_t count, int dstDevice, cudaStream_t stream), ARG(devPtr, count, dstDevice, stream)); +#endif DEFINE_OVERLOAD(cudaMemcpy2DAsync, ARG(void* dst, size_t dpitch, From f3632e0405e76ff852a55dc20ed91ab17f90bdcf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 Aug 2025 14:00:55 -0500 Subject: [PATCH 217/366] Filter pandas warning in dask_cudf test (#19808) This fixes a CI failure (observed at https://github.com/rapidsai/cudf/actions/runs/17267746615/job/49006444548?pr=19793#step:10:1636) coming from a pandas warning triggered by dask.dataframe's usage of pandas. We'll filter it here and fix it upstream. Authors: - Tom Augspurger (https://github.com/TomAugspurger) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19808 --- python/dask_cudf/dask_cudf/tests/test_groupby.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 6efb7cf1562..4408c0394d8 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -1,5 +1,7 @@ # Copyright (c) 2021-2025, NVIDIA CORPORATION. 
+import warnings + import numpy as np import pandas as pd import pytest @@ -623,9 +625,14 @@ def test_groupby_agg_params( 1 if split_out == "use_dask_default" else split_out ) - # Compute for easier multiindex handling - gf = gr.compute() - pf = pr.compute() + with warnings.catch_warnings(): + # dask<=2025.7.0 uses a deprecated "grouper" attribute + # in some of these computations. We'll silence the warning + # here and fix it upstream. + warnings.filterwarnings("ignore", category=FutureWarning) + # Compute for easier multiindex handling + gf = gr.compute() + pf = pr.compute() # Reset index and sort by groupby columns if as_index: From 477c6e27ade679283e603360bbfd42aac54c48f7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 27 Aug 2025 14:37:56 -0700 Subject: [PATCH 218/366] Run cudf-polars-polars-tests on changes in test_python file group (#19819) Similar to the other Python based jobs, I believe the `cudf-polars-polars-tests` job only needs to run on relevant changes to Python files e.g. https://github.com/rapidsai/cudf/pull/19816 removes a Python file. None of the other Python test jobs ran except `cudf-polars-polars-tests` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/19819 --- .github/workflows/pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 8249fea30a0..01da4160bad 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -284,6 +284,7 @@ jobs: needs: wheel-build-cudf-polars secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) From 3306234bc290a4204c870614201cd37b7046fc22 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:18:04 -0400 Subject: [PATCH 219/366] xfail polars `decimal(precision=None)` test (#19821) We return decimals with `precision=38`, polars returns `precision=None`. This is an expected mismatch, so xfail the test. 
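A hedged sketch of the mismatch (illustrative only; the exact schema
output depends on the polars version):

```python
import polars as pl

# Cast an integer column to Decimal without specifying a precision.
lf = pl.LazyFrame({"d": [1]}).select(pl.col("d").cast(pl.Decimal(scale=2)))

print(lf.collect_schema())              # CPU plan: Decimal(precision=None, scale=2)
print(lf.collect(engine="gpu").schema)  # GPU result: precision materialized as 38
```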
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19821 --- python/cudf_polars/cudf_polars/testing/plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 1815f9dffea..f55f97de334 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -122,6 +122,7 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly", "tests/unit/lazyframe/test_lazyframe.py::test_cast_frame": "Casting that raises not supported on GPU", "tests/unit/lazyframe/test_lazyframe.py::test_lazy_cache_hit": "Debug output on stderr doesn't match", + "tests/unit/lazyframe/test_collect_schema.py::test_collect_schema_parametric": "polars returns decimal column with precision=None", "tests/unit/operations/aggregation/test_aggregations.py::test_binary_op_agg_context_no_simplify_expr_12423": "groupby-agg of just literals should not produce collect_list", "tests/unit/operations/aggregation/test_aggregations.py::test_nan_inf_aggregation": "treatment of nans and nulls together is different in libcudf and polars in groupby-agg context", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", From f2c1d2e495371a3d2e50170799db0f626e58ba25 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 27 Aug 2025 16:47:22 -0700 Subject: [PATCH 220/366] Remove test_mvc.py (#19816) xref https://github.com/rapidsai/cudf/issues/19740#issuecomment-3229250933 cc @brandon-b-miller Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/19816 --- python/cudf/cudf/tests/test_mvc.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 python/cudf/cudf/tests/test_mvc.py diff --git a/python/cudf/cudf/tests/test_mvc.py b/python/cudf/cudf/tests/test_mvc.py deleted file mode 100644 index 0ff9ddef0a0..00000000000 --- a/python/cudf/cudf/tests/test_mvc.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
-import subprocess -import sys - -TEST_SCRIPT = """ -import numba.cuda -import cudf -from cudf.utils._numba import _CUDFNumbaConfig - -@numba.cuda.jit -def test_kernel(x): - id = numba.cuda.grid(1) - if id < len(x): - x[id] += 1 - -s = cudf.Series([1, 2, 3]) -with _CUDFNumbaConfig(): - test_kernel.forall(len(s))(s) -""" - - -def test_numba_mvc(): - cp = subprocess.run( - [sys.executable, "-c", TEST_SCRIPT], - capture_output=True, - cwd="/", - ) - - assert cp.returncode == 0 From 70ff60d756879d2acc5ee054f79055f135c85a59 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 27 Aug 2025 17:44:12 -0700 Subject: [PATCH 221/366] Use more cached_property where possible for Index and subclasses (#19799) closes https://github.com/rapidsai/cudf/issues/13357 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19799 --- python/cudf/cudf/core/column/datetime.py | 4 +- python/cudf/cudf/core/column/interval.py | 11 +- python/cudf/cudf/core/column/timedelta.py | 25 +--- python/cudf/cudf/core/index.py | 113 +++++++++++------- python/cudf/cudf/core/series.py | 4 +- python/cudf/cudf/core/tools/datetimes.py | 2 +- .../indexes/datetimeindex/test_attributes.py | 10 ++ 7 files changed, 91 insertions(+), 78 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 24a5968fa5a..8a0e5e10a15 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -283,11 +283,11 @@ def days_in_month(self) -> ColumnBase: plc.datetime.days_in_month(self.to_pylibcudf(mode="read")) ) - @property + @functools.cached_property def day_of_week(self) -> ColumnBase: raise NotImplementedError("day_of_week is currently not implemented.") - @property + @functools.cached_property def is_normalized(self) -> bool: raise NotImplementedError( "is_normalized is currently not implemented." diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index f589b852b91..d592afa3dda 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,6 +1,7 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. from __future__ import annotations +import functools from typing import TYPE_CHECKING, Literal import pandas as pd @@ -119,7 +120,7 @@ def copy(self, deep: bool = True) -> Self: children=struct_copy.base_children, # type: ignore[arg-type] ) - @property + @functools.cached_property def is_empty(self) -> ColumnBase: left_equals_right = (self.right == self.left).fillna(False) not_closed_both = as_column( @@ -127,19 +128,19 @@ def is_empty(self) -> ColumnBase: ) return left_equals_right & not_closed_both - @property + @functools.cached_property def is_non_overlapping_monotonic(self) -> bool: raise NotImplementedError( "is_overlapping is currently not implemented." ) - @property + @functools.cached_property def is_overlapping(self) -> bool: raise NotImplementedError( "is_overlapping is currently not implemented." 
) - @property + @functools.cached_property def length(self) -> ColumnBase: return self.right - self.left @@ -147,7 +148,7 @@ def length(self) -> ColumnBase: def left(self) -> ColumnBase: return self.children[0] - @property + @functools.cached_property def mid(self) -> ColumnBase: try: return 0.5 * (self.left + self.right) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index aa4bcef4454..90dbda3fcb3 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -278,33 +278,14 @@ def sum( unit=self.time_unit, ).as_unit(self.time_unit) + @functools.cached_property def components(self) -> dict[str, NumericalColumn]: """ - Return a Dataframe of the components of the Timedeltas. + Return a dict of the components of the Timedeltas. Returns ------- - DataFrame - - Examples - -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) - >>> s = cudf.Series([12231312123, 1231231231, 1123236768712, 2135656, - ... 3244334234], dtype='timedelta64[ms]') - >>> s - 0 141 days 13:35:12.123 - 1 14 days 06:00:31.231 - 2 13000 days 10:12:48.712 - 3 0 days 00:35:35.656 - 4 37 days 13:12:14.234 - dtype: timedelta64[ms] - >>> s.dt.components - days hours minutes seconds milliseconds microseconds nanoseconds - 0 141 13 35 12 123 0 0 - 1 14 6 0 31 231 0 0 - 2 13000 10 12 48 712 0 0 - 3 0 0 35 35 656 0 0 - 4 37 13 12 14 234 0 0 + dict[str, NumericalColumn] """ date_meta = { "hours": ["D", "h"], diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index cc795fad2b1..1770338ff0e 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -479,6 +479,7 @@ def is_monotonic_decreasing(self) -> bool: @property def has_duplicates(self) -> bool: + # .is_unique is already a cached_property return not self.is_unique @property @@ -1838,11 +1839,6 @@ def _binaryop( return ret.values return ret - @property # type: ignore - @_performance_tracking - def _values(self) -> ColumnBase: - return self._column - @classmethod @_performance_tracking def _concat(cls, objs): @@ -2227,7 +2223,7 @@ def _is_categorical(self) -> bool: def _is_interval(self) -> bool: return False - @property # type: ignore + @cached_property @_performance_tracking def hasnans(self) -> bool: return self._column.has_nulls(include_nan=True) @@ -3643,7 +3639,7 @@ def strftime(self, date_format: str) -> Index: self._column.strftime(date_format), name=self.name ) - @property + @cached_property def asi8(self) -> cupy.ndarray: return self._column.astype(np.dtype(np.int64)).values @@ -3684,7 +3680,9 @@ def tz(self) -> tzinfo | None: datetime.tzinfo or None Returns None when the array is tz-naive. """ - return getattr(self.dtype, "tz", None) + if isinstance(self.dtype, pd.DatetimeTZDtype): + return self.dtype.tz + return None @property def tzinfo(self) -> tzinfo | None: @@ -3722,7 +3720,7 @@ def normalize(self) -> Self: self._column.normalize(), name=self.name ) - @property + @cached_property def time(self) -> np.ndarray: """ Returns numpy array of ``datetime.time`` objects. @@ -3731,7 +3729,7 @@ def time(self) -> np.ndarray: """ return self.to_pandas().time - @property + @cached_property def timetz(self) -> np.ndarray: """ Returns numpy array of ``datetime.time`` objects with timezones. @@ -3740,7 +3738,7 @@ def timetz(self) -> np.ndarray: """ return self.to_pandas().timetz - @property + @cached_property def date(self) -> np.ndarray: """ Returns numpy array of python ``datetime.date`` objects. 
@@ -3755,6 +3753,7 @@ def is_month_start(self) -> cupy.ndarray: """ Booleans indicating if dates are the first day of the month. """ + # .is_month_start is already a cached_property return self._column.is_month_start.values @property @@ -3762,6 +3761,7 @@ def is_month_end(self) -> cupy.ndarray: """ Booleans indicating if dates are the last day of the month. """ + # .is_month_end is already a cached_property return self._column.is_month_end.values @property @@ -3769,6 +3769,7 @@ def is_quarter_end(self) -> cupy.ndarray: """ Booleans indicating if dates are the last day of the quarter. """ + # .is_quarter_end is already a cached_property return self._column.is_quarter_end.values @property @@ -3776,6 +3777,7 @@ def is_quarter_start(self) -> cupy.ndarray: """ Booleans indicating if dates are the start day of the quarter. """ + # .is_quarter_start is already a cached_property return self._column.is_quarter_start.values @property @@ -3783,6 +3785,7 @@ def is_year_end(self) -> cupy.ndarray: """ Booleans indicating if dates are the last day of the year. """ + # .is_year_end is already a cached_property return self._column.is_year_end.values @property @@ -3790,6 +3793,7 @@ def is_year_start(self) -> cupy.ndarray: """ Booleans indicating if dates are the first day of the year. """ + # .is_year_start is already a cached_property return self._column.is_year_start.values @property @@ -3797,6 +3801,7 @@ def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ + # .is_normalized is already a cached_property return self._column.is_normalized @property @@ -3804,6 +3809,7 @@ def days_in_month(self) -> Index: """ Get the total number of days in the month that the date falls on. """ + # .days_in_month is already a cached_property return Index._from_column(self._column.days_in_month, name=self.name) daysinmonth = days_in_month @@ -3813,9 +3819,10 @@ def day_of_week(self) -> Index: """ Get the day of week that the date falls on. 
""" + # .day_of_week is already a cached_property return Index._from_column(self._column.day_of_week, name=self.name) - @property # type: ignore + @property @_performance_tracking def year(self) -> Index: """ @@ -3832,9 +3839,10 @@ def year(self) -> Index: >>> datetime_index.year Index([2000, 2001, 2002], dtype='int16') """ + # .year is already a cached_property return Index._from_column(self._column.year, name=self.name) - @property # type: ignore + @property @_performance_tracking def month(self) -> Index: """ @@ -3851,9 +3859,10 @@ def month(self) -> Index: >>> datetime_index.month Index([1, 2, 3], dtype='int16') """ + # .month is already a cached_property return Index._from_column(self._column.month, name=self.name) - @property # type: ignore + @property @_performance_tracking def day(self) -> Index: """ @@ -3870,9 +3879,10 @@ def day(self) -> Index: >>> datetime_index.day Index([1, 2, 3], dtype='int16') """ + # .day is already a cached_property return Index._from_column(self._column.day, name=self.name) - @property # type: ignore + @property @_performance_tracking def hour(self) -> Index: """ @@ -3891,9 +3901,10 @@ def hour(self) -> Index: >>> datetime_index.hour Index([0, 1, 2], dtype='int16') """ + # .hour is already a cached_property return Index._from_column(self._column.hour, name=self.name) - @property # type: ignore + @property @_performance_tracking def minute(self) -> Index: """ @@ -3912,9 +3923,10 @@ def minute(self) -> Index: >>> datetime_index.minute Index([0, 1, 2], dtype='int16') """ + # .minute is already a cached_property return Index._from_column(self._column.minute, name=self.name) - @property # type: ignore + @property @_performance_tracking def second(self) -> Index: """ @@ -3933,9 +3945,10 @@ def second(self) -> Index: >>> datetime_index.second Index([0, 1, 2], dtype='int16') """ + # .second is already a cached_property return Index._from_column(self._column.second, name=self.name) - @property # type: ignore + @property @_performance_tracking def microsecond(self) -> Index: """ @@ -3952,21 +3965,12 @@ def microsecond(self) -> Index: '2000-01-01 00:00:00.000002'], dtype='datetime64[ns]') >>> datetime_index.microsecond - Index([0, 1, 2], dtype='int32') + Index([0, 1, 2], dtype='int16') """ - return Index._from_column( - ( - # Need to manually promote column to int32 because - # pandas-matching binop behaviour requires that this - # __mul__ returns an int16 column. 
- self._column.millisecond.astype(np.dtype(np.int32)) - * np.int32(1000) - ) - + self._column.microsecond, - name=self.name, - ) + # .microsecond is already a cached_property + return Index._from_column(self._column.microsecond, name=self.name) - @property # type: ignore + @property @_performance_tracking def nanosecond(self) -> Index: """ @@ -3986,9 +3990,10 @@ def nanosecond(self) -> Index: >>> datetime_index.nanosecond Index([0, 1, 2], dtype='int16') """ + # .nanosecond is already a cached_property return Index._from_column(self._column.nanosecond, name=self.name) - @property # type: ignore + @property @_performance_tracking def weekday(self) -> Index: """ @@ -4008,9 +4013,10 @@ def weekday(self) -> Index: >>> datetime_index.weekday Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int16') """ + # .weekday is already a cached_property return Index._from_column(self._column.weekday, name=self.name) - @property # type: ignore + @property @_performance_tracking def dayofweek(self) -> Index: """ @@ -4030,9 +4036,10 @@ def dayofweek(self) -> Index: >>> datetime_index.dayofweek Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int16') """ + # .weekday is already a cached_property return Index._from_column(self._column.weekday, name=self.name) - @property # type: ignore + @property @_performance_tracking def dayofyear(self) -> Index: """ @@ -4053,9 +4060,10 @@ def dayofyear(self) -> Index: >>> datetime_index.dayofyear Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ + # .day_of_year is already a cached_property return Index._from_column(self._column.day_of_year, name=self.name) - @property # type: ignore + @property @_performance_tracking def day_of_year(self) -> Index: """ @@ -4076,9 +4084,10 @@ def day_of_year(self) -> Index: >>> datetime_index.day_of_year Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') """ + # .day_of_year is already a cached_property return Index._from_column(self._column.day_of_year, name=self.name) - @property # type: ignore + @property @_performance_tracking def is_leap_year(self) -> cupy.ndarray: """ @@ -4094,10 +4103,11 @@ def is_leap_year(self) -> cupy.ndarray: ndarray Booleans indicating if dates belong to a leap year. """ + # .is_leap_year is already a cached_property res = self._column.is_leap_year.fillna(False) return cupy.asarray(res) - @property # type: ignore + @property @_performance_tracking def quarter(self) -> Index: """ @@ -4120,6 +4130,7 @@ def quarter(self) -> Index: >>> gIndex.quarter Index([2, 4], dtype='int8') """ + # .quarter is already a cached_property return Index._from_column( self._column.quarter.astype(np.dtype(np.int8)) ) @@ -4570,7 +4581,7 @@ def to_pytimedelta(self) -> np.ndarray: """ return self.to_pandas().to_pytimedelta() - @property + @cached_property def asi8(self) -> cupy.ndarray: return self._column.astype(np.dtype(np.int64)).values @@ -4622,57 +4633,62 @@ def round(self, freq: str) -> Self: self._column.round(freq), name=self.name ) - @property # type: ignore + @property @_performance_tracking def days(self) -> Index: """ Number of days for each element. """ + # .days is already a cached_property # Need to specifically return `int64` to avoid overflow. return Index._from_column( self._column.days.astype(np.dtype(np.int64)), name=self.name ) - @property # type: ignore + @property @_performance_tracking def seconds(self) -> Index: """ Number of seconds (>= 0 and less than 1 day) for each element. 
""" + # .seconds is already a cached_property return Index._from_column( self._column.seconds.astype(np.dtype(np.int32)), name=self.name ) - @property # type: ignore + @property @_performance_tracking def microseconds(self) -> Index: """ Number of microseconds (>= 0 and less than 1 second) for each element. """ + # .microseconds is already a cached_property return Index._from_column( self._column.microseconds.astype(np.dtype(np.int32)), name=self.name, ) - @property # type: ignore + @property @_performance_tracking def nanoseconds(self) -> Index: """ Number of nanoseconds (>= 0 and less than 1 microsecond) for each element. """ + # .nanoseconds is already a cached_property return Index._from_column( self._column.nanoseconds.astype(np.dtype(np.int32)), name=self.name ) - @property # type: ignore + @property @_performance_tracking def components(self) -> DataFrame: """ Return a dataframe of the components (days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. """ - ca = ColumnAccessor(self._column.components(), verify=False) + # .components is already a cached_property + ca = ColumnAccessor(self._column.components, verify=False) return cudf.DataFrame._from_data(ca) @property @@ -4840,7 +4856,7 @@ def from_codes( def ordered(self) -> bool: return self._column.ordered - @property # type: ignore + @property @_performance_tracking def codes(self) -> Index: """ @@ -4848,7 +4864,7 @@ def codes(self) -> Index: """ return Index._from_column(self._column.codes) - @property # type: ignore + @property @_performance_tracking def categories(self) -> Index: """ @@ -5311,6 +5327,7 @@ def is_empty(self) -> cupy.ndarray: """ Indicates if an interval is empty, meaning it contains no points. """ + # .is_empty is already a cached_property return self._column.is_empty.values @property @@ -5318,6 +5335,7 @@ def is_non_overlapping_monotonic(self) -> bool: """ Return a True if the IntervalIndex is non-overlapping and monotonic. """ + # .is_non_overlapping_monotonic is already a cached_property return self._column.is_non_overlapping_monotonic @property @@ -5327,6 +5345,7 @@ def is_overlapping(self) -> bool: Currently not implemented """ + # .is_overlapping is already a cached_property return self._column.is_overlapping @property @@ -5334,6 +5353,7 @@ def length(self) -> Index: """ Return an Index with entries denoting the length of each Interval. """ + # .length is already a cached_property return _index_from_data({None: self._column.length}) @property @@ -5355,6 +5375,7 @@ def mid(self) -> Index: Each midpoint is calculated as the average of the left and right bounds of each interval. 
""" + # .mid is already a cached_property return _index_from_data({None: self._column.mid}) @property diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 01cf0905723..eaffa06d96d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5081,7 +5081,7 @@ def nanoseconds(self) -> Series: """ return self._return_result_like_self(self.series._column.nanoseconds) - @property # type: ignore + @property @_performance_tracking def components(self) -> DataFrame: """ @@ -5109,7 +5109,7 @@ def components(self) -> DataFrame: 3 0 0 35 35 656 0 0 4 37 13 12 14 234 0 0 """ - ca = ColumnAccessor(self.series._column.components(), verify=False) + ca = ColumnAccessor(self.series._column.components, verify=False) return self.series._constructor_expanddim._from_data( ca, index=self.series.index ) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index f2094d31267..d4b60c7fc4b 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -775,7 +775,7 @@ def date_range( ``U``, ``us``, ``N``, ``ns``. tz : str or tzinfo, optional - Not Supported + Time zone name for returning localized DatetimeIndex. normalize : bool, default False Not Supported diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py index d5b24a6a88e..527f0d07811 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_attributes.py @@ -24,6 +24,16 @@ def test_contains_tz_aware(item, expected): assert result == expected +@pytest.mark.parametrize("tz", ["UTC", None]) +def test_tz_attribute(tz): + dti = cudf.date_range("2020", periods=2, freq="D", tz=tz) + if tz is None: + assert dti.tz is None + else: + # TODO(pandas3.0-min): Assert zoneinfo.ZoneInfo(tz) == dti.tz + assert str(dti.tz) == tz + + def test_tz_aware_attributes_local(): data = [ "2008-05-12 13:50:00", From 2da8211744b70ba8b0e108b53ac0eb74d2497f5e Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 27 Aug 2025 21:49:54 -0400 Subject: [PATCH 222/366] Warn on fallback in the streaming tests in cudf-polars (#19721) If we warn and catch it, the test will fail once we support the feature in the streaming engine. Therefore we'll keep the streaming tests up-to-date. 
Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19721 --- .../tests/experimental/test_join.py | 29 +++++++++++++++---- .../tests/experimental/test_unique.py | 26 ++++++++++++++--- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/python/cudf_polars/tests/experimental/test_join.py b/python/cudf_polars/tests/experimental/test_join.py index 71c5d0e246e..3d6e373290c 100644 --- a/python/cudf_polars/tests/experimental/test_join.py +++ b/python/cudf_polars/tests/experimental/test_join.py @@ -149,7 +149,7 @@ def test_join_conditional(reverse, max_rows_per_partition): executor_options={ "max_rows_per_partition": max_rows_per_partition, "scheduler": DEFAULT_SCHEDULER, - "fallback_mode": "silent", + "fallback_mode": "warn", }, ) left = pl.LazyFrame({"x": range(15), "y": [1, 2, 3] * 5}) @@ -157,7 +157,13 @@ def test_join_conditional(reverse, max_rows_per_partition): if reverse: left, right = right, left q = left.join_where(right, pl.col("y") < pl.col("yy")) - assert_gpu_result_equal(q, engine=engine, check_row_order=False) + if max_rows_per_partition == 3: + with pytest.warns( + UserWarning, match="ConditionalJoin not supported for multiple partitions." + ): + assert_gpu_result_equal(q, engine=engine, check_row_order=False) + else: + assert_gpu_result_equal(q, engine=engine, check_row_order=False) @pytest.mark.parametrize("zlice", [(0, 2), (2, 2), (-2, None)]) @@ -170,7 +176,7 @@ def test_join_and_slice(zlice): "broadcast_join_limit": 100, "scheduler": DEFAULT_SCHEDULER, "shuffle_method": "tasks", - "fallback_mode": "warn" if zlice[0] == 0 else "silent", + "fallback_mode": "warn", }, ) left = pl.LazyFrame( @@ -190,8 +196,19 @@ def test_join_and_slice(zlice): q = left.join(right, on="a", how="inner").slice(*zlice) # Check that we get the correct row count # See: https://github.com/rapidsai/cudf/issues/19153 - assert q.collect(engine=engine).height == q.collect().height - + if zlice in {(2, 2), (-2, None)}: + with pytest.warns( + UserWarning, match="This slice not supported for multiple partitions." + ): + assert q.collect(engine=engine).height == q.collect().height + else: + assert q.collect(engine=engine).height == q.collect().height # Need sort to match order after a join q = left.join(right, on="a", how="inner").sort(pl.col("a")).slice(*zlice) - assert_gpu_result_equal(q, engine=engine) + if zlice == (2, 2): + with pytest.warns( + UserWarning, match="Sort does not support multiple partitions." 
+ ): + assert_gpu_result_equal(q, engine=engine) + else: + assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py index a699a073b59..b402ab3a933 100644 --- a/python/cudf_polars/tests/experimental/test_unique.py +++ b/python/cudf_polars/tests/experimental/test_unique.py @@ -35,7 +35,7 @@ def test_unique(df, keep, subset, maintain_order, cardinality): "max_rows_per_partition": 50, "scheduler": DEFAULT_SCHEDULER, "unique_fraction": cardinality, - "fallback_mode": "silent", + "fallback_mode": "warn", }, ) @@ -45,7 +45,19 @@ def test_unique(df, keep, subset, maintain_order, cardinality): q = q.select(*(pl.col(col) for col in subset)) check_row_order = False - assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order) + is_cardinality0 = cardinality == {} + + should_warn = (maintain_order and (not is_cardinality0 or keep == "none")) or ( + not maintain_order and (not is_cardinality0) and keep in {"first", "last"} + ) + + if should_warn: + with pytest.warns( + UserWarning, match="Unsupported unique options for multiple partitions" + ): + assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order) + else: + assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order) def test_unique_fallback(df): @@ -77,12 +89,18 @@ def test_unique_select(df, maintain_order, cardinality): "max_rows_per_partition": 4, "scheduler": DEFAULT_SCHEDULER, "unique_fraction": cardinality, - "fallback_mode": "silent", + "fallback_mode": "warn", }, ) q = df.select(pl.col("y").unique(maintain_order=maintain_order)) - assert_gpu_result_equal(q, engine=engine, check_row_order=False) + if cardinality == {"y": 0.5} and maintain_order: + with pytest.warns( + UserWarning, match="Unsupported unique options for multiple partitions." + ): + assert_gpu_result_equal(q, engine=engine, check_row_order=False) + else: + assert_gpu_result_equal(q, engine=engine, check_row_order=False) @pytest.mark.parametrize("keep", ["first", "last", "any"]) From 7326449185f5a5afb05c5f1ec39ceaf3f59b5189 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 27 Aug 2025 21:54:51 -0400 Subject: [PATCH 223/366] Improvements to `pylibcudf.from_iterable_of_py` (#19781) - Fix a bug when getting the offsets column - Add validation checks and fast paths to `from_iterable_of_py` Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19781 --- python/pylibcudf/pylibcudf/column.pyx | 35 +++++++++++++++++-- .../tests/test_column_from_iterable.py | 15 ++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 8773c123d75..cc0ea75c158 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -66,6 +66,14 @@ from typing import Iterable __all__ = ["Column", "ListColumnView", "is_c_contiguous"] +cdef is_iterable(obj): + try: + iter(obj) + except Exception: + return False + return True + + cdef class _ArrowColumnHolder: """A holder for an Arrow column for gpumemoryview lifetime management.""" cdef unique_ptr[arrow_column] col @@ -1000,8 +1008,31 @@ cdef class Column: (in bytes) can exceed the maximum 32-bit integer value. In that case, the offsets column is automatically promoted to use 64-bit integers. 
""" + if not is_iterable(obj): + raise ValueError(f"{obj=} is not iterable") + + if ( + hasattr(obj, "__cuda_array_interface__") + or hasattr(obj, "__array_interface__") + ): + raise TypeError( + "Object has __cuda_array_interface__ or __array_interface__. " + "Please call Column.from_array(obj)." + ) - obj = list(obj) + if ( + hasattr(obj, "__arrow_c_array__") + or hasattr(obj, "__arrow_c_device_array__") + or hasattr(obj, "__arrow_c_stream__") + or hasattr(obj, "__arrow_c_device_stream__") + ): + raise TypeError( + "Object implements the Arrow C data interface protocol. " + "Please call Column.from_arrow(obj)." + ) + + if not isinstance(obj, (list, tuple)): + obj = list(obj) if not obj: if dtype is None: @@ -1276,7 +1307,7 @@ cdef class ListColumnView: cpdef offsets(self): """The offsets column of the underlying list column.""" - return self._column.child(1) + return self._column.child(0) cdef lists_column_view view(self) nogil: """Generate a libcudf lists_column_view to pass to libcudf algorithms. diff --git a/python/pylibcudf/tests/test_column_from_iterable.py b/python/pylibcudf/tests/test_column_from_iterable.py index 9768b0d3960..0cb6515704b 100644 --- a/python/pylibcudf/tests/test_column_from_iterable.py +++ b/python/pylibcudf/tests/test_column_from_iterable.py @@ -4,6 +4,7 @@ import operator from functools import reduce +import numpy as np import pyarrow as pa import pytest from utils import assert_column_eq @@ -216,3 +217,17 @@ def test_from_nested_list_of_large_strings(dummy_large_string_type): assert col.type().id() == plc.TypeId.LIST assert col.children()[1].type().id() == plc.TypeId.STRING assert col.children()[1].children()[0].type().id() == plc.TypeId.INT64 + + +@pytest.mark.parametrize( + "arr", + [pa.array([1, 2], type=pa.int32()), np.array([1, 2], dtype=np.int32)], +) +def test_from_iterable_pyarrow_or_numpy_array(arr): + with pytest.raises(TypeError, match="Please call"): + plc.Column.from_iterable_of_py(arr) + + +def test_from_iterable_plc_column(): + with pytest.raises(ValueError, match="is not iterable"): + plc.Column.from_iterable_of_py(plc.Column.from_iterable_of_py([1])) From a449958a6a5f07ab085daf4a57f1a0f027b3389c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 28 Aug 2025 08:31:29 -0500 Subject: [PATCH 224/366] pin oldest numpy in dask-cudf tests, update dependency floors (cuda-python 12.9.2, cupy 13.6.0, numba 0.60.0) (#19806) Contributes to https://github.com/rapidsai/build-planning/issues/208 * updates dependency pins: - `cuda-python`: >=12.9.2 (CUDA 12) - `cupy`: >=13.6.0 - `numba`: >=0.60.0 (now that https://github.com/NVIDIA/numba-cuda/pull/403 is done) * ensures that "oldest" `numpy` is pinned in `dask-cudf` tests - _the "oldest" pin for `numpy` was previously not used in `dask-cudf` wheel tests, allowing an incompatible mix of packages (`pandas 2.0.3, numpy 2.0.2`) to be installed together_ ## Notes for Reviewers ### Why a separate PR? In https://github.com/rapidsai/cudf/pull/19768#issuecomment-3225933614, we saw this set of dependency changes caused failures like this in CUDA 12 and CUDA 13 environments: ```text ... ERROR io/tests/test_csv.py - ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject ERROR io/tests/test_json.py - ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject ERROR io/tests/test_orc.py - ValueError: numpy.dtype size changed, may indicate binary incompatibility. 
Expected 96 from C header, got 88 from PyObject ERROR io/tests/test_parquet.py - ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject ERROR io/tests/test_s3.py - ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject ... many more ... ``` ([wheel-test-dask-cudf link](https://github.com/rapidsai/cudf/actions/runs/17249655997/job/48950898976?pr=19768#step:11:11795)) Opening this more narrowly-scoped PR to investigate that. ### How I tested this First commit here contained some of the dependency changes from #19768 , and those were enough to reproduce the test failures! https://github.com/rapidsai/cudf/actions/runs/17271893124/job/49021534507?pr=19806#step:11:11928 # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Matthew Murray (https://github.com/Matt711) - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/19806 --- .../all_cuda-129_arch-aarch64.yaml | 6 +-- .../all_cuda-129_arch-x86_64.yaml | 6 +-- conda/recipes/cudf/recipe.yaml | 7 ++- conda/recipes/pylibcudf/recipe.yaml | 2 +- dependencies.yaml | 54 +++++++++++-------- python/cudf/pyproject.toml | 6 +-- python/dask_cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 6 +-- 8 files changed, 49 insertions(+), 40 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 0140f536cc1..466aaf5d7fa 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -20,10 +20,10 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.9.1,<13.0a0 +- cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 -- cupy>=12.0.0 +- cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 - dask-cuda==25.10.*,>=0.0.0a0 @@ -56,7 +56,7 @@ dependencies: - ninja - notebook - numba-cuda>=0.19.1,<0.20.0a0 -- numba>=0.61.0,<0.62.0a0 +- numba>=0.60.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 36d858a4957..1679dfd5d83 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -20,10 +20,10 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.9.1,<13.0a0 +- cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 -- cupy>=12.0.0 +- cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 - dask-cuda==25.10.*,>=0.0.0a0 @@ -57,7 +57,7 @@ dependencies: - ninja - notebook - numba-cuda>=0.19.1,<0.20.0a0 -- numba>=0.61.0,<0.62.0a0 +- numba>=0.60.0,<0.62.0a0 - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index b89151fb266..cf85a1c8117 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -69,10 +69,9 @@ requirements: - python - typing_extensions >=4.0.0 - pandas >=2.0,<2.4.0dev0 - - cupy >=12.0.0 + - cupy >=13.6.0 - numba-cuda >=0.19.1,<0.20.0a0 - # TODO: Revert to numba>=0.60.0,<0.62.0a0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. 
- - numba >=0.61.0,<0.62.0a0 + - numba >=0.60.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<20.0.0a0 - libcudf =${{ version }} @@ -80,7 +79,7 @@ requirements: - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - cuda-cudart - - cuda-python >=12.9.1,<13.0a0 + - cuda-python >=12.9.2,<13.0a0 - if: linux and x86_64 then: - libcufile diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml index 548a35da119..8bc859fd633 100644 --- a/conda/recipes/pylibcudf/recipe.yaml +++ b/conda/recipes/pylibcudf/recipe.yaml @@ -70,7 +70,7 @@ requirements: - libcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - - cuda-python >=12.9.1,<13.0a0 + - cuda-python >=12.9.2,<13.0a0 - nvtx >=0.2.1 - packaging run_constraints: diff --git a/dependencies.yaml b/dependencies.yaml index 614c4fbc395..574940c8121 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -364,6 +364,7 @@ files: - depends_on_dask_cuda - test_python_common - test_python_cudf_common + - test_python_dask_cudf py_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka @@ -657,17 +658,19 @@ dependencies: specific: - output_types: [conda, requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} - packages: &run_pylibcudf_packages_all_cu12 - - cuda-python>=12.9.1,<13.0a0 - - {matrix: null, packages: *run_pylibcudf_packages_all_cu12} + - matrix: &run_pylibcudf_packages_all_cu12 + cuda: "12.*" + packages: + - &cuda_python_cu12 cuda-python>=12.9.2,<13.0a0 + - matrix: + packages: + - *cuda_python_cu12 run_cudf: common: - output_types: [conda, requirements, pyproject] packages: - cachetools - # TODO: Revert to numba>=0.60.0,<0.62.0a0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. - - &numba numba>=0.61.0,<0.62.0a0 + - &numba numba>=0.60.0,<0.62.0a0 - nvtx>=0.2.1 - packaging - rich @@ -682,7 +685,7 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: &run_cudf_packages_all_cu12 - - cuda-python>=12.9.1,<13.0a0 + - cuda-python>=12.9.2,<13.0a0 - {matrix: null, packages: *run_cudf_packages_all_cu12} - output_types: [requirements, pyproject] matrices: @@ -767,8 +770,7 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - # TODO: Revert to numba==0.60.0 once https://github.com/NVIDIA/numba-cuda/pull/403 is released. - - numba==0.61.0 + - numba==0.60.0 - pandas==2.0.* - numba-cuda==0.19.1 - matrix: {dependencies: "latest"} @@ -778,20 +780,30 @@ dependencies: packages: - output_types: conda matrices: - - matrix: {dependencies: "oldest", arch: "aarch64", cuda: "12.*"} - packages: - - cupy==12.2.0 # cupy 12.2.0 is the earliest with CUDA 12 ARM packages. - matrix: {dependencies: "oldest"} packages: - - cupy==12.0.0 + - cupy==13.6.0 - matrix: packages: - output_types: requirements # Using --constraints for pip install, so we list cupy multiple times matrices: - - matrix: {dependencies: "oldest"} + - matrix: + dependencies: "oldest" + packages: + - cupy-cuda12x==13.6.0 + - matrix: packages: - - cupy-cuda12x==12.0.0 + test_python_dask_cudf: + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + dependencies: "oldest" + packages: + # this is sometimes different from the floor in cudf because + # dask imposes its own constraints on 'numpy' (e.g. 
for 'dask.array') + - numpy==1.24.* - matrix: packages: test_python_pylibcudf: @@ -838,10 +850,7 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - # TODO: Revert to numpy==1.23.* once - # https://github.com/NVIDIA/numba-cuda/pull/403 is released and - # we revert to an oldest pinning of numba==0.60.0. - - numpy==1.24.* + - numpy==1.23.* # pyarrow 14 is fine in some circumstances but we require pyarrow # 15 in our CI tests in order to get a lz4-c that is compatible # with cudf_kafka's dependencies. @@ -983,13 +992,14 @@ dependencies: common: - output_types: conda packages: - - cupy>=12.0.0 + - cupy>=13.6.0 specific: - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" packages: &cupy_packages_cu12 - - cupy-cuda12x>=12.0.0 + - cupy-cuda12x>=13.6.0 - {matrix: null, packages: *cupy_packages_cu12} depends_on_libkvikio: common: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index ece2dd3f07a..039e81742f8 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -19,12 +19,12 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cachetools", - "cuda-python>=12.9.1,<13.0a0", - "cupy-cuda12x>=12.0.0", + "cuda-python>=12.9.2,<13.0a0", + "cupy-cuda12x>=13.6.0", "fsspec>=0.6.0", "libcudf==25.10.*,>=0.0.0a0", "numba-cuda[cu12]>=0.19.1,<0.20.0a0", - "numba>=0.61.0,<0.62.0a0", + "numba>=0.60.0,<0.62.0a0", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", "packaging", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index b8603849892..55f319bc268 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cudf==25.10.*,>=0.0.0a0", - "cupy-cuda12x>=12.0.0", + "cupy-cuda12x>=13.6.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.4.0dev0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 85fabaa14bf..fb49c45a4a2 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=12.9.1,<13.0a0", + "cuda-python>=12.9.2,<13.0a0", "libcudf==25.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", @@ -39,13 +39,13 @@ classifiers = [ [project.optional-dependencies] test = [ - "cupy-cuda12x>=12.0.0", + "cupy-cuda12x>=13.6.0", "fastavro>=0.22.9", "hypothesis>=6.131.7", "mmh3", "nanoarrow", "numba-cuda[cu12]>=0.19.1,<0.20.0a0", - "numba>=0.61.0,<0.62.0a0", + "numba>=0.60.0,<0.62.0a0", "pandas", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", From 6b12079b44f42421c5fa55e68662ac920642e7c5 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 28 Aug 2025 09:43:34 -0400 Subject: [PATCH 225/366] Add changed-files to the needs of every job that requires it (#19830) Follow up to #19819, we're currently getting `fromJSON: empty` input because these jobs reference `needs.changed-files.outputs.changed_file_groups` without listing `changed-files` in their requirements. 
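As a sketch of the fix (an abbreviated excerpt of the jobs changed below --
only the `needs`/`if` lines matter), any job whose `if` condition reads the
`changed-files` outputs must also list that job in `needs`:

```yaml
cudf-polars-polars-tests:
  # Without `changed-files` in `needs`, the `needs.changed-files` context
  # below is empty, so `fromJSON` receives empty input and the job errors.
  needs: [wheel-build-cudf-polars, changed-files]
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
```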
See the errors: https://github.com/rapidsai/cudf/actions/runs/17276870544/job/49037559718 and https://github.com/rapidsai/cudf/actions/runs/17276870544/job/49037755672

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Gil Forsyth (https://github.com/gforsyth)
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cudf/pull/19830
---
 .github/workflows/pr.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 01da4160bad..ae22eaf04c3 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -281,7 +281,7 @@ jobs:
       build_type: pull-request
       script: "ci/test_wheel_cudf_polars.sh"
   cudf-polars-polars-tests:
-    needs: wheel-build-cudf-polars
+    needs: [wheel-build-cudf-polars, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
@@ -341,7 +341,7 @@ jobs:
       build_type: pull-request
       script: ci/cudf_pandas_scripts/run_tests.sh
   third-party-integration-tests-cudf-pandas:
-    needs: conda-python-build
+    needs: [conda-python-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas

From 82f6d2b91601cce7566659af50de2a3b92d906fb Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Thu, 28 Aug 2025 08:50:23 -0500
Subject: [PATCH 226/366] Update exception handling in pdsh benchmarks (#19793)

This updates how we handle exceptions in the execution of a cudf-polars query. Previously, an exception raised during a query (e.g., an out-of-memory exception) would cause the process to exit, and no results would be written.

Now, we'll

1. Catch the exception, print it to stdout, and record the fact that the query-iteration errored
2. Continue running the remaining queries / iterations
3. Write out the results of any successful execution
4.
Exit with a non-zero error code if any exceptions occurred Authors: - Tom Augspurger (https://github.com/TomAugspurger) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19793 --- .../cudf_polars/experimental/benchmarks/utils.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index 53b492e2e82..45142c31173 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -14,6 +14,7 @@ import sys import textwrap import time +import traceback from collections import defaultdict from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Literal, assert_never @@ -716,6 +717,7 @@ def run_polars( vars(args).update({"query_set": benchmark.name}) run_config = RunConfig.from_args(args) validation_failures: list[int] = [] + query_failures: list[tuple[int, int]] = [] client = initialize_dask_cluster(run_config, args) # type: ignore @@ -743,8 +745,13 @@ def run_polars( for i in range(args.iterations): t0 = time.monotonic() - result = execute_query(q_id, i, q, run_config, args, engine) - + try: + result = execute_query(q_id, i, q, run_config, args, engine) + except Exception: + print(f"❌ query={q_id} iteration={i} failed!") + print(traceback.format_exc()) + query_failures.append((q_id, i)) + continue if run_config.shuffle == "rapidsmpf" and run_config.gather_shuffle_stats: from rapidsmpf.integrations.dask.shuffler import ( clear_shuffle_statistics, @@ -794,5 +801,9 @@ def run_polars( ) else: print("All validated queries passed.") + args.output.write(json.dumps(run_config.serialize(engine=engine))) args.output.write("\n") + + if query_failures or validation_failures: + sys.exit(1) From 94e0f922471ebdf4abdd42d7c23644ab33201391 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 28 Aug 2025 11:34:26 -0500 Subject: [PATCH 227/366] Build and test with CUDA 13.0.0 (#19768) Contributes to https://github.com/rapidsai/build-planning/issues/208 * uses CUDA 13.0.0 to build and test * adds CUDA 13 devcontainers * adds `cuda-nvvm-tools` as a runtime dependency of `cudf` conda packages - temporary workaround for https://github.com/NVIDIA/numba-cuda/issues/430, from @brandon-b-miller Contributes to https://github.com/rapidsai/build-planning/issues/68 * updates to CUDA 13 dependencies in fallback entries in `dependencies.yaml` matrices (i.e., the ones that get written to `pyproject.toml` in source control) ## Notes for Reviewers This switches GitHub Actions workflows to the `cuda13.0` branch from here: https://github.com/rapidsai/shared-workflows/pull/413 A future round of PRs will revert that back to `branch-25.10`, once all of RAPIDS supports CUDA 13. 
### This has dependencies Need these to be merged first: * [x] #19821 * [x] #19806 Authors: - James Lamb (https://github.com/jameslamb) - David Wendt (https://github.com/davidwendt) Approvers: - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/19768 --- .../cuda13.0-conda/devcontainer.json | 76 +++++++++++++ .devcontainer/cuda13.0-pip/devcontainer.json | 53 +++++++++ .github/workflows/build.yaml | 28 ++--- .github/workflows/pandas-tests.yaml | 4 +- .github/workflows/pr.yaml | 62 +++++------ .../workflows/pr_issue_status_automation.yml | 8 +- .github/workflows/test.yaml | 32 +++--- .../trigger-breaking-change-alert.yaml | 2 +- CONTRIBUTING.md | 2 +- README.md | 10 +- .../all_cuda-130_arch-aarch64.yaml | 103 +++++++++++++++++ .../all_cuda-130_arch-x86_64.yaml | 104 ++++++++++++++++++ conda/recipes/cudf/recipe.yaml | 8 +- conda/recipes/pylibcudf/recipe.yaml | 4 +- dependencies.yaml | 101 +++++++++++++++-- .../dependencies.yaml | 4 + python/cudf/pyproject.toml | 8 +- python/dask_cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 6 +- 19 files changed, 526 insertions(+), 91 deletions(-) create mode 100644 .devcontainer/cuda13.0-conda/devcontainer.json create mode 100644 .devcontainer/cuda13.0-pip/devcontainer.json create mode 100644 conda/environments/all_cuda-130_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-130_arch-x86_64.yaml diff --git a/.devcontainer/cuda13.0-conda/devcontainer.json b/.devcontainer/cuda13.0-conda/devcontainer.json new file mode 100644 index 00000000000..f236ef00da3 --- /dev/null +++ b/.devcontainer/cuda13.0-conda/devcontainer.json @@ -0,0 +1,76 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "13.0", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge" + } + }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-conda", + "--ulimit", + "nofile=500000" + ], + "hostRequirements": { + "gpu": "optional" + }, + "features": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": { + "version": "13.0", + "installCompilers": false, + "installProfilers": true, + "installDevPackages": false, + "installcuDNN": false, + "installcuTensor": false, + "installNCCL": false, + "installCUDARuntime": false, + "installNVRTC": false, + "installOpenCL": false, + "installcuBLAS": false, + "installcuSPARSE": false, + "installcuFFT": false, + "installcuFile": false, + "installcuRAND": false, + "installcuSOLVER": false, + "installNPP": false, + "installnvJPEG": false, + "pruneStaticLibs": true + }, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/cuda", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.0-envs}" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi" + ], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda13.0-pip/devcontainer.json b/.devcontainer/cuda13.0-pip/devcontainer.json new file mode 100644 index 00000000000..c6c0f0c2230 --- /dev/null +++ b/.devcontainer/cuda13.0-pip/devcontainer.json @@ -0,0 +1,53 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "13.0", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:25.10-cpp-cuda13.0" + } + }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-pip", + "--ulimit", + "nofile=500000" + ], + "hostRequirements": { + "gpu": "optional" + }, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": [ + "/bin/bash", + "-c", + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs}" + ], + "postAttachCommand": [ + "/bin/bash", + "-c", + "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi" + ], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d9bb501c968..87cc789ac78 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -46,7 +46,7 @@ jobs: cpp-build: needs: [telemetry-setup] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: python-build: needs: [telemetry-setup, cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -67,7 +67,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -77,7 +77,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -90,7 +90,7 @@ jobs: wheel-build-libcudf: needs: [telemetry-setup] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -105,7 +105,7 @@ jobs: wheel-publish-libcudf: needs: wheel-build-libcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -116,7 +116,7 @@ jobs: wheel-build-pylibcudf: needs: [telemetry-setup, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -128,7 +128,7 @@ jobs: wheel-publish-pylibcudf: needs: 
wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -139,7 +139,7 @@ jobs: wheel-build-cudf: needs: [telemetry-setup, wheel-build-pylibcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -151,7 +151,7 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -162,7 +162,7 @@ jobs: wheel-build-dask-cudf: needs: [telemetry-setup, wheel-build-cudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -177,7 +177,7 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -188,7 +188,7 @@ jobs: wheel-build-cudf-polars: needs: [telemetry-setup, wheel-build-pylibcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -203,7 +203,7 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@cuda13.0 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index 56fba1f6d8f..085c17bd9f2 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -22,9 +22,9 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: - matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' + matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.0", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index ae22eaf04c3..10904fb1425 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -43,7 +43,7 @@ jobs: - telemetry-setup - third-party-integration-tests-cudf-pandas secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@cuda13.0 if: always() with: needs: ${{ toJSON(needs) }} @@ -68,7 +68,7 @@ jobs: changed-files: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@cuda13.0 with: files_yaml: | test_cpp: @@ -130,14 +130,14 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@cuda13.0 with: enable_check_generated_files: false ignored_pr_jobs: "telemetry-summarize spark-rapids-jni" conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@cuda13.0 with: build_type: pull-request node_type: "cpu16" @@ -145,7 +145,7 @@ jobs: cpp-linters: secrets: inherit needs: checks - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: pull-request script: "ci/cpp_linters.sh" @@ -153,13 +153,13 @@ jobs: conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda13.0 with: build_type: pull-request conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -167,14 +167,14 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@cuda13.0 with: build_type: pull-request script: ci/build_python.sh conda-python-cudf-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -183,7 +183,7 @@ jobs: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -191,7 +191,7 @@ jobs: conda-java-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java with: build_type: pull-request @@ -202,7 +202,7 @@ jobs: conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -213,7 +213,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: pull-request node_type: "gpu-l4-latest-1" @@ -223,7 +223,7 @@ jobs: wheel-build-libcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) @@ -235,7 +235,7 @@ jobs: wheel-build-pylibcudf: needs: [checks, wheel-build-libcudf] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: "ci/build_wheel_pylibcudf.sh" @@ -244,7 +244,7 @@ jobs: wheel-build-cudf: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" @@ -253,7 +253,7 @@ jobs: wheel-tests-cudf: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -261,7 +261,7 @@ jobs: wheel-build-cudf-polars: needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -273,7 +273,7 @@ jobs: wheel-tests-cudf-polars: needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -283,7 +283,7 @@ jobs: cudf-polars-polars-tests: needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -293,7 +293,7 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@cuda13.0 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -305,7 +305,7 @@ jobs: wheel-tests-dask-cudf: needs: [wheel-build-dask-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -315,10 +315,10 @@ jobs: devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda13.0 with: arch: '["amd64", "arm64"]' - cuda: '["12.9"]' + cuda: '["13.0"]' node_type: "cpu8" rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN env: | @@ -333,7 +333,7 @@ jobs: unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
@@ -343,7 +343,7 @@ jobs: third-party-integration-tests-cudf-pandas: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: build_type: pull-request @@ -352,17 +352,17 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" continue-on-error: true - container_image: "rapidsai/ci-conda:cuda12.9.0-ubuntu24.04-py3.12" + container_image: "rapidsai/ci-conda:25.10-latest" script: | ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml pandas-tests: # run the Pandas unit tests using PR branch needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: - matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' + matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.0", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' build_type: pull-request script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit. @@ -370,7 +370,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: node_type: "cpu4" build_type: pull-request @@ -378,7 +378,7 @@ jobs: narwhals-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 148d83e73d6..e7e48eac44c 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@cuda13.0 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@cuda13.0 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its 
linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@cuda13.0 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -79,7 +79,7 @@ jobs: update-release: # This job sets the PR and its linked issues to the release they are targeting - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@cuda13.0 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: [get-project-id, process-branch-name] with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b8af253bc56..52e381dd7d8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -24,7 +24,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -53,7 +53,7 @@ jobs: script: "ci/test_cpp_memcheck.sh" cpp-linters: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -63,7 +63,7 @@ jobs: file_to_upload: iwyu_results.txt conda-python-cudf-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -82,7 +82,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -94,7 +94,7 @@ jobs: script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -115,7 +115,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -124,7 +124,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -133,19 +133,19 @@ jobs: script: ci/cudf_pandas_scripts/run_tests.sh third-party-integration-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} node_type: "gpu-l4-latest-1" - container_image: "rapidsai/ci-conda:cuda12.9.0-ubuntu24.04-py3.12" + container_image: "rapidsai/ci-conda:25.10-latest" script: | ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml wheel-tests-cudf-polars: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -154,7 +154,7 @@ jobs: script: "ci/test_wheel_cudf_polars.sh" cudf-polars-polars-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} @@ -163,7 +163,7 @@ jobs: script: "ci/test_cudf_polars_polars_tests.sh" narwhals-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 48bf37afc40..72751d071bb 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@cuda13.0 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff 
--git a/CONTRIBUTING.md b/CONTRIBUTING.md index ab7a5731b69..35a896559cd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -121,7 +121,7 @@ Instructions for a minimal build environment without conda are included below. # create the conda environment (assuming in base `cudf` directory) # note: RAPIDS currently doesn't support `channel_priority: strict`; # use `channel_priority: flexible` instead -conda env create --name cudf_dev --file conda/environments/all_cuda-129_arch-x86_64.yaml +conda env create --name cudf_dev --file conda/environments/all_cuda-130_arch-x86_64.yaml # activate the environment conda activate cudf_dev ``` diff --git a/README.md b/README.md index 04980005846..538a1b6d344 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,10 @@ Be sure to select the appropriate cuDF package depending on the major version of CUDA available in your environment: ```bash +# CUDA 13 +pip install cudf-cu13 + +# CUDA 12 pip install cudf-cu12 ``` @@ -73,7 +77,11 @@ pip install cudf-cu12 cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge/miniforge)) from the `rapidsai` channel: ```bash -conda install -c rapidsai -c conda-forge cudf=25.10 +# CUDA 13 +conda install -c rapidsai -c conda-forge cudf=25.10 cuda-version=13.0 + +# CUDA 12 +conda install -c rapidsai -c conda-forge cudf=25.10 cuda-version=12.9 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml new file mode 100644 index 00000000000..bd29b08c758 --- /dev/null +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -0,0 +1,103 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- aiobotocore>=2.2.0 +- boto3>=1.21.21 +- botocore>=1.24.21 +- breathe>=4.35.0 +- c-compiler +- cachetools +- certifi +- clang-tools==20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cramjam +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvrtc-dev +- cuda-nvtx-dev +- cuda-nvvm-tools +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cupy>=13.6.0 +- cxx-compiler +- cython>=3.0.3 +- dask-cuda==25.10.*,>=0.0.0a0 +- dlpack>=0.8,<1.0 +- doxygen=1.9.1 +- fastavro>=0.22.9 +- flatbuffers==24.3.25 +- fsspec>=0.6.0 +- gcc_linux-aarch64=14.* +- hypothesis>=6.131.7 +- identify>=2.5.20 +- include-what-you-use==0.24.0 +- ipython +- jupyter_client +- libcurand-dev +- libkvikio==25.10.*,>=0.0.0a0 +- libnvcomp-dev==5.0.0.6 +- libnvjitlink-dev +- librdkafka>=2.8.0,<2.9.0a0 +- librmm==25.10.*,>=0.0.0a0 +- make +- mmh3 +- moto>=4.0.8 +- msgpack-python +- myst-nb +- nanoarrow +- nbconvert +- nbformat +- nbsphinx +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0,<0.62.0a0 +- numpy>=1.23,<3.0a0 +- numpydoc +- nvidia-ml-py +- nvtx>=0.2.1 +- openpyxl +- packaging +- pandas +- pandas>=2.0,<2.4.0dev0 +- pandoc +- polars>=1.28,<1.33 +- pre-commit +- pyarrow>=14.0.0,<20.0.0a0 +- pydata-sphinx-theme>=0.15.4 +- pynvml>=12.0.0,<13.0.0a0 +- pytest +- pytest-benchmark +- pytest-cases>=3.8.2 +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- python-confluent-kafka>=2.8.0,<2.9.0a0 +- python-xxhash +- python>=3.10,<3.14 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 +- rapids-logger==0.1.*,>=0.0.0a0 +- rich +- rmm==25.10.*,>=0.0.0a0 +- s3fs>=2022.3.0 +- scikit-build-core>=0.10.0 +- scipy +- sphinx-autobuild +- sphinx-copybutton +- sphinx-markdown-tables +- sphinx-remove-toctrees +- sphinx>=8.1.0 +- sphinxcontrib-websupport +- streamz +- sysroot_linux-aarch64==2.28 +- typing_extensions>=4.0.0 +- zlib>=1.2.13 +- zstandard +name: all_cuda-130_arch-aarch64 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml new file mode 100644 index 00000000000..4f06590e585 --- /dev/null +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -0,0 +1,104 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- aiobotocore>=2.2.0 +- boto3>=1.21.21 +- botocore>=1.24.21 +- breathe>=4.35.0 +- c-compiler +- cachetools +- certifi +- clang-tools==20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cramjam +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvrtc-dev +- cuda-nvtx-dev +- cuda-nvvm-tools +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cupy>=13.6.0 +- cxx-compiler +- cython>=3.0.3 +- dask-cuda==25.10.*,>=0.0.0a0 +- dlpack>=0.8,<1.0 +- doxygen=1.9.1 +- fastavro>=0.22.9 +- flatbuffers==24.3.25 +- fsspec>=0.6.0 +- gcc_linux-64=14.* +- hypothesis>=6.131.7 +- identify>=2.5.20 +- include-what-you-use==0.24.0 +- ipython +- jupyter_client +- libcufile-dev +- libcurand-dev +- libkvikio==25.10.*,>=0.0.0a0 +- libnvcomp-dev==5.0.0.6 +- libnvjitlink-dev +- librdkafka>=2.8.0,<2.9.0a0 +- librmm==25.10.*,>=0.0.0a0 +- make +- mmh3 +- moto>=4.0.8 +- msgpack-python +- myst-nb +- nanoarrow +- nbconvert +- nbformat +- nbsphinx +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0,<0.62.0a0 +- numpy>=1.23,<3.0a0 +- numpydoc +- nvidia-ml-py +- nvtx>=0.2.1 +- openpyxl +- packaging +- pandas +- pandas>=2.0,<2.4.0dev0 +- pandoc +- polars>=1.28,<1.33 +- pre-commit +- pyarrow>=14.0.0,<20.0.0a0 +- pydata-sphinx-theme>=0.15.4 +- pynvml>=12.0.0,<13.0.0a0 +- pytest +- pytest-benchmark +- pytest-cases>=3.8.2 +- pytest-cov +- pytest-rerunfailures +- pytest-xdist +- python-confluent-kafka>=2.8.0,<2.9.0a0 +- python-xxhash +- python>=3.10,<3.14 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==25.10.*,>=0.0.0a0 +- rapids-logger==0.1.*,>=0.0.0a0 +- rich +- rmm==25.10.*,>=0.0.0a0 +- s3fs>=2022.3.0 +- scikit-build-core>=0.10.0 +- scipy +- sphinx-autobuild +- sphinx-copybutton +- sphinx-markdown-tables +- sphinx-remove-toctrees +- sphinx>=8.1.0 +- sphinxcontrib-websupport +- streamz +- sysroot_linux-64==2.28 +- typing_extensions>=4.0.0 +- zlib>=1.2.13 +- zstandard +name: all_cuda-130_arch-x86_64 diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index cf85a1c8117..05b8f6f7b89 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -71,6 +71,10 @@ requirements: - pandas >=2.0,<2.4.0dev0 - cupy >=13.6.0 - numba-cuda >=0.19.1,<0.20.0a0 + # TODO: remove cuda-nvvm-tools once https://github.com/NVIDIA/numba-cuda/issues/430 is resolved + # and we move the numba-cuda floor up to a version containing a fix for it + - if: cuda_major == "13" + then: cuda-nvvm-tools - numba >=0.60.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<20.0.0a0 @@ -79,7 +83,9 @@ requirements: - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - cuda-cudart - - cuda-python >=12.9.2,<13.0a0 + - if: cuda_major == "12" + then: cuda-python >=12.9.2,<13.0a0 + else: cuda-python >=13.0.1,<14.0a0 - if: linux and x86_64 then: - libcufile diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml index 8bc859fd633..4273baf5fd3 100644 --- a/conda/recipes/pylibcudf/recipe.yaml +++ b/conda/recipes/pylibcudf/recipe.yaml @@ -70,7 +70,9 @@ requirements: - libcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - - cuda-python >=12.9.2,<13.0a0 + - if: cuda_major == "12" + then: cuda-python >=12.9.2,<13.0a0 + else: cuda-python >=13.0.1,<14.0a0 - nvtx >=0.2.1 - packaging run_constraints: diff --git a/dependencies.yaml b/dependencies.yaml index 574940c8121..fe6d8dcf2bb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 
+3,7 @@ files: all: output: conda matrix: - cuda: ["12.9"] + cuda: ["12.9", "13.0"] arch: [x86_64, aarch64] includes: - build_base @@ -545,6 +545,10 @@ dependencies: cuda: "12.9" packages: - cuda-version=12.9 + - matrix: + cuda: "13.0" + packages: + - cuda-version=13.0 cuda: common: - output_types: [conda] @@ -658,13 +662,14 @@ dependencies: specific: - output_types: [conda, requirements, pyproject] matrices: - - matrix: &run_pylibcudf_packages_all_cu12 + - matrix: cuda: "12.*" packages: - - &cuda_python_cu12 cuda-python>=12.9.2,<13.0a0 + - cuda-python>=12.9.2,<13.0a0 + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided - matrix: packages: - - *cuda_python_cu12 + - cuda-python>=13.0.1,<14.0a0 run_cudf: common: - output_types: [conda, requirements, pyproject] @@ -683,10 +688,14 @@ dependencies: specific: - output_types: [conda, requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} - packages: &run_cudf_packages_all_cu12 + - matrix: + cuda: "12.*" + packages: - cuda-python>=12.9.2,<13.0a0 - - {matrix: null, packages: *run_cudf_packages_all_cu12} + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided + - matrix: + packages: + - cuda-python>=13.0.1,<14.0a0 - output_types: [requirements, pyproject] matrices: - matrix: @@ -695,6 +704,12 @@ dependencies: packages: - nvidia-cuda-nvcc-cu12 - nvidia-cuda-nvrtc-cu12 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - nvidia-cuda-nvcc-cu13 + - nvidia-cuda-nvrtc-cu13 - {matrix: null, packages: []} run_cudf_polars: common: @@ -789,9 +804,15 @@ dependencies: # Using --constraints for pip install, so we list cupy multiple times matrices: - matrix: + cuda: "12.*" dependencies: "oldest" packages: - cupy-cuda12x==13.6.0 + - matrix: + cuda: "13.*" + dependencies: "oldest" + packages: + - cupy-cuda13x==13.6.0 - matrix: packages: test_python_dask_cudf: @@ -863,6 +884,7 @@ dependencies: cuda: "12.*" packages: - pytorch>=2.4.0 + # TODO: add a 13.x entry here when pytorch has CUDA 13 packages (https://github.com/pytorch/pytorch/issues/159779) - matrix: packages: test_python_cudf_polars: @@ -896,6 +918,11 @@ dependencies: cuda_suffixed: "true" packages: - libcudf-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - libcudf-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*libcudf_unsuffixed]} depends_on_numba_cuda: common: @@ -903,15 +930,26 @@ dependencies: packages: - numba-cuda>=0.19.1,<0.20.0a0 specific: + # TODO: remove cuda-nvvm-tools once https://github.com/NVIDIA/numba-cuda/issues/430 is resolved + # and we move the numba-cuda floor up to a version containing a fix for it + - output_types: [conda] + matrices: + - matrix: + cuda: "13.*" + packages: + - cuda-nvvm-tools + - matrix: + packages: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - &numba_cuda_cu12 numba-cuda[cu12]>=0.19.1,<0.20.0a0 + - numba-cuda[cu12]>=0.19.1,<0.20.0a0 + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided - matrix: packages: - - *numba_cuda_cu12 + - numba-cuda[cu13]>=0.19.1,<0.20.0a0 depends_on_pylibcudf: common: - output_types: conda @@ -930,6 +968,11 @@ dependencies: cuda_suffixed: "true" packages: - pylibcudf-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_pylibcudf_pyarrow: common: @@ -949,6 +992,11 @@ dependencies: cuda_suffixed: "true" packages: - pylibcudf-cu12[pyarrow]==25.10.*,>=0.0.0a0 + 
- matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu13[pyarrow]==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*plc_unsuffixed]} depends_on_cudf: common: @@ -968,6 +1016,11 @@ dependencies: cuda_suffixed: "true" packages: - cudf-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cudf-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: @@ -987,20 +1040,31 @@ dependencies: cuda_suffixed: "true" packages: - cudf_kafka-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - cudf_kafka-cu13==25.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: - output_types: conda packages: - cupy>=13.6.0 + # NOTE: This is intentionally not broken into groups by a 'cuda_suffixed' selector like + # other packages with -cu{nn}x suffixes in this file. + # All RAPIDS wheel builds (including in devcontainers) expect cupy to be suffixed. specific: - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" - packages: &cupy_packages_cu12 + packages: - cupy-cuda12x>=13.6.0 - - {matrix: null, packages: *cupy_packages_cu12} + # fallback to CUDA 13 versions if 'cuda' is '13.*' or not provided + - matrix: + packages: + - cupy-cuda13x>=13.6.0 depends_on_libkvikio: common: - output_types: conda @@ -1018,6 +1082,11 @@ dependencies: cuda_suffixed: "true" packages: - libkvikio-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *libkvikio_unsuffixed @@ -1039,6 +1108,11 @@ dependencies: cuda_suffixed: "true" packages: - librmm-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - librmm-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *librmm_unsuffixed @@ -1060,6 +1134,11 @@ dependencies: cuda_suffixed: "true" packages: - rmm-cu12==25.10.*,>=0.0.0a0 + - matrix: + cuda: "13.*" + cuda_suffixed: "true" + packages: + - rmm-cu13==25.10.*,>=0.0.0a0 - matrix: packages: - *rmm_unsuffixed diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index 98b4a31a391..6e01e034cf4 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -161,6 +161,10 @@ dependencies: cuda: "12.9" packages: - cuda-version=12.9 + - matrix: + cuda: "13.0" + packages: + - cuda-version=13.0 py_version: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 039e81742f8..0c6c84bdca1 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -19,11 +19,11 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cachetools", - "cuda-python>=12.9.2,<13.0a0", - "cupy-cuda12x>=13.6.0", + "cuda-python>=13.0.1,<14.0a0", + "cupy-cuda13x>=13.6.0", "fsspec>=0.6.0", "libcudf==25.10.*,>=0.0.0a0", - "numba-cuda[cu12]>=0.19.1,<0.20.0a0", + "numba-cuda[cu13]>=0.19.1,<0.20.0a0", "numba>=0.60.0,<0.62.0a0", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", @@ -125,7 +125,7 @@ requires = [ "libcudf==25.10.*,>=0.0.0a0", "librmm==25.10.*,>=0.0.0a0", "ninja", - "numba-cuda[cu12]>=0.19.1,<0.20.0a0", + "numba-cuda[cu13]>=0.19.1,<0.20.0a0", "pylibcudf==25.10.*,>=0.0.0a0", "rmm==25.10.*,>=0.0.0a0", ] # This list was generated by 
`rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 55f319bc268..3b5f7a1d2e7 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cudf==25.10.*,>=0.0.0a0", - "cupy-cuda12x>=13.6.0", + "cupy-cuda13x>=13.6.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", "pandas>=2.0,<2.4.0dev0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index fb49c45a4a2..a76bbc36bae 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=12.9.2,<13.0a0", + "cuda-python>=13.0.1,<14.0a0", "libcudf==25.10.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", @@ -39,12 +39,12 @@ classifiers = [ [project.optional-dependencies] test = [ - "cupy-cuda12x>=13.6.0", + "cupy-cuda13x>=13.6.0", "fastavro>=0.22.9", "hypothesis>=6.131.7", "mmh3", "nanoarrow", - "numba-cuda[cu12]>=0.19.1,<0.20.0a0", + "numba-cuda[cu13]>=0.19.1,<0.20.0a0", "numba>=0.60.0,<0.62.0a0", "pandas", "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", From 47e9f2a6c1a436b2852bd83b48269c389f3ba490 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:29:33 -0700 Subject: [PATCH 228/366] Construct cuDF classic Decimal32/64Columns from RMM buffers (#19834) Precursor of https://github.com/rapidsai/cudf/issues/18726 towards consistently constructing a cuDF column with a pylibcudf Column Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19834 --- python/cudf/cudf/core/column/column.py | 12 ---- python/cudf/cudf/core/column/decimal.py | 83 ++++++++++++++---------- python/cudf/cudf/core/column/interval.py | 22 +------ 3 files changed, 51 insertions(+), 66 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c462278d2d4..162c62fe2f5 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -122,18 +122,6 @@ def _can_values_be_equal(left: DtypeObj, right: DtypeObj) -> bool: return False -def pa_mask_buffer_to_mask(mask_buf: pa.Buffer, size: int) -> Buffer: - """ - Convert PyArrow mask buffer to cuDF mask buffer - """ - mask_size = plc.null_mask.bitmask_allocation_size_bytes(size) - if mask_buf.size < mask_size: - dbuf = rmm.DeviceBuffer(size=mask_size) - dbuf.copy_from_host(np.asarray(mask_buf).view("u1")) - return as_buffer(dbuf) - return as_buffer(mask_buf) - - class ColumnBase(Serializable, BinaryOperand, Reducible): """ A ColumnBase stores columnar data in device memory. 
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index af1419fceb7..f036cd6c084 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -4,20 +4,20 @@ import warnings from decimal import Decimal -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, cast -import cupy as cp import numpy as np import pandas as pd import pyarrow as pa import pylibcudf as plc +import rmm import cudf from cudf.api.types import is_scalar from cudf.core._internals import binaryop -from cudf.core.buffer import acquire_spill_lock, as_buffer -from cudf.core.column.column import ColumnBase, pa_mask_buffer_to_mask +from cudf.core.buffer import acquire_spill_lock +from cudf.core.column.column import ColumnBase from cudf.core.column.numerical_base import NumericalBaseColumn from cudf.core.dtypes import ( Decimal32Dtype, @@ -96,6 +96,47 @@ def __cuda_array_interface__(self): "Decimals are not yet supported via `__cuda_array_interface__`" ) + @classmethod + def _from_32_64_arrow( + cls, + data: pa.Array, + *, + view_type: Literal["int32", "int64"], + plc_type: plc.TypeId, + step: int, + ) -> Self: + # Can remove when pyarrow 19 is the minimum version + mask_buf, data_buf = data.buffers() + rmm_data_buffer = rmm.DeviceBuffer.to_device( + np.frombuffer(data_buf) + .view(view_type)[::step] + .copy() + .view("uint8") + ) + plc_column = plc.Column.from_rmm_buffer( + rmm_data_buffer, + plc.DataType(plc_type, -data.type.scale), + len(data), + [], + ) + if mask_buf is not None: + mask_size = plc.null_mask.bitmask_allocation_size_bytes(len(data)) + if mask_buf.size < mask_size: + rmm_mask_buffer = rmm.DeviceBuffer(size=mask_size) + rmm_mask_buffer.copy_from_host( + np.asarray(mask_buf).view("uint8") + ) + else: + rmm_mask_buffer = rmm.DeviceBuffer.to_device( + np.frombuffer(mask_buf).view("uint8") + ) + plc_column = plc_column.with_mask( + plc.gpumemoryview(rmm_mask_buffer), data.null_count + ) + column = cls.from_pylibcudf(plc_column) + column.dtype.precision = data.type.precision + return column + def element_indexing(self, index: int): result = super().element_indexing(index) if isinstance(result, pa.Scalar): @@ -323,21 +364,8 @@ def __init__( @classmethod def from_arrow(cls, data: pa.Array) -> Self: - dtype = Decimal32Dtype.from_arrow(data.type) - mask_buf = data.buffers()[0] - mask = ( - mask_buf - if mask_buf is None - else pa_mask_buffer_to_mask(mask_buf, len(data)) - ) - data_128 = cp.array(np.frombuffer(data.buffers()[1]).view("int32")) - data_32 = data_128[::4].copy() - return cls( - data=as_buffer(data_32.view("uint8")), - size=len(data), - dtype=dtype, - offset=data.offset, - mask=mask, + return cls._from_32_64_arrow( + data, view_type="int32", plc_type=plc.TypeId.DECIMAL32, step=4 ) def to_arrow(self) -> pa.Array: @@ -460,21 +488,8 @@ def __init__( @classmethod def from_arrow(cls, data: pa.Array) -> Self: - dtype = Decimal64Dtype.from_arrow(data.type) - mask_buf = data.buffers()[0] - mask = ( - mask_buf - if mask_buf is None - else pa_mask_buffer_to_mask(mask_buf, len(data)) - ) - data_128 = cp.array(np.frombuffer(data.buffers()[1]).view("int64")) - data_64 = data_128[::2].copy() - return cls( - data=as_buffer(data_64.view("uint8")), - size=len(data), - dtype=dtype, - offset=data.offset, - mask=mask, + return cls._from_32_64_arrow( + data, view_type="int64", plc_type=plc.TypeId.DECIMAL64, step=2 ) def to_arrow(self) -> pa.Array: diff --git a/python/cudf/cudf/core/column/interval.py 
b/python/cudf/cudf/core/column/interval.py index d592afa3dda..1aab299e7ad 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -8,7 +8,7 @@ import pyarrow as pa import cudf -from cudf.core.column.column import as_column, pa_mask_buffer_to_mask +from cudf.core.column.column import as_column from cudf.core.column.struct import StructColumn from cudf.core.dtypes import IntervalDtype from cudf.utils.dtypes import is_dtype_obj_interval @@ -60,25 +60,7 @@ def _validate_dtype_instance(dtype: IntervalDtype) -> IntervalDtype: @classmethod def from_arrow(cls, data: pa.Array) -> Self: new_col = super().from_arrow(data.storage) - size = len(data) - dtype = IntervalDtype.from_arrow(data.type) - mask = data.buffers()[0] - if mask is not None: - mask = pa_mask_buffer_to_mask(mask, len(data)) - - offset = data.offset - null_count = data.null_count - children = new_col.children - - return cls( - data=None, - size=size, - dtype=dtype, - mask=mask, - offset=offset, - null_count=null_count, - children=children, # type: ignore[arg-type] - ) + return new_col._with_type_metadata(IntervalDtype.from_arrow(data.type)) # type: ignore[return-value] def to_arrow(self) -> pa.Array: typ = self.dtype.to_arrow() From 2bfd896b4e0c1f0b66402c1e067b4904dbd15c5e Mon Sep 17 00:00:00 2001 From: Tanmay Gujar Date: Thu, 28 Aug 2025 13:43:27 -0700 Subject: [PATCH 229/366] Add multi-column support for primitive row operator dispatch (#18940) This PR extends the primitive row operator to support multi-column tables, addressing the current limitation to single-column input. The primary goal is to improve performance in operations that rely on row-wise comparisons by enabling more efficient handling of multi-column data. Authors: - Tanmay Gujar (https://github.com/tgujar) - Yunsong Wang (https://github.com/PointKernel) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/18940 --- .../cudf/table/primitive_row_operators.cuh | 61 +++++-- cpp/src/table/primitive_row_operators.cu | 5 +- cpp/tests/join/distinct_join_tests.cpp | 164 ++++++++++++++++++ 3 files changed, 210 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/table/primitive_row_operators.cuh b/cpp/include/cudf/table/primitive_row_operators.cuh index 3016422938e..cd96d5768a5 100644 --- a/cpp/include/cudf/table/primitive_row_operators.cuh +++ b/cpp/include/cudf/table/primitive_row_operators.cuh @@ -21,13 +21,16 @@ #include #include #include -#include #include +#include #include #include #include #include +#include + +#include namespace CUDF_EXPORT cudf { @@ -126,21 +129,26 @@ class row_equality_comparator { */ __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const { - if (_has_nulls) { - bool const lhs_is_null{_lhs.column(0).is_null(lhs_row_index)}; - bool const rhs_is_null{_rhs.column(0).is_null(rhs_row_index)}; - if (lhs_is_null and rhs_is_null) { - return _nulls_are_equal == null_equality::EQUAL; - } else if (lhs_is_null != rhs_is_null) { - return false; + auto equal_elements = [this, lhs_row_index, rhs_row_index](column_device_view const& l, + column_device_view const& r) { + // Handle null comparison for each element + if (_has_nulls) { + bool const lhs_is_null{l.is_null(lhs_row_index)}; + bool const rhs_is_null{r.is_null(rhs_row_index)}; + if (lhs_is_null and rhs_is_null) { + return _nulls_are_equal == null_equality::EQUAL; + } else if (lhs_is_null != rhs_is_null) { + return false; + } } - } - 
return cudf::type_dispatcher(_lhs.begin()->type(), - element_equality_comparator{}, - _lhs.column(0), - _rhs.column(0), - lhs_row_index, - rhs_row_index); + + // Both elements are non-null, compare their values + element_equality_comparator comparator; + return cudf::type_dispatcher( + l.type(), comparator, l, r, lhs_row_index, rhs_row_index); + }; + + return thrust::equal(thrust::seq, _lhs.begin(), _lhs.end(), _rhs.begin(), equal_elements); } /** @@ -243,11 +251,26 @@ class row_hasher { */ __device__ auto operator()(size_type row_index) const { - if (_has_nulls && _table.column(0).is_null(row_index)) { - return cuda::std::numeric_limits::max(); + element_hasher hasher; + // avoid hash combine call if there is only one column + auto hash = cuda::std::numeric_limits::max(); + if (!_has_nulls || !_table.column(0).is_null(row_index)) { + hash = cudf::type_dispatcher( + _table.column(0).type(), hasher, _seed, _table.column(0), row_index); + } + + for (size_type i = 1; i < _table.num_columns(); ++i) { + if (!(_has_nulls && _table.column(i).is_null(row_index))) { + hash = cudf::hashing::detail::hash_combine( + hash, + cudf::type_dispatcher( + _table.column(i).type(), hasher, _seed, _table.column(i), row_index)); + } else { + hash = cudf::hashing::detail::hash_combine( + hash, cuda::std::numeric_limits::max()); + } } - return cudf::type_dispatcher( - _table.column(0).type(), element_hasher{}, _seed, _table.column(0), row_index); + return hash; } private: diff --git a/cpp/src/table/primitive_row_operators.cu b/cpp/src/table/primitive_row_operators.cu index e95f9855790..d879b6d50f7 100644 --- a/cpp/src/table/primitive_row_operators.cu +++ b/cpp/src/table/primitive_row_operators.cu @@ -18,9 +18,12 @@ #include #include +#include + namespace cudf { bool is_primitive_row_op_compatible(cudf::table_view const& table) { - return table.num_columns() == 1 and cudf::is_numeric(table.column(0).type()); + return std::all_of( + table.begin(), table.end(), [](auto const& col) { return cudf::is_numeric(col.type()); }); } } // namespace cudf diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index ce27a3fbc0a..34067ac9b6c 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -143,6 +143,48 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) this->compare_to_reference(build.view(), probe.view(), result, gold.view()); } +TEST_F(DistinctJoinTest, PrimitiveInnerJoinNoNulls) +{ + column_wrapper col0_0{{1, 2, 3, 4, 5}}; + column_wrapper col0_1({0, 0, 3, 4, 5}); + column_wrapper col0_2({9, 9, 9, 9, 9}); + + column_wrapper col1_0{{1, 2, 3, 4, 9}}; + column_wrapper col1_1({0, 0, 0, 4, 4}); + column_wrapper col1_2({9, 9, 9, 0, 9}); + + CVector cols0, cols1; + cols0.push_back(col0_0.release()); + cols0.push_back(col0_1.release()); + cols0.push_back(col0_2.release()); + cols1.push_back(col1_0.release()); + cols1.push_back(col1_1.release()); + cols1.push_back(col1_2.release()); + + Table build(std::move(cols0)); + Table probe(std::move(cols1)); + + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); + + column_wrapper col_gold_0{{1, 2}}; + column_wrapper col_gold_1({0, 0}); + column_wrapper col_gold_2{{9, 9}}; + column_wrapper col_gold_3{{1, 2}}; + column_wrapper col_gold_4({0, 0}); + column_wrapper col_gold_5{{9, 9}}; + CVector cols_gold; + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + 
cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); + + this->compare_to_reference(build.view(), probe.view(), result, gold.view()); +} + TEST_F(DistinctJoinTest, InnerJoinWithNulls) { column_wrapper col0_0{{3, 1, 2, 0, 2}}; @@ -192,6 +234,55 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) } } +TEST_F(DistinctJoinTest, PrimitiveInnerJoinWithNulls) +{ + column_wrapper col0_0{{3, 1, 2, 0, 2}}; + column_wrapper col0_1({1, 1, 0, 4, 0}, {true, true, false, true, true}); + column_wrapper col0_2{{1, 1, 2, 4, 1}}; + + column_wrapper col1_0{{1, 2, 0, 2, 3}}; + column_wrapper col1_1({1, 0, 1, 0, 1}); + column_wrapper col1_2{{1, 1, 1, 1, 1}, {false, true, true, false, true}}; + + CVector cols0, cols1; + cols0.push_back(col0_0.release()); + cols0.push_back(col0_1.release()); + cols0.push_back(col0_2.release()); + cols1.push_back(col1_0.release()); + cols1.push_back(col1_1.release()); + cols1.push_back(col1_2.release()); + + Table build(std::move(cols0)); + Table probe(std::move(cols1)); + + // Create gold table once + column_wrapper col_gold_0{{3, 2}}; + column_wrapper col_gold_1({1, 0}, {true, true}); + column_wrapper col_gold_2{{1, 1}}; + column_wrapper col_gold_3{{3, 2}}; + column_wrapper col_gold_4({1, 0}, {true, true}); + column_wrapper col_gold_5{{1, 1}}; + CVector cols_gold; + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); + cols_gold.push_back(col_gold_4.release()); + cols_gold.push_back(col_gold_5.release()); + Table gold(std::move(cols_gold)); + + // Test with different load factors + std::vector load_factors = {0.5, 1.0}; + + for (auto load_factor : load_factors) { + auto distinct_join = + cudf::distinct_hash_join{build.view(), cudf::null_equality::EQUAL, load_factor}; + auto result = distinct_join.inner_join(probe.view()); + + this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + } +} + TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) { column_wrapper col0_0{{3, 1, 2, 0, 2}}; @@ -408,6 +499,42 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } +TEST_F(DistinctJoinTest, PrimitiveLeftJoinNoNulls) +{ + column_wrapper col0_0({3, 1, 2, 0, 3}); + column_wrapper col0_1({0, 1, 2, 4, 1}); + + column_wrapper col1_0({2, 2, 0, 4, 3}); + column_wrapper col1_1({1, 0, 1, 2, 1}); + + CVector cols0, cols1; + cols0.push_back(col0_0.release()); + cols0.push_back(col0_1.release()); + cols1.push_back(col1_0.release()); + cols1.push_back(col1_1.release()); + + Table probe(std::move(cols0)); + Table build(std::move(cols1)); + + column_wrapper col_gold_0({3, 1, 2, 0, 3}); + column_wrapper col_gold_1({0, 1, 2, 4, 1}); + column_wrapper col_gold_2{{-1, -1, -1, -1, 3}, {false, false, false, false, true}}; + column_wrapper col_gold_3{{-1, -1, -1, -1, 1}, {false, false, false, false, true}}; + CVector cols_gold; + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); + Table gold(std::move(cols_gold)); + + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; + + this->compare_to_reference( + 
build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); +} + TEST_F(DistinctJoinTest, LeftJoinWithNulls) { column_wrapper col0_0{{3, 1, 2, 0, 2}}; @@ -445,6 +572,43 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } +TEST_F(DistinctJoinTest, PrimitiveLeftJoinWithNulls) +{ + column_wrapper col0_0{{3, 1, 2, 0, 2}}; + column_wrapper col0_1({1, 1, -1, 4, 0}, {true, true, false, true, true}); + + column_wrapper col1_0{{2, 2, 0, 4, 3}}; + column_wrapper col1_1({1, 0, 1, 2, 1}); + + CVector cols0, cols1; + cols0.push_back(col0_0.release()); + cols0.push_back(col0_1.release()); + cols1.push_back(col1_0.release()); + cols1.push_back(col1_1.release()); + + Table probe(std::move(cols0)); + Table build(std::move(cols1)); + + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; + + column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; + column_wrapper col_gold_1({1, 1, -1, 4, 0}, {true, true, false, true, true}); + column_wrapper col_gold_2{{3, -1, -1, -1, 2}, {true, false, false, false, true}}; + column_wrapper col_gold_3({1, -1, -1, -1, 0}, {true, false, false, false, true}); + + CVector cols_gold; + cols_gold.push_back(col_gold_0.release()); + cols_gold.push_back(col_gold_1.release()); + cols_gold.push_back(col_gold_2.release()); + cols_gold.push_back(col_gold_3.release()); + Table gold(std::move(cols_gold)); + + this->compare_to_reference( + build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); +} + TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) { auto col0_names_col = strcol_wrapper{ From edbedc1ff3e5cf06b08cb05ccb19d24ad24b0e24 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 29 Aug 2025 08:33:10 -0500 Subject: [PATCH 230/366] revert numba CUDA 13 workaround (#19842) CUDA 13 support was initially added here in #19768 During that work, we faced some runtime issues with conda packages that @brandon-b-miller diagnosed as a missing dependency in `numba-cuda` (https://github.com/NVIDIA/numba-cuda/issues/430). To get past that, we temporarily introduced a runtime dependency on `cuda-nvvm-tools` in this project. That's no longer necessary, thanks to these: * https://github.com/conda-forge/numba-cuda-feedstock/pull/47 * https://github.com/conda-forge/numba-cuda-feedstock/pull/46 This removes that workaround. ## Notes for Reviewers ### Don't we need to change the `numba-cuda` pin? No, the fixes are just in new builds of 0.19.1. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - https://github.com/brandon-b-miller - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/19842 --- conda/environments/all_cuda-130_arch-aarch64.yaml | 1 - conda/environments/all_cuda-130_arch-x86_64.yaml | 1 - conda/recipes/cudf/recipe.yaml | 4 ---- dependencies.yaml | 10 ---------- 4 files changed, 16 deletions(-) diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index bd29b08c758..e18d021ee4e 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -20,7 +20,6 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-nvvm-tools - cuda-python>=13.0.1,<14.0a0 - cuda-sanitizer-api - cuda-version=13.0 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 4f06590e585..359ed939f77 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -20,7 +20,6 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-nvvm-tools - cuda-python>=13.0.1,<14.0a0 - cuda-sanitizer-api - cuda-version=13.0 diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 05b8f6f7b89..2389ed684c1 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -71,10 +71,6 @@ requirements: - pandas >=2.0,<2.4.0dev0 - cupy >=13.6.0 - numba-cuda >=0.19.1,<0.20.0a0 - # TODO: remove cuda-nvvm-tools once https://github.com/NVIDIA/numba-cuda/issues/430 is resolved - # and we move the numba-cuda floor up to a version containing a fix for it - - if: cuda_major == "13" - then: cuda-nvvm-tools - numba >=0.60.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<20.0.0a0 diff --git a/dependencies.yaml b/dependencies.yaml index fe6d8dcf2bb..3808369ace3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -930,16 +930,6 @@ dependencies: packages: - numba-cuda>=0.19.1,<0.20.0a0 specific: - # TODO: remove cuda-nvvm-tools once https://github.com/NVIDIA/numba-cuda/issues/430 is resolved - # and we move the numba-cuda floor up to a version containing a fix for it - - output_types: [conda] - matrices: - - matrix: - cuda: "13.*" - packages: - - cuda-nvvm-tools - - matrix: - packages: - output_types: [requirements, pyproject] matrices: - matrix: From f0bdf9acace00a841ce9421417312a05a311e724 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 29 Aug 2025 09:21:39 -0700 Subject: [PATCH 231/366] Add join `*_match_context` APIs to hash join (#19835) Related to #18677 This PR moves the strong types `join_match_context` and `join_partition_context` out of the `sort_merge_join` class into the public interface, allowing them to be reused in `hash_join`. It also introduces `*_join_match_context` APIs to `hash_join`, enabling per-row match data retrieval to better distribute cuDF workloads for downstream libraries like Spark. Follow-up work, such as adding partition context retrieval, will be handled in a separate PR to keep the review process simpler.
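## Usage sketch

A minimal sketch of how the new per-row match counts could be consumed (not part of this patch; the helper name `total_inner_join_matches` and the reduction step are illustrative only, built on the `join_match_context` members introduced here):

```cpp
#include <cudf/join/hash_join.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

#include <cstddef>

// Hypothetical helper: sums the per-probe-row counts carried by
// cudf::join_match_context to size an inner join before materializing
// any gather maps.
std::size_t total_inner_join_matches(cudf::table_view const& build,
                                     cudf::table_view const& probe,
                                     rmm::cuda_stream_view stream)
{
  // Build the hash table once; it can be reused across multiple probes.
  cudf::hash_join joiner{build, cudf::null_equality::EQUAL, stream};

  // One count per probe row: how many build rows it matches.
  auto context = joiner.inner_join_match_context(probe, stream);

  // The total equals the gather-map size a full inner_join() would produce.
  return thrust::reduce(rmm::exec_policy(stream),
                        context._match_counts->begin(),
                        context._match_counts->end(),
                        std::size_t{0});
}
```

For the total alone, `inner_join_size()` is already sufficient; the value of the match context is that the per-row counts also let callers choose probe-table split points so that each partition of the join output fits a memory budget, mirroring the existing `sort_merge_join::inner_join_match_context` / `partitioned_inner_join` workflow.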
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/19835 --- cpp/include/cudf/detail/join/hash_join.cuh | 30 ++ cpp/include/cudf/join/hash_join.hpp | 115 +++++-- cpp/include/cudf/join/join.hpp | 31 ++ cpp/include/cudf/join/sort_merge_join.hpp | 51 +-- cpp/src/join/hash_join.cu | 160 +++++++++- cpp/src/join/sort_merge_join.cu | 23 +- cpp/tests/join/join_tests.cpp | 349 ++++++++++++++++++++- 7 files changed, 673 insertions(+), 86 deletions(-) diff --git a/cpp/include/cudf/detail/join/hash_join.cuh b/cpp/include/cudf/detail/join/hash_join.cuh index c71e1548d3d..4bff2d0b7ed 100644 --- a/cpp/include/cudf/detail/join/hash_join.cuh +++ b/cpp/include/cudf/detail/join/hash_join.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -188,7 +189,36 @@ struct hash_join { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + /** + * @copydoc cudf::hash_join::inner_join_match_context + */ + [[nodiscard]] cudf::join_match_context inner_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + + /** + * @copydoc cudf::hash_join::left_join_match_context + */ + [[nodiscard]] cudf::join_match_context left_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + + /** + * @copydoc cudf::hash_join::full_join_match_context + */ + [[nodiscard]] cudf::join_match_context full_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + private: + template + void compute_match_counts(cudf::table_view const& probe, + OutputIterator output_iter, + rmm::cuda_stream_view stream) const; + /** * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`, * and returns the output indices of `build_table` and `probe_table` as a combined table, diff --git a/cpp/include/cudf/join/hash_join.hpp b/cpp/include/cudf/join/hash_join.hpp index 85af304f6f6..f00fbaf20eb 100644 --- a/cpp/include/cudf/join/hash_join.hpp +++ b/cpp/include/cudf/join/hash_join.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -89,7 +90,7 @@ class hash_join { * @note The `hash_join` object must not outlive the table viewed by `build`, else behavior is * undefined. * - * @throws cudf::logic_error if the build table has no columns + * @throws std::invalid_argument if the build table has no columns * * @param build The build table, from which the hash table is built * @param compare_nulls Controls whether null join-key values should match or not @@ -120,15 +121,15 @@ class hash_join { * an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * * @param probe The probe table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. 
* - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct * the result of performing an inner join between two tables with `build` and `probe` * as the join keys . @@ -145,15 +146,15 @@ class hash_join { * a left join between two tables. @see cudf::left_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * * @param probe The probe table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct * the result of performing a left join between two tables with `build` and `probe` * as the join keys. @@ -170,15 +171,15 @@ class hash_join { * a full join between two tables. @see cudf::full_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * * @param probe The probe table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct * the result of performing a full join between two tables with `build` and `probe` * as the join keys . @@ -194,12 +195,12 @@ class hash_join { * Returns the exact number of matches (rows) when performing an inner join with the specified * probe table. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return The exact number of output when performing an inner join between two tables with * `build` and `probe` as the join keys . */ @@ -210,12 +211,12 @@ class hash_join { * Returns the exact number of matches (rows) when performing a left join with the specified probe * table. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. 
+ * * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return The exact number of output when performing a left join between two tables with `build` * and `probe` as the join keys . */ @@ -226,14 +227,14 @@ class hash_join { * Returns the exact number of matches (rows) when performing a full join with the specified probe * table. * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the intermediate table and columns' device * memory. * - * @throw cudf::logic_error If the input probe table has nulls while this hash_join object was not - * constructed with null check. - * * @return The exact number of output when performing a full join between two tables with `build` * and `probe` as the join keys . */ @@ -242,6 +243,82 @@ class hash_join { rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; + /** + * @brief Returns context information about matches between the probe and build tables. + * + * This method computes, for each row in the probe table, how many matching rows exist in + * the build table according to inner join semantics, and returns the number of matches through a + * join_match_context object. + * + * This is particularly useful for: + * - Determining the total size of a potential join result without materializing it + * - Planning partitioned join operations for large datasets + * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * + * @param probe The probe table to join with the pre-processed build table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the result device memory + * + * @return A join_match_context object containing the probe table view and a device vector + * of match counts for each row in the probe table + */ + [[nodiscard]] cudf::join_match_context inner_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; + + /** + * @brief Returns context information about matches between the probe and build tables. + * + * This method computes, for each row in the probe table, how many matching rows exist in + * the build table according to left join semantics, and returns the number of matches through a + * join_match_context object. + * + * For left join, every row in the probe table will have at least one match (either with a + * matching row from the build table or with a null placeholder). + * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. 
+ * + * @param probe The probe table to join with the pre-processed build table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the result device memory + * + * @return A join_match_context object containing the probe table view and a device vector + * of match counts for each row in the probe table + */ + [[nodiscard]] cudf::join_match_context left_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; + + /** + * @brief Returns context information about matches between the probe and build tables. + * + * This method computes, for each row in the probe table, how many matching rows exist in + * the build table according to full join semantics, and returns the number of matches through a + * join_match_context object. + * + * For full join, this includes matches for probe table rows, and the result may need to be + * combined with unmatched rows from the build table to get the complete picture. + * + * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * not constructed with null check. + * + * @param probe The probe table to join with the pre-processed build table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the result device memory + * + * @return A join_match_context object containing the probe table view and a device vector + * of match counts for each row in the probe table + */ + [[nodiscard]] cudf::join_match_context full_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; + private: std::unique_ptr _impl; }; diff --git a/cpp/include/cudf/join/join.hpp b/cpp/include/cudf/join/join.hpp index cf686e40d4b..d1b5e6a5e07 100644 --- a/cpp/include/cudf/join/join.hpp +++ b/cpp/include/cudf/join/join.hpp @@ -33,6 +33,37 @@ namespace CUDF_EXPORT cudf { * @file */ +/** + * @brief Holds context information about matches between tables during a join operation. + * + * This structure stores the left table view and a device vector containing the count of + * matching rows in the right table for each row in the left table. Used primarily by + * inner_join_match_context() to track join match information. + */ +struct join_match_context { + table_view _left_table; ///< View of the left table involved in the join operation + std::unique_ptr<rmm::device_uvector<size_type>> + _match_counts; ///< A device vector containing the count of matching rows in the right table + ///< for each row in left table +}; + +/** + * @brief Stores context information for partitioned join operations. + * + * This structure maintains context for partitioned join operations, containing the match + * context from a previous join operation along with the start and end indices that define + * the current partition of the left table being processed. + * + * Used with partitioned_inner_join() to perform large joins in smaller chunks while + * preserving the context from the initial match operation.
+ */ +struct join_partition_context { + join_match_context + left_table_context; ///< The match context from a previous inner_join_match_context call + size_type left_start_idx; ///< The starting row index of the current left table partition + size_type left_end_idx; ///< The ending row index (exclusive) of the current left table partition +}; + /** * @brief Returns a pair of row index vectors corresponding to an * inner join between the specified tables. diff --git a/cpp/include/cudf/join/sort_merge_join.hpp b/cpp/include/cudf/join/sort_merge_join.hpp index cdc71fc46fb..c7f572d4525 100644 --- a/cpp/include/cudf/join/sort_merge_join.hpp +++ b/cpp/include/cudf/join/sort_merge_join.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -43,38 +44,6 @@ namespace CUDF_EXPORT cudf { */ class sort_merge_join { public: - /** - * @brief Holds context information about matches between tables during a join operation. - * - * This structure stores the left table view and a device vector containing the count of - * matching rows in the right table for each row in the left table. Used primarily by - * inner_join_match_context() to track join match information. - */ - struct match_context { - table_view _left_table; ///< View of the left table involved in the join operation - std::unique_ptr> - _match_counts; ///< A device vector containing the count of matching rows in the right table - ///< for each row in left table - }; - - /** - * @brief Stores context information for partitioned join operations. - * - * This structure maintains context for partitioned join operations, containing the match - * context from a previous join operation along with the start and end indices that define - * the current partition of the left table being processed. - * - * Used with partitioned_inner_join() to perform large joins in smaller chunks while - * preserving the context from the initial match operation. - */ - struct partition_context { - match_context - left_table_context; ///< The match context from a previous inner_join_match_context call - size_type left_start_idx; ///< The starting row index of the current left table partition - size_type - left_end_idx; ///< The ending row index (exclusive) of the current left table partition - }; - /** * @brief Construct a sort-merge join object that pre-processes the right table * on creation, and can be used on subsequent join operations with multiple @@ -126,7 +95,7 @@ class sort_merge_join { * - Determining the total size of a potential join result without materializing it * - Planning partitioned join operations for large datasets * - * The returned match_context can be used directly with partitioned_inner_join() to + * The returned join_match_context can be used directly with partitioned_inner_join() to * process large joins in manageable chunks. 
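+ *
+ * A small illustrative sketch (assumes `join_obj` is a sort_merge_join built
+ * over the right table, as in the partitioned_inner_join example below):
+ * @code{.cpp}
+ * auto context = join_obj.inner_join_match_context(left_table, cudf::sorted::NO);
+ * // context._match_counts has one entry per row of left_table.
+ * @endcode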
* * @param left The left table to join with the pre-processed right table @@ -134,10 +103,10 @@ class sort_merge_join { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the result device memory * - * @return A match_context object containing the left table view and a device vector + * @return A join_match_context object containing the left table view and a device vector * of match counts for each row in the left table */ - match_context inner_join_match_context( + cudf::join_match_context inner_join_match_context( table_view const& left, sorted is_left_sorted, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -147,8 +116,8 @@ class sort_merge_join { * @brief Performs an inner join between a partition of the left table and the right table. * * This method executes an inner join operation between a specific partition of the left table - * (defined by the partition_context) and the right table that was provided when constructing - * the sort_merge_join object. The partition_context must have been previously created by + * (defined by the join_partition_context) and the right table that was provided when constructing + * the sort_merge_join object. The join_partition_context must have been previously created by * calling inner_join_match_context(). * * This partitioning approach enables processing large joins in smaller, memory-efficient chunks, @@ -180,7 +149,7 @@ class sort_merge_join { * size_type end = std::min(start + 1000, left_table.num_rows()); * * // Create partition context - * sort_merge_join::partition_context part_ctx{context, start, end}; + * cudf::join_partition_context part_ctx{context, start, end}; * * // Get join indices for this partition * auto [left_indices, right_indices] = join_obj.partitioned_inner_join(part_ctx); @@ -192,7 +161,7 @@ class sort_merge_join { std::pair>, std::unique_ptr>> partitioned_inner_join( - partition_context const& context, + cudf::join_partition_context const& context, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -318,7 +287,7 @@ class sort_merge_join { * Result: {{1}, {0}} * @endcode * - * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` + * @throw std::invalid_argument if number of elements in `left_keys` or `right_keys` * mismatch. * * @param[in] left_keys The left table @@ -360,7 +329,7 @@ sort_merge_inner_join(cudf::table_view const& left_keys, * Result: {{1}, {0}} * @endcode * - * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys` + * @throw std::invalid_argument if number of elements in `left_keys` or `right_keys` * mismatch. 
* * @param[in] left_keys The left table diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index a2cdf122fe8..06220e299de 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -44,6 +44,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -55,7 +56,7 @@ using hash_table_t = cudf::hash_join::impl_type::hash_table_t; // mixed-join migration template struct pair_fn { - pair_fn(Hasher hash) : _hash{hash} {} + pair_fn(Hasher hash) : _hash{std::move(hash)} {} __device__ cuco::pair operator()(size_type i) const noexcept { @@ -489,8 +490,10 @@ std::size_t get_full_join_size( // Assume all the indices in invalid_index_map are invalid auto invalid_index_map = std::make_unique>(right_table_row_count, stream); - thrust::uninitialized_fill( - rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1}); + thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), + invalid_index_map->begin(), + invalid_index_map->end(), + int32_t{1}); // Functor to check for index validity since left joins can create invalid indices valid_range valid(0, right_table_row_count); @@ -540,7 +543,7 @@ hash_join::hash_join(cudf::table_view const& build, cudf::experimental::row::equality::preprocessed_table::create(_build, stream)} { CUDF_FUNC_RANGE(); - CUDF_EXPECTS(0 != build.num_columns(), "Hash join build table is empty"); + CUDF_EXPECTS(0 != build.num_columns(), "Hash join build table is empty", std::invalid_argument); CUDF_EXPECTS(load_factor > 0 && load_factor <= 1, "Invalid load factor: must be greater than 0 and less than or equal to 1.", std::invalid_argument); @@ -604,7 +607,8 @@ std::size_t hash_join::inner_join_size(cudf::table_view const& probe, if (_is_empty) { return 0; } CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check."); + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); @@ -630,7 +634,8 @@ std::size_t hash_join::left_join_size(cudf::table_view const& probe, if (_is_empty) { return probe.num_rows(); } CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check."); + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); @@ -657,7 +662,8 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, if (_is_empty) { return probe.num_rows(); } CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check."); + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); @@ -673,6 +679,112 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, mr); } +template +template +void hash_join::compute_match_counts(cudf::table_view const& probe, + OutputIterator output_iter, + rmm::cuda_stream_view stream) const +{ + CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); + + auto const preprocessed_probe = + 
cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const probe_nulls = cudf::nullate::DYNAMIC{_has_nulls}; + auto const probe_table_num_rows = probe.num_rows(); + + auto compute_counts = [&](auto equality, auto d_hasher) { + auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher}); + _hash_table.count_each(iter, + iter + probe_table_num_rows, + equality, + _hash_table.hash_function(), + output_iter, + stream.value()); + }; + + if (cudf::is_primitive_row_op_compatible(_build)) { + auto const d_hasher = cudf::row::primitive::row_hasher{probe_nulls, preprocessed_probe}; + auto const d_equal = cudf::row::primitive::row_equality_comparator{ + probe_nulls, preprocessed_probe, _preprocessed_build, _nulls_equal}; + compute_counts(primitive_pair_equal{d_equal}, d_hasher); + } else { + auto const d_hasher = + cudf::experimental::row::hash::row_hasher{preprocessed_probe}.device_hasher(probe_nulls); + auto const row_comparator = cudf::experimental::row::equality::two_table_comparator{ + preprocessed_probe, _preprocessed_build}; + auto const d_equal = row_comparator.equal_to(probe_nulls, _nulls_equal); + compute_counts(pair_equal{d_equal}, d_hasher); + } +} + +template +cudf::join_match_context hash_join::inner_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + cudf::scoped_range range{"hash_join::inner_join_match_context"}; + + auto match_counts = + std::make_unique>(probe.num_rows(), stream, mr); + + if (_is_empty) { + thrust::fill(rmm::exec_policy_nosync(stream), match_counts->begin(), match_counts->end(), 0); + } else { + compute_match_counts(probe, match_counts->begin(), stream); + } + + return cudf::join_match_context{probe, std::move(match_counts)}; +} + +template +cudf::join_match_context hash_join::left_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + cudf::scoped_range range{"hash_join::left_join_match_context"}; + + auto match_counts = + std::make_unique>(probe.num_rows(), stream, mr); + + if (_is_empty) { + thrust::fill(rmm::exec_policy_nosync(stream), match_counts->begin(), match_counts->end(), 1); + } else { + auto transform = [] __device__(size_type count) { return count == 0 ? 1 : count; }; + auto transformed_output = + thrust::make_transform_output_iterator(match_counts->begin(), transform); + compute_match_counts(probe, transformed_output, stream); + } + + return cudf::join_match_context{probe, std::move(match_counts)}; +} + +template +cudf::join_match_context hash_join::full_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + cudf::scoped_range range{"hash_join::full_join_match_context"}; + + auto match_counts = + std::make_unique>(probe.num_rows(), stream, mr); + + if (_is_empty) { + thrust::fill(rmm::exec_policy_nosync(stream), match_counts->begin(), match_counts->end(), 1); + } else { + auto transform = [] __device__(size_type count) { return count == 0 ? 
1 : count; }; + auto transformed_output = + thrust::make_transform_output_iterator(match_counts->begin(), transform); + compute_match_counts(probe, transformed_output, stream); + } + + return cudf::join_match_context{probe, std::move(match_counts)}; +} + template std::pair>, std::unique_ptr>> @@ -690,7 +802,8 @@ hash_join::probe_join_indices(cudf::table_view const& probe_table, CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe_table), - "Probe table has nulls while build table was not hashed with null check."); + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe_table, stream); @@ -723,13 +836,15 @@ hash_join::compute_hash_join(cudf::table_view const& probe, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty"); + CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty", std::invalid_argument); CUDF_EXPECTS(_build.num_columns() == probe.num_columns(), - "Mismatch in number of columns to be joined on"); + "Mismatch in number of columns to be joined on", + std::invalid_argument); CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check."); + "Probe table has nulls while build table was not hashed with null check.", + std::invalid_argument); if (is_trivial_join(probe, _build, join)) { return std::pair(std::make_unique>(0, stream, mr), @@ -749,7 +864,6 @@ hash_join::~hash_join() = default; hash_join::hash_join(cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) - // If we cannot know beforehand about null existence then let's assume that there are nulls. 
: hash_join( build, nullable_join::YES, compare_nulls, cudf::detail::CUCO_DESIRED_LOAD_FACTOR, stream) { @@ -814,4 +928,26 @@ std::size_t hash_join::full_join_size(cudf::table_view const& probe, return _impl->full_join_size(probe, stream, mr); } +cudf::join_match_context hash_join::inner_join_match_context( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + return _impl->inner_join_match_context(probe, stream, mr); +} + +cudf::join_match_context hash_join::left_join_match_context(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + return _impl->left_join_match_context(probe, stream, mr); +} + +cudf::join_match_context hash_join::full_join_match_context(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const +{ + return _impl->full_join_match_context(probe, stream, mr); +} + } // namespace cudf diff --git a/cpp/src/join/sort_merge_join.cu b/cpp/src/join/sort_merge_join.cu index ec015c704b2..28bf8ca923f 100644 --- a/cpp/src/join/sort_merge_join.cu +++ b/cpp/src/join/sort_merge_join.cu @@ -408,7 +408,8 @@ sort_merge_join::sort_merge_join(table_view const& right, cudf::scoped_range range{"sort_merge_join::sort_merge_join"}; // Sanity checks CUDF_EXPECTS(right.num_columns() != 0, - "Number of columns the keys table must be non-zero for a join"); + "Number of columns the keys table must be non-zero for a join", + std::invalid_argument); this->compare_nulls = compare_nulls; @@ -528,9 +529,11 @@ sort_merge_join::inner_join(table_view const& left, cudf::scoped_range range{"sort_merge_join::inner_join"}; // Sanity checks CUDF_EXPECTS(left.num_columns() != 0, - "Number of columns in left keys must be non-zero for a join"); + "Number of columns in left keys must be non-zero for a join", + std::invalid_argument); CUDF_EXPECTS(left.num_columns() == preprocessed_right._null_processed_table_view.num_columns(), - "Number of columns must match for a join"); + "Number of columns must match for a join", + std::invalid_argument); // Preprocessing the left table preprocessed_left._table_view = left; @@ -558,7 +561,7 @@ sort_merge_join::inner_join(table_view const& left, }); } -sort_merge_join::match_context sort_merge_join::inner_join_match_context( +cudf::join_match_context sort_merge_join::inner_join_match_context( table_view const& left, sorted is_left_sorted, rmm::cuda_stream_view stream, @@ -567,9 +570,11 @@ sort_merge_join::match_context sort_merge_join::inner_join_match_context( cudf::scoped_range range{"sort_merge_join::inner_join_match_context"}; // Sanity checks CUDF_EXPECTS(left.num_columns() != 0, - "Number of columns in left keys must be non-zero for a join"); + "Number of columns in left keys must be non-zero for a join", + std::invalid_argument); CUDF_EXPECTS(left.num_columns() == preprocessed_right._null_processed_table_view.num_columns(), - "Number of columns must match for a join"); + "Number of columns must match for a join", + std::invalid_argument); // Preprocessing the left table preprocessed_left._table_view = left; @@ -606,18 +611,18 @@ sort_merge_join::match_context sort_merge_join::inner_join_match_context( mapping.begin(), unprocessed_matches_per_row.begin()); stream.synchronize(); - return match_context{ + return join_match_context{ left, std::make_unique>(std::move(unprocessed_matches_per_row))}; } - return match_context{left, std::move(matches_per_row)}; + return join_match_context{left, 
std::move(matches_per_row)}; }); } // left_partition_end exclusive std::pair>, std::unique_ptr>> -sort_merge_join::partitioned_inner_join(sort_merge_join::partition_context const& context, +sort_merge_join::partitioned_inner_join(cudf::join_partition_context const& context, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 1f371576d55..f1114e761f9 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -282,10 +282,10 @@ TEST_P(JoinParameterizedTest, InvalidInput) cudf::table_view right({right_first_col, right_second_col, right_third_col}); EXPECT_THROW(inner_join(left, right, {0, 1}, {0, 1, 2}, cudf::null_equality::EQUAL, algo), - cudf::logic_error); + std::invalid_argument); EXPECT_THROW(inner_join(left, right, {}, {0, 1, 2}, cudf::null_equality::EQUAL, algo), - cudf::logic_error); + std::invalid_argument); } TEST_P(JoinParameterizedTest, EmptySentinelRepro) @@ -954,10 +954,9 @@ TEST_F(JoinTest, PartitionedInnerJoinWithNulls) cudf::sort_merge_join obj(t1.select(right_on), cudf::sorted::NO, compare_nulls, stream); auto match_context = obj.inner_join_match_context( t0.select(left_on), cudf::sorted::NO, stream, cudf::get_current_device_resource_ref()); - auto partition_context = cudf::sort_merge_join::partition_context{std::move(match_context), 0, 0}; + auto partition_context = cudf::join_partition_context{std::move(match_context), 0, 0}; - auto join_and_gather = [&t0, &t1, &obj, stream]( - cudf::sort_merge_join::partition_context const& cxt) { + auto join_and_gather = [&t0, &t1, &obj, stream](cudf::join_partition_context const& cxt) { auto const [left_join_indices, right_join_indices] = obj.partitioned_inner_join(cxt, stream, cudf::get_current_device_resource_ref()); @@ -1969,6 +1968,346 @@ TEST_F(JoinTest, HashJoinLargeOutputSize) EXPECT_EQ(col_size * col_size, output_size); } +TEST_F(JoinTest, HashJoinInnerMatchContext) +{ + // Test inner join match context functionality with multiple matches and nulls + // Use the same test data as SortMergeInnerJoinSizePerRowWithNulls for consistency + column_wrapper col0_0{{3, 1, 2, 0, 2}}; + strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {true, true, false, true, true}); + column_wrapper col0_2{{0, 1, 2, 4, 1}}; + + column_wrapper col1_0{{2, 2, 0, 4, 3}}; + strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, {true, false, true, true, true}); + column_wrapper col1_2{{1, 0, 1, 2, 1}, {true, false, true, true, true}}; + + CVector cols0, cols1; + cols0.emplace_back(col0_0.release()); + cols0.emplace_back(col0_1.release()); + cols0.emplace_back(col0_2.release()); + cols1.emplace_back(col1_0.release()); + cols1.emplace_back(col1_1.release()); + cols1.emplace_back(col1_2.release()); + + Table t0(std::move(cols0)); + Table t1(std::move(cols1)); + + // Test single column join with null_equality::EQUAL + { + cudf::hash_join hash_join(t1.select({0}), cudf::null_equality::EQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0})); + + // Check the match counts for each row + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // Expected: Row 0(3)=1 match, Row 1(1)=0 matches, Row 2(2)=2 matches, Row 3(0)=1 match, Row + // 4(2)=2 matches + std::vector expected_counts = {1, 0, 2, 1, 2}; + EXPECT_EQ(h_match_counts, expected_counts); + + // Verify total matches 
equals inner join size + cudf::size_type total_matches = + std::accumulate(h_match_counts.begin(), h_match_counts.end(), 0); + auto inner_join_size = hash_join.inner_join_size(t0.select({0})); + EXPECT_EQ(total_matches, inner_join_size); + } + + // Test multi-column join with null_equality::EQUAL + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::EQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // Expected: Row 0(3,s1)=1 match, Row 1(1,s1)=0 matches, Row 2(2,null)=1 match, Row 3(0,s4)=0 + // matches, Row 4(2,s0)=0 matches + std::vector expected_counts = {1, 0, 1, 0, 0}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test single column join with null_equality::UNEQUAL + { + cudf::hash_join hash_join(t1.select({0}), cudf::null_equality::UNEQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // Same as EQUAL for single column since nulls don't affect the integer column matching + std::vector expected_counts = {1, 0, 2, 1, 2}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test multi-column join with null_equality::UNEQUAL + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::UNEQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // With UNEQUAL, rows with nulls should not match: Row 0(3,s1)=1 match, others=0 matches + std::vector expected_counts = {1, 0, 0, 0, 0}; + EXPECT_EQ(h_match_counts, expected_counts); + } +} + +TEST_F(JoinTest, HashJoinLeftMatchContext) +{ + // Test left join match context functionality with comprehensive null handling + column_wrapper col0_0{{3, 1, 2, 0, 2}}; + strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {true, true, false, true, true}); + + column_wrapper col1_0{{2, 2, 0, 4, 3}}; + strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, {true, false, true, true, true}); + + CVector cols0, cols1; + cols0.emplace_back(col0_0.release()); + cols0.emplace_back(col0_1.release()); + cols1.emplace_back(col1_0.release()); + cols1.emplace_back(col1_1.release()); + + Table t0(std::move(cols0)); + Table t1(std::move(cols1)); + + // Test single column join - for left join, every row should have at least 1 match + { + cudf::hash_join hash_join(t1.select({0}), cudf::null_equality::EQUAL); + auto match_context = hash_join.left_join_match_context(t0.select({0})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // For left join: Row 0(3)=1 match, Row 1(1)=1 match(null), Row 2(2)=2 matches, Row 3(0)=1 + // match, Row 4(2)=2 matches + std::vector expected_counts = {1, 1, 2, 1, 2}; + EXPECT_EQ(h_match_counts, expected_counts); + + // Verify total matches equals left join size + cudf::size_type total_matches = + 
std::accumulate(h_match_counts.begin(), h_match_counts.end(), 0); + auto left_join_size = hash_join.left_join_size(t0.select({0})); + EXPECT_EQ(total_matches, left_join_size); + } + + // Test multi-column join with null_equality::EQUAL + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::EQUAL); + auto match_context = hash_join.left_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // For left join, all rows get at least 1 match: Row 0(3,s1)=1 match, others=1 match (null) + std::vector expected_counts = {1, 1, 1, 1, 1}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test multi-column join with null_equality::UNEQUAL + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::UNEQUAL); + auto match_context = hash_join.left_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // For left join with UNEQUAL, all rows still get at least 1 match (nulls for unmatched) + std::vector expected_counts = {1, 1, 1, 1, 1}; + EXPECT_EQ(h_match_counts, expected_counts); + } +} + +TEST_F(JoinTest, HashJoinFullMatchContext) +{ + // Test full join match context functionality with same comprehensive data + column_wrapper col0_0{{3, 1, 2, 0, 2}}; + strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {true, true, false, true, true}); + + column_wrapper col1_0{{2, 2, 0, 4, 3}}; + strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, {true, false, true, true, true}); + + CVector cols0, cols1; + cols0.emplace_back(col0_0.release()); + cols0.emplace_back(col0_1.release()); + cols1.emplace_back(col1_0.release()); + cols1.emplace_back(col1_1.release()); + + Table t0(std::move(cols0)); + Table t1(std::move(cols1)); + + // Test single column join - for full join probe side, every row should have at least 1 match + { + cudf::hash_join hash_join(t1.select({0}), cudf::null_equality::EQUAL); + auto match_context = hash_join.full_join_match_context(t0.select({0})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // For full join: Row 0(3)=1 match, Row 1(1)=1 match(null), Row 2(2)=2 matches, Row 3(0)=1 + // match, Row 4(2)=2 matches + std::vector expected_counts = {1, 1, 2, 1, 2}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test multi-column join + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::EQUAL); + auto match_context = hash_join.full_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // For full join, all rows get at least 1 match + std::vector expected_counts = {1, 1, 1, 1, 1}; + EXPECT_EQ(h_match_counts, expected_counts); + } +} + +TEST_F(JoinTest, HashJoinMatchContextEmptyBuild) +{ + // Test match context with empty build table + column_wrapper col0_0{{3, 1, 2}}; + column_wrapper col1_0{}; // Empty + + CVector cols0, cols1; + cols0.emplace_back(col0_0.release()); + 
cols1.emplace_back(col1_0.release()); + + Table t0(std::move(cols0)); + Table t1(std::move(cols1)); + + cudf::hash_join hash_join(t1, cudf::null_equality::EQUAL); + + // Test inner join match context + { + auto match_context = hash_join.inner_join_match_context(t0); + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // All should be 0 for inner join with empty build table + std::vector expected_counts = {0, 0, 0}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test left join match context + { + auto match_context = hash_join.left_join_match_context(t0); + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // All should be 1 for left join (null matches) + std::vector expected_counts = {1, 1, 1}; + EXPECT_EQ(h_match_counts, expected_counts); + } + + // Test full join match context + { + auto match_context = hash_join.full_join_match_context(t0); + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // All should be 1 for full join (null matches) + std::vector expected_counts = {1, 1, 1}; + EXPECT_EQ(h_match_counts, expected_counts); + } +} + +TEST_F(JoinTest, HashJoinMatchContextDuplicatesAndEdgeCases) +{ + // Test with duplicate keys to ensure multiple matches work correctly + column_wrapper col0_0{{1, 1, 2, 2, 3}}; + strcol_wrapper col0_1({"a", "a", "b", "b", "c"}); + + column_wrapper col1_0{{1, 1, 1, 2, 4}}; + strcol_wrapper col1_1({"a", "a", "a", "b", "d"}); + + CVector cols0, cols1; + cols0.emplace_back(col0_0.release()); + cols0.emplace_back(col0_1.release()); + cols1.emplace_back(col1_0.release()); + cols1.emplace_back(col1_1.release()); + + Table t0(std::move(cols0)); + Table t1(std::move(cols1)); + + // Test inner join with multiple matches per row + { + cudf::hash_join hash_join(t1.select({0}), cudf::null_equality::EQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // Row 0(1)=3 matches, Row 1(1)=3 matches, Row 2(2)=1 match, Row 3(2)=1 match, Row 4(3)=0 + // matches + std::vector expected_counts = {3, 3, 1, 1, 0}; + EXPECT_EQ(h_match_counts, expected_counts); + + // Verify total matches + cudf::size_type total_matches = + std::accumulate(h_match_counts.begin(), h_match_counts.end(), 0); + auto inner_join_size = hash_join.inner_join_size(t0.select({0})); + EXPECT_EQ(total_matches, inner_join_size); + } + + // Test multi-column join + { + cudf::hash_join hash_join(t1.select({0, 1}), cudf::null_equality::EQUAL); + auto match_context = hash_join.inner_join_match_context(t0.select({0, 1})); + + std::vector h_match_counts(t0.num_rows()); + CUDF_CUDA_TRY(cudaMemcpy(h_match_counts.data(), + match_context._match_counts->data(), + sizeof(cudf::size_type) * t0.num_rows(), + cudaMemcpyDeviceToHost)); + + // Row 0(1,a)=3 matches, Row 1(1,a)=3 matches, Row 2(2,b)=1 match, Row 3(2,b)=1 match, Row + // 4(3,c)=0 matches + std::vector expected_counts = {3, 3, 1, 1, 0}; + 
EXPECT_EQ(h_match_counts, expected_counts); + } +} + struct JoinDictionaryTest : public cudf::test::BaseFixture {}; TEST_F(JoinDictionaryTest, LeftJoinNoNulls) From d5a1d556143cf3faed5c668e55aaa339c6f1f884 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 29 Aug 2025 11:24:27 -0700 Subject: [PATCH 232/366] Pin pytest-rerunfailures<16 (#19846) xref https://github.com/pytest-dev/pytest-rerunfailures/issues/302 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/19846 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-129_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-130_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-130_arch-x86_64.yaml | 2 +- dependencies.yaml | 3 ++- python/cudf/pyproject.toml | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 466aaf5d7fa..8227b760b36 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -75,7 +75,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures +- pytest-rerunfailures<16.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 1679dfd5d83..dd2aa0a6e71 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -76,7 +76,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures +- pytest-rerunfailures<16.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index e18d021ee4e..f550d88d4b3 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -75,7 +75,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures +- pytest-rerunfailures<16.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 359ed939f77..d6103d5e73b 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -76,7 +76,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures +- pytest-rerunfailures<16.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/dependencies.yaml b/dependencies.yaml index 3808369ace3..3dc704cebf4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -1164,7 +1164,8 @@ dependencies: - nbconvert - nbformat - openpyxl - - pytest-rerunfailures + # https://github.com/pytest-dev/pytest-rerunfailures/issues/302 + - pytest-rerunfailures<16.0 depends_on_dask_cuda: common: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 0c6c84bdca1..50c6fd782e8 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -76,7 +76,7 @@ cudf-pandas-tests = [ "nbconvert", "nbformat", "openpyxl", - "pytest-rerunfailures", + "pytest-rerunfailures<16.0", ] 
 # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

 [project.urls]

From 6237d9429061e32d1c0969650794805a1b28c476 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Fri, 29 Aug 2025 15:38:54 -0500
Subject: [PATCH 233/366] Fix flaky DataFrame `to_string` test (#19847)

Fixes an issue where a test, `test_dataframe_to_string_wide`, sometimes
failed due to improperly falling back to console-based settings.

Authors:
  - https://github.com/brandon-b-miller
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/19847
---
 python/cudf/cudf/tests/test_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 8d2e7f9a707..ecd68f46e46 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -373,7 +373,7 @@ def test_dataframe_to_string_with_masked_data():
 def test_dataframe_to_string_wide():
     # Test basic
     df = cudf.DataFrame({f"a{i}": [0, 1, 2] for i in range(100)})
-    with pd.option_context("display.max_columns", 0):
+    with pd.option_context("display.max_columns", 16):
         got = df.to_string()

         expect = textwrap.dedent(

From b47101312493e3fb1d5c013962c473c9e39de1f9 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 29 Aug 2025 16:21:44 -0700
Subject: [PATCH 234/366] Fix .str.replace ignoring n for single character
 replacements (#19848)

closes https://github.com/rapidsai/cudf/issues/19844

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Tom Augspurger (https://github.com/TomAugspurger)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Tom Augspurger (https://github.com/TomAugspurger)

URL: https://github.com/rapidsai/cudf/pull/19848
---
 python/cudf/cudf/core/accessors/string.py           | 4 +---
 python/cudf/cudf/core/column/string.py              | 5 ++++-
 python/cudf/cudf/tests/series/accessors/test_str.py | 8 ++++++++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/accessors/string.py b/python/cudf/cudf/core/accessors/string.py
index 0b3f14d6267..7b85271e057 100644
--- a/python/cudf/cudf/core/accessors/string.py
+++ b/python/cudf/cudf/core/accessors/string.py
@@ -994,9 +994,6 @@ def replace(
                 as_column(repl, dtype=CUDF_STRING_DTYPE),  # type: ignore[arg-type]
             )
             return self._return_or_inplace(result)
-        # Pandas treats 0 as all
-        if n == 0:
-            n = -1

         # If 'pat' is re.Pattern then get the pattern string from it
         if regex and isinstance(pat, re.Pattern):
@@ -1017,6 +1014,7 @@ def replace(
         result = self._column.replace_str(
             pat,  # type: ignore[arg-type]
             pa_repl,
+            n,
         )
         return self._return_or_inplace(result)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 1fa05377099..a947ae7a275 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -1279,11 +1279,14 @@ def replace_re(
         return type(self).from_pylibcudf(plc_column)  # type: ignore[return-value]

     @acquire_spill_lock()
-    def replace_str(self, pattern: str, replacement: pa.Scalar) -> Self:
+    def replace_str(
+        self, pattern: str, replacement: pa.Scalar, max_replace_count: int = -1
+    ) -> Self:
        plc_result = plc.strings.replace.replace(
self.to_pylibcudf(mode="read"), pa_scalar_to_plc_scalar(pa.scalar(pattern)), pa_scalar_to_plc_scalar(replacement), + max_replace_count, ) return type(self).from_pylibcudf(plc_result) # type: ignore[return-value] diff --git a/python/cudf/cudf/tests/series/accessors/test_str.py b/python/cudf/cudf/tests/series/accessors/test_str.py index 509ade12ce1..f6efb7a2bc0 100644 --- a/python/cudf/cudf/tests/series/accessors/test_str.py +++ b/python/cudf/cudf/tests/series/accessors/test_str.py @@ -2296,6 +2296,14 @@ def test_string_replace_zero_length(ps_gs, pat): assert_eq(expect, got) +@pytest.mark.parametrize("n", [-1, 0, 1]) +def test_string_replace_n(n): + data = ["a,b,c", "d,e,f,g"] + expect = pd.Series(data).str.replace(pat=",", repl="_", n=n) + got = cudf.Series(data).str.replace(pat=",", repl="_", n=n) + assert_eq(expect, got) + + @pytest.mark.parametrize( "pat,regex", [ From 8da8ef0ed6d222ec4f96650f8430d70e49027f6f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 29 Aug 2025 18:29:28 -0500 Subject: [PATCH 235/366] Update `pandas-tests-diff` to only display GPU/CPU usage metrics (#19210) Resolves: #19204 This PR updates `pandas-tests-diff` to only display GPU/CPU usage metrics instead of removing the job altogether. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - James Lamb (https://github.com/jameslamb) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19210 --- .github/workflows/pandas-tests.yaml | 7 +- .github/workflows/pr.yaml | 12 +- ci/cudf_pandas_scripts/pandas-tests/diff.sh | 9 +- .../pandas-tests/job-summary.py | 124 ++++++++++++------ ci/cudf_pandas_scripts/pandas-tests/run.sh | 6 +- .../pandas/scripts/summarize-test-results.py | 31 +++++ 6 files changed, 136 insertions(+), 53 deletions(-) diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index 085c17bd9f2..7b4fed24910 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -22,11 +22,14 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 with: - matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.0", "LINUX_VER": "ubuntu24.04", "GPU": "l4", "DRIVER": "latest", "DEPENDENCIES": "newest"}]' build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + node_type: "gpu-l4-latest-1" + container_image: "rapidsai/citestwheel:25.10-cuda13.0.0-ubuntu24.04-py3.13" script: ci/cudf_pandas_scripts/pandas-tests/run.sh main + file_to_upload: ./artifacts/main-results.json + artifact-name: main-results.json diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 10904fb1425..d46de789706 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -359,14 +359,18 @@ jobs: # run the Pandas unit tests using PR branch needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@cuda13.0 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda13.0 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas with: - matrix_filter: '[{"ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.0", "LINUX_VER": "ubuntu24.04", "GPU": "l4", 
"DRIVER": "latest", "DEPENDENCIES": "newest"}]' build_type: pull-request + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-l4-latest-1" + container_image: "rapidsai/citestwheel:25.10-cuda13.0.0-ubuntu24.04-py3.13" script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr - # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit. - test_summary_show: "none" + file_to_upload: ./artifacts/pr-results.json + artifact-name: pr-results.json pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh index f84776ad173..e30236fdbf1 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/diff.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -13,8 +13,6 @@ rapids-logger "Github job name: ${GH_JOB_NAME}" rapids-logger "Rapids version: ${RAPIDS_FULL_VERSION}" PY_VER="313" -RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -PR_ARTIFACT=$(rapids-s3-path)cuda${RAPIDS_CUDA_MAJOR}_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json rapids-logger "Fetching latest available results from nightly" aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/cudf/" --query "sort_by(Contents[?ends_with(Key, '_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json')], &LastModified)[::].[Key]" --output text | tee s3_output.txt @@ -22,11 +20,12 @@ COMPARE_ENV=$(tail -n 1 s3_output.txt) rapids-logger "Latest available results from nightly: ${COMPARE_ENV}" aws s3 cp "s3://rapids-downloads/${COMPARE_ENV}" main-results.json -aws s3 cp "$PR_ARTIFACT" pr-results.json - +# TODO: To be enabled in a follow-up PR. +# MAIN_RUN_ID=$(gh run list -w "Pandas Test Job" -b branch-25.10 --status success --limit 7 --json databaseId --jq ".[0].databaseId") +gh run download $GITHUB_RUN_ID -n pr-results.json # Compute the diff and prepare job summary: python -m pip install pandas tabulate -python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY" +python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json "${RAPIDS_FULL_VERSION}" | tee summary.txt >> "$GITHUB_STEP_SUMMARY" COMMENT=$(head -1 summary.txt | grep -oP '\d+/\d+ \(\d+\.\d+%\).*?(a decrease by|an increase by) \d+\.\d+%') echo "$COMMENT" diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py index af3e28f440f..60668280d7e 100644 --- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -12,41 +12,85 @@ def get_total_and_passed(results): total_failed = 0 total_errored = 0 total_passed = 0 + total_skipped = 0 + total_xfailed_by_cudf_pandas = 0 + total_skipped_by_cudf_pandas = 0 for module_name, row in results.items(): total_failed += row.get("failed", 0) total_errored += row.get("errored", 0) total_passed += row.get("passed", 0) - total_tests = total_failed + total_errored + total_passed - return total_tests, total_passed + total_skipped += row.get("skipped", 0) + total_xfailed_by_cudf_pandas += row.get("xfailed_by_cudf_pandas", 0) + total_skipped_by_cudf_pandas += row.get("skipped_by_cudf_pandas", 0) + total_tests = total_failed + total_errored + total_passed + total_skipped + return ( + total_tests, + total_passed, + total_xfailed_by_cudf_pandas, + total_skipped_by_cudf_pandas, + total_skipped, + ) main_json = 
sys.argv[1] pr_json = sys.argv[2] +branch_version = sys.argv[3] # read the results of summarize-test-results.py --summary with open(main_json) as f: main_results = json.load(f) -main_total, main_passed = get_total_and_passed(main_results) +( + main_total, + main_passed, + main_xfailed_by_cudf_pandas, + main_skipped_by_cudf_pandas, + main_skipped, +) = get_total_and_passed(main_results) with open(pr_json) as f: pr_results = json.load(f) -pr_total, pr_passed = get_total_and_passed(pr_results) +( + pr_total, + pr_passed, + pr_xfailed_by_cudf_pandas, + pr_skipped_by_cudf_pandas, + pr_skipped, +) = get_total_and_passed(pr_results) passing_percentage = pr_passed / pr_total * 100 -pass_rate_change = abs(pr_passed - main_passed) / main_passed * 100 -rate_change_type = "a decrease" if pr_passed < main_passed else "an increase" - -comment = ( - "Merging this PR would result in " - f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) " - "Pandas tests passing, " - f"{rate_change_type} by " - f"{pass_rate_change:.2f}%. " - f"Trunk stats: {main_passed}/{main_total}." + + +metrics_df = pd.DataFrame( + { + "This PR": [ + pr_total, + pr_passed, + pr_skipped_by_cudf_pandas, + pr_xfailed_by_cudf_pandas, + pr_skipped + - (pr_skipped_by_cudf_pandas + pr_xfailed_by_cudf_pandas), + ], + f"branch-{branch_version}": [ + main_total, + main_passed, + main_skipped_by_cudf_pandas, + main_xfailed_by_cudf_pandas, + main_skipped + - (main_skipped_by_cudf_pandas + main_xfailed_by_cudf_pandas), + ], + }, + index=[ + "Total tests", + "Passed tests", + "cudf.Pandas Skipped", + "cudf.Pandas xFailed", + "pandas skipped", + ], ) def emoji_passed(x): + """Format number with emoji: positive -> ✅, negative -> ❌""" if x > 0: return f"{x}✅" elif x < 0: @@ -56,6 +100,7 @@ def emoji_passed(x): def emoji_failed(x): + """Format number with emoji: positive -> ❌, negative -> ✅ (inverse of emoji_passed)""" if x > 0: return f"{x}❌" elif x < 0: @@ -67,6 +112,7 @@ def emoji_failed(x): # convert pr_results to a pandas DataFrame and then a markdown table pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index() main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index() +# Calculate CPU and GPU usage percentages for main branch total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"] main_df["CPU Usage"] = ( (main_df["_slow_function_call"] / total_usage) * 100.0 @@ -75,6 +121,7 @@ def emoji_failed(x): (main_df["_fast_function_call"] / total_usage) * 100.0 ).round(1) +# Calculate CPU and GPU usage percentages for PR total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"] pr_df["CPU Usage"] = ( (pr_df["_slow_function_call"] / total_usage) * 100.0 @@ -83,17 +130,20 @@ def emoji_failed(x): (pr_df["_fast_function_call"] / total_usage) * 100.0 ).round(1) +# Calculate average usages cpu_usage_mean = pr_df["CPU Usage"].mean().round(2) gpu_usage_mean = pr_df["GPU Usage"].mean().round(2) - -gpu_usage_rate_change = abs( +gpu_usage_rate_change = ( pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean() -) +).round(2) + +# Handle NaN values pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0) pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0) main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0) main_df["GPU Usage"] = main_df["GPU Usage"].fillna(0) +# Calculate differences between PR and main diff_df = pr_df - main_df diff_df["CPU Usage"] = diff_df["CPU Usage"].round(1).fillna(0) diff_df["GPU Usage"] = diff_df["GPU Usage"].round(1).fillna(0) @@ -102,59 +152,51 @@ def emoji_failed(x): 
pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%" pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%" -pr_df = pr_df[ - ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"] -] -diff_df = diff_df[ - ["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"] -] +# Select relevant columns +pr_df = pr_df[["total", "CPU Usage", "GPU Usage"]] +diff_df = diff_df[["total", "CPU Usage", "GPU Usage"]] + +# Rename diff columns to indicate they are differences diff_df.columns = diff_df.columns + "_diff" -diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed) -diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed) -diff_df["skipped_diff"] = diff_df["skipped_diff"].map(emoji_failed) +# Combine PR results with differences df = pd.concat([pr_df, diff_df], axis=1) df = df.rename_axis("Test module") +# Rename columns for better readability df = df.rename( columns={ "total": "Total tests", - "passed": "Passed tests", - "failed": "Failed tests", - "skipped": "Skipped tests", "total_diff": "Total delta", - "passed_diff": "Passed delta", - "failed_diff": "Failed delta", - "skipped_diff": "Skipped delta", "CPU Usage_diff": "CPU Usage delta", "GPU Usage_diff": "GPU Usage delta", } ) + +# Sort by CPU usage delta and total tests df = df.sort_values(by=["CPU Usage delta", "Total tests"], ascending=False) + +# Apply emoji formatting to usage deltas df["CPU Usage delta"] = df["CPU Usage delta"].map(emoji_failed) df["GPU Usage delta"] = df["GPU Usage delta"].map(emoji_passed) + +# Select final columns to display df = df[ [ "Total tests", "CPU Usage delta", "GPU Usage delta", - "Passed tests", - "Failed tests", - "Skipped tests", "CPU Usage", "GPU Usage", "Total delta", - "Passed delta", - "Failed delta", - "Skipped delta", ] ] -print(comment) +# Print summary and results +print(metrics_df.to_markdown()) print() print( - f"Average GPU usage: {gpu_usage_mean}% {'an increase' if gpu_usage_rate_change > 0 else 'a decrease'} by {gpu_usage_rate_change}%" + f"Average GPU usage: {gpu_usage_mean}% ({gpu_usage_rate_change:+.2f}% change from trunk)" ) -print() print(f"Average CPU usage: {cpu_usage_mean}%") print() print("Here are the results of running the Pandas tests against this PR:") diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index 25ce70da01f..715f7c71649 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -11,6 +11,9 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e +rapids-logger "Check GPU usage" +nvidia-smi + PANDAS_TESTS_BRANCH=${1} RAPIDS_FULL_VERSION=$(<./VERSION) rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids-version $RAPIDS_FULL_VERSION" @@ -34,7 +37,7 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ mkdir -p "${RAPIDS_TESTS_DIR}" timeout 90m bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ - --numprocesses 5 \ + --numprocesses 6 \ --tb=line \ -vv \ --disable-warnings \ @@ -51,5 +54,6 @@ RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"} mkdir -p "${RAPIDS_ARTIFACTS_DIR}" mv pandas-testing/"${SUMMARY_FILE_NAME}" "${RAPIDS_ARTIFACTS_DIR}"/ rapids-upload-to-s3 "${RAPIDS_ARTIFACTS_DIR}"/"${SUMMARY_FILE_NAME}" "${RAPIDS_ARTIFACTS_DIR}" +mv "${RAPIDS_ARTIFACTS_DIR}"/"${SUMMARY_FILE_NAME}" "${RAPIDS_ARTIFACTS_DIR}"/${PANDAS_TESTS_BRANCH}-results.json rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git 
a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py index 6a91f9b07c9..4493550d941 100644 --- a/python/cudf/cudf/pandas/scripts/summarize-test-results.py +++ b/python/cudf/cudf/pandas/scripts/summarize-test-results.py @@ -29,9 +29,11 @@ def get_per_module_results(log_file_name): for line in f: try: line = json.loads(line) + except Exception: line = {} if "outcome" in line: + was_skipped_by_cudf_pandas = False outcome = line["outcome"] # outcome can be "passed", "failed", or "skipped". # Depending on other fields, it can indicate @@ -42,6 +44,16 @@ def get_per_module_results(log_file_name): # if the test failed during setup or teardown, # it counts as an "errored" test: outcome = "errored" + elif outcome == "skipped": + longrepr = line.get("longrepr", []) + if ( + longrepr is not None + and "Skipped: XPASSes with cudf.pandas enabled." + in longrepr + ): + was_skipped_by_cudf_pandas = True + else: + continue else: # we don't care about other outcomes during # setup or teardown @@ -50,6 +62,11 @@ def get_per_module_results(log_file_name): if line.get("wasxfail", False) and outcome == "passed": # it's an xpassed test outcome = "failed" + + was_xfailed_by_cudf_pandas = ( + line.get("wasxfail", "") + == "Fails with cudf.pandas enabled." + ) module_name = ( line["nodeid"] .split("::")[0] @@ -58,8 +75,22 @@ def get_per_module_results(log_file_name): per_module_results.setdefault(module_name, {}) per_module_results[module_name].setdefault("total", 0) per_module_results[module_name].setdefault(outcome, 0) + per_module_results[module_name].setdefault( + "xfailed_by_cudf_pandas", 0 + ) + per_module_results[module_name].setdefault( + "skipped_by_cudf_pandas", 0 + ) per_module_results[module_name]["total"] += 1 per_module_results[module_name][outcome] += 1 + if was_xfailed_by_cudf_pandas: + per_module_results[module_name][ + "xfailed_by_cudf_pandas" + ] += 1 + if was_skipped_by_cudf_pandas: + per_module_results[module_name][ + "skipped_by_cudf_pandas" + ] += 1 directory = os.path.dirname(log_file_name) pattern = os.path.join(directory, "function_call_counts_worker_*.json") From 9bf911cbfc7649aa8811cbc90d85cabfb2c4ca65 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 29 Aug 2025 16:38:25 -0700 Subject: [PATCH 236/366] Disallow loc.__setitem__ with list-like indexer when list elements not in index (#19851) closes https://github.com/rapidsai/cudf/issues/19843 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/19851 --- python/cudf/cudf/core/dataframe.py | 2 ++ python/cudf/cudf/core/series.py | 5 ++++- .../cudf/tests/dataframe/indexing/test_loc.py | 20 +++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 9833768a15a..4571a94cd03 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -387,6 +387,8 @@ def _setitem_tuple_arg(self, key, value): for col in columns_df._column_names: self._frame[col].loc[key[0]] = value except KeyError: + if not is_scalar(key[0]): + raise # TODO: There is a potential bug here if the inplace modifications # done above fail half-way we are left with a partially modified # frame. Need to handle this case better. 
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index eaffa06d96d..f2adf407d2c 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -379,7 +379,10 @@ def _loc_to_iloc(self, arg):
                     self._frame, Index._from_column(col)
                 )
                 if indices.null_count > 0:
-                    raise KeyError("label scalar is out of bound")
+                    missing = (
+                        indices[indices.isnull()].index.to_pandas().tolist()
+                    )
+                    raise KeyError(f"{missing} not in the index.")

                 return indices

diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_loc.py b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py
index 82277af12c5..7821347cbe8 100644
--- a/python/cudf/cudf/tests/dataframe/indexing/test_loc.py
+++ b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
+import re

 import pandas as pd
 import pytest
@@ -64,6 +65,25 @@ def test_loc_setitem_extend_existing_12505():
     assert_eq(df, cdf)


+def test_loc_setitem_list_arg_missing_raises():
+    data = {"a": [0]}
+    gdf = cudf.DataFrame(data)
+    pdf = pd.DataFrame(data)
+
+    cudf_msg = re.escape("[1] not in the index.")
+    with pytest.raises(KeyError, match=cudf_msg):
+        gdf.loc[[1]] = 1
+
+    with pytest.raises(KeyError, match=cudf_msg):
+        gdf.loc[[1], "a"] = 1
+
+    with pytest.raises(KeyError):
+        pdf.loc[[1]] = 1
+
+    with pytest.raises(KeyError):
+        pdf.loc[[1], "a"] = 1
+
+
 @pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/12801")
 def test_loc_setitem_add_column_partial_12801():
     df = pd.DataFrame({"a": [0, 1, 2]})

From b15d7aae5d5ea66e5ff427d5353531b976fad9e4 Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora"
Date: Sat, 30 Aug 2025 12:04:03 -0500
Subject: [PATCH 237/366] Collect join-key information in cudf-polars (#19736)

Closes https://github.com/rapidsai/cudf/issues/19392

- [x] Blocked by https://github.com/rapidsai/cudf/pull/19752

This PR includes the following changes:

- Updates the `ColumnStats` class to use `ColumnStats.unique_count` instead of `ColumnStats.unique_stats`.
  - *Rationale*: I spent a lot of time experimenting with follow-on work, and I found no value for the "fraction" component of `UniqueStats` outside of the data source information.
- Adds an `initialize_join_info` function (called within `collect_base_stats`) to collect basic join-key information.
- Adds dedicated `JoinKey` and `JoinInfo` classes to track the necessary join information.
- Adds a new `join_info: JoinInfo` attribute to `StatsCollector`.
- Adds `find_equivalence_sets` and `apply_pkfk_heuristics` functions.
  - These functions are used to process the information in `StatsCollector.join_info` after the `collect_base_stats` traversal.
  - For now, these functions are only used for testing (`test_base_stats_join_key_info`). In a follow-up PR, these functions will be used in a *second* statistics traversal to set final row-count and unique-value statistics for each IR node.

**Note**: The logic in this PR was adapted from @wence-'s ["doodle"](https://github.com/wence-/cudf/blob/65213d21a4917c4df6a23691acd9dc455c02027f/python/cudf_polars/cudf_polars/experimental/tablestats.py#L67).
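
For illustration, the new `JoinKey` container can be exercised on its own
before any join heuristics run (a toy sketch; it assumes the default, empty
data source reports no row-count estimate):

```python
from cudf_polars.experimental.base import ColumnStats, JoinKey

# Toy sketch: a single-column equi-join key built from bare column stats.
key = JoinKey(ColumnStats("a"))

# Nothing has been estimated yet, so both values start out as None:
print(key.implied_unique_count)  # set later by the PK-FK heuristics
print(key.source_row_count)      # max source row-count across key columns
```
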
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19736 --- .../cudf_polars/experimental/base.py | 112 ++++++++++++-- .../cudf_polars/experimental/statistics.py | 144 +++++++++++++++++- python/cudf_polars/docs/overview.md | 16 +- .../tests/experimental/test_stats.py | 79 +++++++++- 4 files changed, 334 insertions(+), 17 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index e4c0a43aa64..37a88263c38 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -5,10 +5,12 @@ from __future__ import annotations import dataclasses +from collections import defaultdict +from functools import cached_property from typing import TYPE_CHECKING, Any, Generic, TypeVar if TYPE_CHECKING: - from collections.abc import Generator, Iterator + from collections.abc import Generator, Iterator, MutableMapping from cudf_polars.dsl.expr import NamedExpr from cudf_polars.dsl.ir import IR @@ -72,7 +74,7 @@ class ColumnStat(Generic[T]): @dataclasses.dataclass class UniqueStats: """ - Unique-value statistics. + Sampled unique-value statistics. Parameters ---------- @@ -82,6 +84,11 @@ class UniqueStats: Unique-value fraction. This corresponds to the total number of unique values (count) divided by the total number of rows. + + Notes + ----- + This class is used to track unique-value column statistics + that have been sampled from a data source. """ count: ColumnStat[int] = dataclasses.field(default_factory=ColumnStat[int]) @@ -134,14 +141,22 @@ class ColumnSourceInfo: direct access to column-specific information. """ - __slots__ = ("_allow_unique_sampling", "column_name", "table_source_info") + __slots__ = ( + "_allow_unique_sampling", + "column_name", + "implied_unique_count", + "table_source_info", + ) table_source_info: DataSourceInfo column_name: str + implied_unique_count: ColumnStat[int] + """Unique-value count implied by join heuristics.""" _allow_unique_sampling: bool def __init__(self, table_source_info: DataSourceInfo, column_name: str) -> None: self.table_source_info = table_source_info self.column_name = column_name + self.implied_unique_count = ColumnStat[int](None) self._allow_unique_sampling = False @property @@ -193,16 +208,16 @@ class ColumnStats: Child ColumnStats objects. source_info Column source information. - unique_stats - Unique-value statistics. + unique_count + Unique-value count. """ - __slots__ = ("children", "name", "source_info", "unique_stats") + __slots__ = ("children", "name", "source_info", "unique_count") name: str children: tuple[ColumnStats, ...] source_info: ColumnSourceInfo - unique_stats: UniqueStats + unique_count: ColumnStat[int] def __init__( self, @@ -210,12 +225,12 @@ def __init__( *, children: tuple[ColumnStats, ...] 
= (), source_info: ColumnSourceInfo | None = None, - unique_stats: UniqueStats | None = None, + unique_count: ColumnStat[int] | None = None, ) -> None: self.name = name self.children = children self.source_info = source_info or ColumnSourceInfo(DataSourceInfo(), name) - self.unique_stats = unique_stats or UniqueStats() + self.unique_count = unique_count or ColumnStat[int](None) def new_parent( self, @@ -243,21 +258,94 @@ def new_parent( children=(self,), # Want to reference the same DataSourceInfo source_info=self.source_info, - # Want fresh UniqueStats so we can mutate in place - unique_stats=UniqueStats(), ) +class JoinKey: + """ + Join-key information. + + Parameters + ---------- + column_stats + Column statistics for the join key. + + Notes + ----- + This class is used to track join-key information. + It is used to track the columns being joined on + and the estimated unique-value count for the join key. + """ + + column_stats: tuple[ColumnStats, ...] + implied_unique_count: int | None + """Estimated unique-value count from join heuristics.""" + + def __init__(self, *column_stats: ColumnStats) -> None: + self.column_stats = column_stats + self.implied_unique_count = None + + @cached_property + def source_row_count(self) -> int | None: + """ + Return the estimated row-count of the source columns. + + Notes + ----- + This is the maximum row-count estimate of the source columns. + """ + return max( + ( + cs.source_info.row_count.value + for cs in self.column_stats + if cs.source_info.row_count.value is not None + ), + default=None, + ) + + +class JoinInfo: + """ + Join information. + + Notes + ----- + This class is used to track mappings between joined-on + columns and joined-on keys (groups of columns). We need + these mappings to calculate equivalence sets and make + join-based unique-count and row-count estimates. 
+ """ + + __slots__ = ("column_map", "join_map", "key_map") + + column_map: MutableMapping[ColumnStats, set[ColumnStats]] + """Mapping between joined columns.""" + key_map: MutableMapping[JoinKey, set[JoinKey]] + """Mapping between joined keys (groups of columns).""" + join_map: dict[IR, list[JoinKey]] + """Mapping between IR nodes and associated join keys.""" + + def __init__(self) -> None: + self.column_map: MutableMapping[ColumnStats, set[ColumnStats]] = defaultdict( + set[ColumnStats] + ) + self.key_map: MutableMapping[JoinKey, set[JoinKey]] = defaultdict(set[JoinKey]) + self.join_map: dict[IR, list[JoinKey]] = {} + + class StatsCollector: """Column statistics collector.""" - __slots__ = ("column_stats", "row_count") + __slots__ = ("column_stats", "join_info", "row_count") row_count: dict[IR, ColumnStat[int]] """Estimated row count for each IR node.""" column_stats: dict[IR, dict[str, ColumnStats]] """Column statistics for each IR node.""" + join_info: JoinInfo + """Join information.""" def __init__(self) -> None: self.row_count: dict[IR, ColumnStat[int]] = {} self.column_stats: dict[IR, dict[str, ColumnStats]] = {} + self.join_info = JoinInfo() diff --git a/python/cudf_polars/cudf_polars/experimental/statistics.py b/python/cudf_polars/cudf_polars/experimental/statistics.py index 18588ac7e29..fcd6644a71a 100644 --- a/python/cudf_polars/cudf_polars/experimental/statistics.py +++ b/python/cudf_polars/cudf_polars/experimental/statistics.py @@ -6,7 +6,7 @@ from __future__ import annotations import itertools -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypeVar from cudf_polars.dsl.ir import ( IR, @@ -20,14 +20,17 @@ ) from cudf_polars.dsl.traversal import post_traversal from cudf_polars.experimental.base import ( + ColumnStat, ColumnStats, + JoinKey, StatsCollector, ) from cudf_polars.experimental.dispatch import initialize_column_stats if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Mapping, Sequence + from cudf_polars.experimental.base import JoinInfo from cudf_polars.utils.config import ConfigOptions @@ -45,13 +48,150 @@ def collect_base_stats(root: IR, config_options: ConfigOptions) -> StatsCollecto Returns ------- A new StatsCollector object with populated datasource statistics. + + Notes + ----- + This function initializes the ``StatsCollector`` object + with the base datasource statistics. The goal is to build an + outline of the statistics that will be collected before any + real data is sampled. """ stats: StatsCollector = StatsCollector() for node in post_traversal([root]): + # Initialize column statistics from datasource information stats.column_stats[node] = initialize_column_stats(node, stats, config_options) + # Initialize join information + if isinstance(node, Join): + initialize_join_info(node, stats) return stats +def initialize_join_info(node: Join, stats: StatsCollector) -> None: + """ + Initialize join information for the given node. + + Parameters + ---------- + node + Join node to initialize join-key information for. + stats + StatsCollector object to update. + + Notes + ----- + This function updates ``stats.join_info``. 
+ """ + left, right = node.children + join_info = stats.join_info + right_keys = [stats.column_stats[right][n.name] for n in node.right_on] + left_keys = [stats.column_stats[left][n.name] for n in node.left_on] + lkey = JoinKey(*right_keys) + rkey = JoinKey(*left_keys) + join_info.key_map[lkey].add(rkey) + join_info.key_map[rkey].add(lkey) + join_info.join_map[node] = [lkey, rkey] + for u, v in zip(left_keys, right_keys, strict=True): + join_info.column_map[u].add(v) + join_info.column_map[v].add(u) + + +T = TypeVar("T") + + +def find_equivalence_sets(join_map: Mapping[T, set[T]]) -> list[set[T]]: + """ + Find equivalence sets in a join-key mapping. + + Parameters + ---------- + join_map + Joined key or column mapping to find equivalence sets in. + + Returns + ------- + List of equivalence sets. + + Notes + ----- + This function is used by ``apply_pkfk_heuristics``. + """ + seen = set() + components = [] + for v in join_map: + if v not in seen: + cluster = {v} + stack = [v] + while stack: + node = stack.pop() + for n in join_map[node]: + if n not in cluster: + cluster.add(n) + stack.append(n) + components.append(cluster) + seen.update(cluster) + return components + + +def apply_pkfk_heuristics(join_info: JoinInfo) -> None: + """ + Apply PK-FK unique-count heuristics to join keys. + + Parameters + ---------- + join_info + Join information to apply PK-FK heuristics to. + + Notes + ----- + This function modifies the ``JoinKey`` objects being tracked + in ``StatsCollector.join_info`` using PK-FK heuristics to + estimate the "implied" unique-value count. This function also + modifies the inderlying ``ColumnStats`` objects included in + a join key. + """ + # This applies the PK-FK matching scheme of + # https://blobs.duckdb.org/papers/tom-ebergen-msc-thesis-join-order-optimization-with-almost-no-statistics.pdf + # See section 3.2 + for keys in find_equivalence_sets(join_info.key_map): + implied_unique_count = max( + ( + c.implied_unique_count + for c in keys + if c.implied_unique_count is not None + ), + # Default unique-count estimate is the minimum source row count + default=min( + (c.source_row_count for c in keys if c.source_row_count is not None), + default=None, + ), + ) + for key in keys: + # Update unique-count estimate for each join key + key.implied_unique_count = implied_unique_count + + # We separately apply PK-FK heuristics to individual columns so + # that we can update ColumnStats.source_info.implied_unique_count + # and use the per-column information elsewhere in the query plan. + for cols in find_equivalence_sets(join_info.column_map): + unique_count = max( + ( + cs.source_info.implied_unique_count.value + for cs in cols + if cs.source_info.implied_unique_count.value is not None + ), + default=min( + ( + cs.source_info.row_count.value + for cs in cols + if cs.source_info.row_count.value is not None + ), + default=None, + ), + ) + for cs in cols: + cs.source_info.implied_unique_count = ColumnStat[int](unique_count) + + def _update_unique_stats_columns( child_column_stats: dict[str, ColumnStats], key_names: Sequence[str], diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 34e66e62436..8cf510416e4 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -421,10 +421,21 @@ datasource (e.g. a Parquet dataset or in-memory `DataFrame`). Since `DataSourceInfo` tracks information for an entire table, we use `ColumnSourceInfo` to provide a single-column view of the object. 
 - `ColumnStats`: This class is used to group together the "base"
-`ColumnSourceInfo` reference and the local `UniqueStats` estimates
+`ColumnSourceInfo` reference and the local unique-count estimate
 for a specific IR + column combination. We bundle these references
 together to simplify the design and maintenance of `StatsCollector`.
-**NOTE:** The current `UniqueStats` estimates are not yet populated.
+**NOTE:** The local unique-count estimate is not yet populated.
+- `JoinKey`: This class is used to define a set of columns being
+joined on and the estimated unique-value count of the key.
+- `JoinInfo`: This class is used to define the necessary data
+structures for applying join heuristics to our query plan.
+Each object contains the following attributes:
+  - `JoinInfo.key_map`: Returns a mapping between distinct
+    `JoinKey` objects that are joined on in the query plan.
+  - `JoinInfo.column_map`: Returns a mapping between distinct
+    `ColumnStats` objects that are joined on in the query plan.
+  - `JoinInfo.join_map`: Returns a mapping between each IR node
+    and the associated `JoinKey` objects.
 - `StatsCollector`: This class is used to collect and store
 statistics for all IR nodes within a single query. The statistics
 attached to each IR node refer to the **output** columns of the
@@ -436,6 +447,7 @@ Each object has two important attributes:
   **NOTE:** This attribute is not yet populated.
   - `StatsCollector.column_stats`: Returns a mapping between each IR
     node and the `dict[str, ColumnStats]` mapping for that node.
+  - `StatsCollector.join_info`: Returns a `JoinInfo` object.

 ## Collecting and using statistics

diff --git a/python/cudf_polars/tests/experimental/test_stats.py b/python/cudf_polars/tests/experimental/test_stats.py
index f139d344eaa..dfc1d5ca065 100644
--- a/python/cudf_polars/tests/experimental/test_stats.py
+++ b/python/cudf_polars/tests/experimental/test_stats.py
@@ -11,7 +11,11 @@

 from cudf_polars import Translator
 from cudf_polars.experimental.io import _clear_source_info_cache
-from cudf_polars.experimental.statistics import collect_base_stats
+from cudf_polars.experimental.statistics import (
+    apply_pkfk_heuristics,
+    collect_base_stats,
+    find_equivalence_sets,
+)
 from cudf_polars.testing.asserts import DEFAULT_SCHEDULER, assert_gpu_result_equal
 from cudf_polars.testing.io import make_partitioned_source
 from cudf_polars.utils.config import ConfigOptions
@@ -368,3 +372,76 @@ def test_base_stats_distinct(df):
     source_info_y = column_stats["y"].source_info
     assert source_info_y.row_count.value == row_count
     assert source_info_y.row_count.exact
+
+
+def test_base_stats_join_key_info():
+    engine = pl.GPUEngine(
+        raise_on_fail=True,
+        executor="streaming",
+        executor_options={
+            "scheduler": DEFAULT_SCHEDULER,
+            "shuffle_method": "tasks",
+        },
+    )
+
+    # Customers table (PK: cust_id)
+    customers = pl.LazyFrame(
+        {
+            "cust_id": [1, 2],
+            "cust_name": ["Alice", "Bob"],
+        }
+    )
+
+    # Orders table (PK: order_id)
+    orders = pl.LazyFrame(
+        {
+            "order_id": [100, 101, 102],
+            "cust_id": [1, 2, 1],
+            "prod_id": [10, 20, 10],
+            "loc_id": [501, 501, 502],
+            "quant": [2, 1, 4],
+        }
+    )
+
+    # Locations table (PK: prod_id, loc_id)
+    locations = pl.LazyFrame(
+        {
+            "prod_id": [10, 20, 10],
+            "loc_id": [501, 501, 502],
+            "price": [50, 60, 55],
+        }
+    )
+
+    # Step 1: Multi-key join orders and locations on prod_id & loc_id
+    orders_with_price = orders.join(locations, on=["prod_id", "loc_id"], how="inner")
+
+    # Step 2: Join result to customers on cust_id
+    q = orders_with_price.join(customers, on="cust_id", how="inner")
on="cust_id", how="inner") + + ir = Translator(q._ldf.visit(), engine).translate_ir() + config_options = ConfigOptions.from_polars_engine(engine) + stats = collect_base_stats(ir, config_options) + join_info = stats.join_info + + # Check equivalence sets + key_sets = sorted( + sorted(tuple(cs.name for cs in k.column_stats) for k in group) + for group in find_equivalence_sets(join_info.key_map) + ) + assert len(key_sets) == 2 + assert key_sets[0] == [("cust_id",), ("cust_id",)] + assert key_sets[1] == [("prod_id", "loc_id"), ("prod_id", "loc_id")] + + # Check basic PK-FK unique-count heuristics + apply_pkfk_heuristics(join_info) + implied_unique_count = join_info.join_map[ir][0].implied_unique_count + assert implied_unique_count == join_info.join_map[ir][1].implied_unique_count + assert ( + q.select(pl.col("cust_id").n_unique()).collect().item() == implied_unique_count + ) + assert ( + # Calling apply_pkfk_heuristics should update the implied_unique_count + # estimate on the associated ColumnSourceInfo as well + stats.column_stats[ir]["cust_id"].source_info.implied_unique_count.value + == implied_unique_count + ) From 3a5582a9b4c44424b341d95803020f438359c87a Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 2 Sep 2025 19:50:16 +0200 Subject: [PATCH 238/366] Prevent installation of pytest-rerunfailures 16.0.0 (#19863) The `pytest-rerunfailures=16.0.0` package had a breaking change (see https://github.com/pytest-dev/pytest-rerunfailures/issues/302) and thus in https://github.com/rapidsai/cudf/pull/19846 we pinned to prevent installation of version 16 and higher. Since then a new build `16.0.1` is out that reverts the bad change, and although some of the bad builds were yanked, the PyPI build is still not yanked, so this change prevents installing only the bad release. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - James Lamb (https://github.com/jameslamb) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19863 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-129_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-130_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-130_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 8227b760b36..8fccfe513f8 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -75,7 +75,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures<16.0 +- pytest-rerunfailures!=16.0.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index dd2aa0a6e71..bad1763d906 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -76,7 +76,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures<16.0 +- pytest-rerunfailures!=16.0.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index f550d88d4b3..322fefc07da 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -75,7 +75,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures<16.0 +- pytest-rerunfailures!=16.0.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index d6103d5e73b..3e68fbf4d07 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -76,7 +76,7 @@ dependencies: - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov -- pytest-rerunfailures<16.0 +- pytest-rerunfailures!=16.0.0 - pytest-xdist - python-confluent-kafka>=2.8.0,<2.9.0a0 - python-xxhash diff --git a/dependencies.yaml b/dependencies.yaml index 3dc704cebf4..55c9f7cc446 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -1165,7 +1165,7 @@ dependencies: - nbformat - openpyxl # https://github.com/pytest-dev/pytest-rerunfailures/issues/302 - - pytest-rerunfailures<16.0 + - pytest-rerunfailures!=16.0.0 depends_on_dask_cuda: common: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 50c6fd782e8..69c889c0cc8 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -76,7 +76,7 @@ cudf-pandas-tests = [ "nbconvert", "nbformat", "openpyxl", - "pytest-rerunfailures<16.0", + "pytest-rerunfailures!=16.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
 [project.urls]

From a377d23897b1f07d49ce795b969c1625f80f62e7 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 2 Sep 2025 15:22:49 -0400
Subject: [PATCH 239/366] Benchmarks comparing Arrow string formats (#19552)

Creates nvbench benchmarks comparing a theoretical ArrowStringView
implementation with the Arrow string format currently used in libcudf
for select operations like hash, starts-with, and sort.
The ArrowStringView device vector is created manually from a generated
libcudf STRING column. The two formats are compared by using thrust/cub
functions where custom functors wrap individual rows with a
`cudf::string_view`. This way, the detailed instructions are the same
and only the data layout is different.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19552
---
 cpp/benchmarks/CMakeLists.txt                 |   5 +
 .../string/experimental/stringview_compare.cu | 500 ++++++++++++++++++
 2 files changed, 505 insertions(+)
 create mode 100644 cpp/benchmarks/string/experimental/stringview_compare.cu

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index a6d070ce2d3..531a0c92c7b 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -427,6 +427,11 @@ ConfigureNVBench(
   string/url_decode.cu
 )

+# ##################################################################################################
+# * strings experimental benchmark ---------------------------------------------------
+ConfigureNVBench(STRINGS_EXPERIMENTAL_NVBENCH string/experimental/stringview_compare.cu)
+target_link_libraries(STRINGS_EXPERIMENTAL_NVBENCH PRIVATE nanoarrow)
+
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
 ConfigureNVBench(JSON_NVBENCH json/json.cu)

diff --git a/cpp/benchmarks/string/experimental/stringview_compare.cu b/cpp/benchmarks/string/experimental/stringview_compare.cu
new file mode 100644
index 00000000000..f012e647dac
--- /dev/null
+++ b/cpp/benchmarks/string/experimental/stringview_compare.cu
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace {
+
+// Runtime switch to use ArrowStringView instead of cudf's Arrow string format.
+//
+// Set to anything to use ArrowStringView, and unset to use cudf.
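+//
+// (Both layouts are compiled into the one binary; std::getenv returns non-null
+// whenever the variable is set, even to an empty string, so any setting
+// selects the ArrowStringView path at run time.)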
+// Example command line to generate ArrowStringView numbers for sv_hash benchmark:
+// CUDF_BM_ARROWSTRINGVIEW=1 benchmarks/STRINGS_EXPERIMENTAL_NVBENCH -d 0 -b sv_hash
+//
+// This will generate nvbench benchmark outputs that can be compared directly
+// using the `nvbench compare.py` script.
+auto const BM_ARROWSTRINGVIEW = "CUDF_BM_ARROWSTRINGVIEW";
+
+/**
+ * Creates ArrowBinaryView objects from a strings column.
+ */
+struct strings_to_binary_view {
+  cudf::column_device_view d_strings;
+  cudf::detail::input_offsetalator d_offsets;
+  ArrowBinaryView* d_items;  // output
+
+  __device__ void operator()(cudf::size_type idx) const
+  {
+    auto& item = d_items[idx];
+    if (d_strings.is_null(idx)) {
+      item.inlined.size = 0;  // not used in this benchmark
+      return;
+    }
+
+    auto const d_str  = d_strings.element<cudf::string_view>(idx);
+    item.inlined.size = d_str.size_bytes();
+    // copy the string data to the inlined buffer if it fits
+    if (d_str.size_bytes() <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+      thrust::copy(thrust::seq, d_str.data(), d_str.data() + d_str.size_bytes(), item.inlined.data);
+      thrust::uninitialized_fill(thrust::seq,
+                                 item.inlined.data + item.inlined.size,
+                                 item.inlined.data + NANOARROW_BINARY_VIEW_INLINE_SIZE,
+                                 0);
+    } else {
+      // otherwise, copy the prefix and set the offset to the data buffer
+      thrust::copy(thrust::seq,
+                   d_str.data(),
+                   d_str.data() + NANOARROW_BINARY_VIEW_PREFIX_SIZE,
+                   item.ref.prefix);
+      auto const offset     = d_offsets[idx];
+      item.ref.buffer_index = 0;  // only one buffer in this benchmark
+      item.ref.offset       = static_cast<int32_t>(offset);
+    }
+  }
+};
+
+/**
+ * Returns a string_view from an ArrowBinaryView.
+ * This helps in the comparison by both implementations using `cudf::string_view`
+ * as the base type so the actual operations are the same and only the
+ * format (how the data is organized) is different.
+ */
+__device__ cudf::string_view get_string_view(ArrowBinaryView const& item, char const* d_chars)
+{
+  auto const data = item.inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE
+                      ? reinterpret_cast<char const*>(item.inlined.data)
+                      : d_chars + item.ref.offset;
+  return cudf::string_view(data, item.inlined.size);
+}
+
+/**
+ * Hashes a string from an ArrowBinaryView.
+ */
+struct hash_arrow_sv {
+  ArrowBinaryView* d_items;
+  char const* d_chars;
+  __device__ cudf::hash_value_type operator()(cudf::size_type idx) const
+  {
+    auto& item        = d_items[idx];
+    auto const d_str  = get_string_view(item, d_chars);
+    auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{0};
+    return hasher(d_str);
+  }
+};
+
+/**
+ * Checks if a string from an ArrowBinaryView starts with a target string.
+ */
+struct starts_arrow_sv {
+  ArrowBinaryView* d_items;
+  char const* d_chars;
+  cudf::size_type tgt_size;
+  __device__ bool operator()(cudf::size_type idx) const
+  {
+    // note that this requires tgt_size <= 26
+    auto const d_tgt = cudf::string_view("abcdefghijklmnopqrstuvwxyz", tgt_size);
+    auto& item       = d_items[idx];
+    auto const size  = item.inlined.size;
+    auto const data  = (size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) || (tgt_size <= 4)
+                         ? reinterpret_cast<char const*>(item.inlined.data)
+                         : d_chars + item.ref.offset;
+    auto const d_str = cudf::string_view(data, size);
+    return d_str.size_bytes() >= d_tgt.size_bytes() &&
+           d_tgt.compare(d_str.data(), d_tgt.size_bytes()) == 0;
+  }
+};
+
+/**
+ * Compares two strings from ArrowBinaryView objects.
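+ * The first four inlined bytes of each view hold the string prefix, so most
+ * unequal pairs are ordered without touching the long-string data buffer.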
+ */
+struct compare_arrow_sv {
+  ArrowBinaryView* d_items;
+  char const* d_chars;
+  __device__ bool operator()(cudf::size_type lhs, cudf::size_type rhs)
+  {
+    auto& item_lhs = d_items[lhs];
+    auto& item_rhs = d_items[rhs];
+
+    // shortcut to check prefix bytes
+    auto pv_lhs = reinterpret_cast<uint32_t const*>(item_lhs.inlined.data)[0];
+    auto pv_rhs = reinterpret_cast<uint32_t const*>(item_rhs.inlined.data)[0];
+    if (pv_lhs != pv_rhs) {
+      return cudf::hashing::detail::swap_endian(pv_lhs) <
+             cudf::hashing::detail::swap_endian(pv_rhs);
+    }
+
+    // prefix matches so check how many bytes are left to compare
+    constexpr auto prefix_size = static_cast<cudf::size_type>(sizeof(uint32_t));
+    auto const size_lhs        = item_lhs.inlined.size;
+    auto const size_rhs        = item_rhs.inlined.size;
+    // if no bytes left to compare, we are done (strings are equal)
+    if (size_lhs <= prefix_size && size_rhs <= prefix_size) { return false; }
+
+    // compare the remaining bytes
+    auto const d_str_lhs = cudf::string_view(
+      get_string_view(item_lhs, d_chars).data() + prefix_size, size_lhs - prefix_size);
+    auto const d_str_rhs = cudf::string_view(
+      get_string_view(item_rhs, d_chars).data() + prefix_size, size_rhs - prefix_size);
+
+    return d_str_lhs < d_str_rhs;
+  }
+};
+
+/**
+ * Hashes a string from a cudf column
+ */
+struct hash_sv {
+  cudf::column_device_view d_strings;
+  __device__ cudf::hash_value_type operator()(cudf::size_type idx) const
+  {
+    auto const d_str  = d_strings.element<cudf::string_view>(idx);
+    auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{0};
+    return hasher(d_str);
+  }
+};
+
+/**
+ * Checks if a string from a cudf column starts with a target string
+ */
+struct starts_sv {
+  cudf::column_device_view d_strings;
+  cudf::size_type tgt_size;
+  __device__ bool operator()(cudf::size_type idx) const
+  {
+    auto const d_str = d_strings.element<cudf::string_view>(idx);
+    auto const d_tgt = cudf::string_view("abcdefghijklmnopqrstuvwxyz", tgt_size);
+    return d_str.size_bytes() >= d_tgt.size_bytes() &&
+           d_tgt.compare(d_str.data(), d_tgt.size_bytes()) == 0;
+  }
+};
+
+/**
+ * Compares two strings from a cudf column
+ */
+struct compare_sv {
+  cudf::column_device_view d_strings;
+  __device__ bool operator()(cudf::size_type lhs, cudf::size_type rhs)
+  {
+    auto const d_str_lhs = d_strings.element<cudf::string_view>(lhs);
+    auto const d_str_rhs = d_strings.element<cudf::string_view>(rhs);
+    return d_str_lhs < d_str_rhs;
+  }
+};
+
+/**
+ * Creates an ArrowBinaryView vector and data buffer from a strings column.
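+ * Strings longer than NANOARROW_BINARY_VIEW_INLINE_SIZE are gathered into a
+ * single contiguous chars buffer; shorter strings live entirely inline in the
+ * view structs and need no buffer bytes.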
+ */ +std::pair, rmm::device_buffer> create_sv_array( + cudf::strings_column_view const& input, rmm::cuda_stream_view stream) +{ + auto const d_strings = cudf::column_device_view::create(input.parent(), stream); + auto d_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); + + // count the (longer) strings that will need to be stored in the data buffer + auto const num_longer_strings = thrust::count_if( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + [d_offsets] __device__(auto idx) { + return d_offsets[idx + 1] - d_offsets[idx] > NANOARROW_BINARY_VIEW_INLINE_SIZE; + }); + + // gather all the long-ish strings into a single strings column + auto [unused_col, longer_strings] = [&] { + if (num_longer_strings == input.size()) { + // we can use the input column as is for the remainder of this function + return std::pair{cudf::make_empty_column(cudf::type_id::STRING), input}; + } + auto indices = cudf::detail::make_counting_transform_iterator( + 0, + cuda::proclaim_return_type( + [d_strings = *d_strings] __device__(auto idx) { + if (d_strings.is_null(idx)) { + return cudf::strings::detail::string_index_pair{nullptr, 0}; + } + auto const d_str = d_strings.element(idx); + return (d_str.size_bytes() > NANOARROW_BINARY_VIEW_INLINE_SIZE) + ? cudf::strings::detail::string_index_pair{d_str.data(), d_str.size_bytes()} + : cudf::strings::detail::string_index_pair{"", 0}; + })); + auto longer_strings = cudf::strings::detail::make_strings_column( + indices, indices + input.size(), stream, cudf::get_current_device_resource_ref()); + stream.synchronize(); + auto const sv = cudf::strings_column_view(longer_strings->view()); + return std::pair{std::move(longer_strings), sv}; + }(); + auto [first, last] = cudf::strings::detail::get_first_and_last_offset(longer_strings, stream); + auto const longer_chars_size = last - first; + + // Make sure only one buffer is needed. + // Using a single data buffer makes the two formats more similar focusing on the layout. + constexpr int64_t max_size = std::numeric_limits::max() / 2; + auto const num_buffers = cudf::util::div_rounding_up_safe(longer_chars_size, max_size); + CUDF_EXPECTS(num_buffers <= 1, "num_buffers must be <= 1"); + + // now build BinaryView objects from the strings in device memory + // (for-each works better than transform due to the prefix/data of the ArrowBinaryView) + auto d_items = rmm::device_uvector(input.size(), stream); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + input.size(), + strings_to_binary_view{*d_strings, d_offsets, d_items.data()}); + + rmm::device_buffer data_buffer(longer_chars_size, stream); + auto const chars_data = longer_strings.chars_begin(stream); + CUDF_CUDA_TRY(cudaMemcpyAsync( + data_buffer.data(), chars_data, longer_chars_size, cudaMemcpyDefault, stream.value())); + + return std::pair{std::move(d_items), std::move(data_buffer)}; +} +} // namespace + +static void BM_sv_hash(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const max_width = static_cast(state.get_int64("max_width")); + auto const min_width = state.get_int64("fw") ? 
max_width : 1; // fw = fixed width + + data_profile const profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) + .no_validity(); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto col_view = column->view(); + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_writes(num_rows * sizeof(cudf::hash_value_type)); + auto output = rmm::device_uvector(num_rows, stream); + auto begin = thrust::make_counting_iterator(0); + auto end = thrust::make_counting_iterator(num_rows); + + if (std::getenv(BM_ARROWSTRINGVIEW)) { + auto [d_items, data_buffer] = create_sv_array(col_view, stream); + auto const d_chars = reinterpret_cast(data_buffer.data()); + state.add_global_memory_reads(num_rows * sizeof(ArrowBinaryView) + data_buffer.size()); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + thrust::transform(rmm::exec_policy(stream), + begin, + end, + output.begin(), + hash_arrow_sv{d_items.data(), d_chars}); + }); + } else { + auto d_strings = cudf::column_device_view::create(col_view, stream); + auto col_size = column->alloc_size(); + state.add_global_memory_reads(col_size); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + thrust::transform(rmm::exec_policy(stream), begin, end, output.begin(), hash_sv{*d_strings}); + }); + } +} + +static void BM_sv_starts(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const max_width = static_cast(state.get_int64("max_width")); + auto const min_width = state.get_int64("fw") ? max_width : 1; + auto const tgt_size = static_cast(state.get_int64("tgt_size")); + + data_profile const profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) + .no_validity(); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto col_view = column->view(); + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_writes(num_rows * sizeof(bool)); + auto output = rmm::device_uvector(num_rows, stream); + auto begin = thrust::make_counting_iterator(0); + auto end = thrust::make_counting_iterator(num_rows); + + if (std::getenv(BM_ARROWSTRINGVIEW)) { + auto [d_items, data_buffer] = create_sv_array(col_view, stream); + auto const d_chars = reinterpret_cast(data_buffer.data()); + state.add_global_memory_reads(num_rows * sizeof(ArrowBinaryView) + data_buffer.size()); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + thrust::transform(rmm::exec_policy(stream), + begin, + end, + output.begin(), + starts_arrow_sv{d_items.data(), d_chars, tgt_size}); + }); + } else { + auto d_strings = cudf::column_device_view::create(col_view, stream); + auto col_size = column->alloc_size(); + state.add_global_memory_reads(col_size); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + thrust::transform( + rmm::exec_policy(stream), begin, end, output.begin(), starts_sv{*d_strings, tgt_size}); + }); + } +} + +static void BM_sv_sort(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const max_width = static_cast(state.get_int64("max_width")); + auto const card = static_cast(state.get_int64("card")); + + auto h_data = std::vector(card); + 
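+  // build `card` distinct zero-padded strings; the random gather below
+  // replicates them into a low-cardinality sort input of num_rows rows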
std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(card), + h_data.begin(), + [max_width](auto idx) { + auto const fmt = std::format("{{:0{}d}}", max_width); + return std::vformat(fmt, std::make_format_args(idx)); + }); + auto d_data = cudf::test::strings_column_wrapper(h_data.begin(), h_data.end()).release(); + + data_profile gather_profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 0, d_data->size() - 1); + auto gather_map = create_random_column(cudf::type_id::INT32, row_count{num_rows}, gather_profile); + + auto table = cudf::gather(cudf::table_view({d_data->view()}), gather_map->view()); + auto column = std::move(table->release().front()); + + auto col_view = column->view(); + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_writes(num_rows * sizeof(cudf::size_type)); + + // indices are the keys that are sorted (not inplace) + auto keys = rmm::device_uvector(num_rows, stream); + auto in_keys = thrust::make_counting_iterator(0); + auto out_keys = keys.begin(); + auto tmp_bytes = std::size_t{0}; + + if (std::getenv(BM_ARROWSTRINGVIEW)) { + auto [d_items, data_buffer] = create_sv_array(col_view, stream); + auto const d_chars = reinterpret_cast(data_buffer.data()); + auto comparator = compare_arrow_sv{d_items.data(), d_chars}; + cub::DeviceMergeSort::SortKeysCopy( + nullptr, tmp_bytes, in_keys, out_keys, num_rows, comparator, stream.value()); + auto tmp_stg = rmm::device_buffer(tmp_bytes, stream); + state.add_global_memory_reads(num_rows * sizeof(ArrowBinaryView) + data_buffer.size()); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cub::DeviceMergeSort::SortKeysCopy( + tmp_stg.data(), tmp_bytes, in_keys, out_keys, num_rows, comparator, stream.value()); + }); + } else { + auto d_strings = cudf::column_device_view::create(col_view, stream); + auto col_size = column->alloc_size(); + state.add_global_memory_reads(col_size); + auto comparator = compare_sv{*d_strings}; + cub::DeviceMergeSort::SortKeysCopy( + nullptr, tmp_bytes, in_keys, out_keys, num_rows, comparator, stream.value()); + auto tmp_stg = rmm::device_buffer(tmp_bytes, stream); + state.add_global_memory_reads(col_size); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cub::DeviceMergeSort::SortKeysCopy( + tmp_stg.data(), tmp_bytes, in_keys, out_keys, num_rows, comparator, stream.value()); + }); + } +} + +static void BM_sv_gather(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const width = static_cast(state.get_int64("width")); + auto const map_rows = static_cast(state.get_int64("map_rows")); + + data_profile profile = data_profile_builder().no_validity().distribution( + cudf::type_id::STRING, distribution_id::NORMAL, width, width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto col_view = column->view(); + + data_profile map_profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_rows - 1); + auto map = create_random_column(cudf::type_id::INT32, row_count{map_rows}, map_profile); + auto map_view = map->view(); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (std::getenv(BM_ARROWSTRINGVIEW)) { + auto [d_items, data_buffer] = 
create_sv_array(col_view, stream); + + auto begin = map_view.begin(); + auto end = map_view.end(); + auto input = d_items.data(); + auto output = rmm::device_uvector(map_view.size(), stream); + + state.add_global_memory_writes(map_rows * sizeof(ArrowBinaryView)); + state.add_global_memory_reads(map_rows * sizeof(ArrowBinaryView)); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + thrust::gather(rmm::exec_policy(stream), begin, end, input, output.begin()); + }); + } else { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::gather( + cudf::table_view({col_view}), map_view, cudf::out_of_bounds_policy::DONT_CHECK, stream); + }); + } +} + +NVBENCH_BENCH(BM_sv_hash) + .set_name("sv_hash") + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000}) + .add_int64_axis("max_width", {5, 10, 15, 20, 30, 60}) + .add_int64_axis("fw", {1, 0}); + +NVBENCH_BENCH(BM_sv_starts) + .set_name("sv_starts") + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000}) + .add_int64_axis("max_width", {10, 20, 30, 60}) + .add_int64_axis("tgt_size", {4, 8, 16}) + .add_int64_axis("fw", {1, 0}); + +NVBENCH_BENCH(BM_sv_sort) + .set_name("sv_sort") + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000}) + .add_int64_axis("max_width", {10, 20, 30, 60}) + .add_int64_axis("card", {100, 1000}); + +NVBENCH_BENCH(BM_sv_gather) + .set_name("sv_gather") + .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000}) + .add_int64_axis("width", {6, 12, 24, 48}) + .add_int64_axis("map_rows", {10'000, 100'000}); From 3de49dfb97c305c878593a0bda12b835f04abc16 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 2 Sep 2025 12:26:09 -0700 Subject: [PATCH 240/366] Add streams to strings convert APIs (#19780) Contributes to #15163 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19780 --- .../strings/convert/convert_booleans.pxd | 10 ++- .../strings/convert/convert_datetime.pxd | 13 ++-- .../strings/convert/convert_durations.pxd | 10 ++- .../strings/convert/convert_fixed_point.pxd | 13 ++-- .../strings/convert/convert_floats.pxd | 13 ++-- .../strings/convert/convert_integers.pxd | 25 ++++--- .../libcudf/strings/convert/convert_ipv4.pxd | 13 ++-- .../libcudf/strings/convert/convert_lists.pxd | 7 +- .../libcudf/strings/convert/convert_urls.pxd | 10 ++- .../strings/convert/convert_booleans.pyi | 13 +++- .../strings/convert/convert_booleans.pyx | 26 ++++++-- .../strings/convert/convert_datetime.pyi | 18 +++-- .../strings/convert/convert_datetime.pyx | 40 +++++++++--- .../strings/convert/convert_durations.pxd | 9 ++- .../strings/convert/convert_durations.pyi | 15 ++++- .../strings/convert/convert_durations.pyx | 28 ++++++-- .../strings/convert/convert_fixed_point.pxd | 9 +-- .../strings/convert/convert_fixed_point.pyi | 16 +++-- .../strings/convert/convert_fixed_point.pyx | 35 +++++++--- .../strings/convert/convert_floats.pyi | 12 ++-- .../strings/convert/convert_floats.pyx | 34 +++++++--- .../strings/convert/convert_integers.pxd | 15 +++-- .../strings/convert/convert_integers.pyi | 26 ++++++-- .../strings/convert/convert_integers.pyx | 65 ++++++++++++++----- .../strings/convert/convert_ipv4.pyi | 14 ++-- .../strings/convert/convert_ipv4.pyx | 35 +++++++--- .../strings/convert/convert_lists.pxd | 6 +- .../strings/convert/convert_lists.pyi | 5 +- .../strings/convert/convert_lists.pyx | 18 +++-- .../strings/convert/convert_urls.pyi | 8 ++- 
.../strings/convert/convert_urls.pyx | 25 +++++-- 31 files changed, 423 insertions(+), 163 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd index 37f39b098b3..0cd44017804 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd @@ -1,18 +1,22 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_booleans( column_view input, - string_scalar true_string) except +libcudf_exception_handler + string_scalar true_string, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_booleans( column_view booleans, string_scalar true_string, - string_scalar false_string) except +libcudf_exception_handler + string_scalar false_string, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd index c316b7891a3..d692ff6ea7a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -6,19 +6,24 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_timestamps( column_view input, data_type timestamp_type, - string format) except +libcudf_exception_handler + string format, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_timestamps( column_view timestamps, string format, - column_view names) except +libcudf_exception_handler + column_view names, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_timestamp( column_view input_col, - string format) except +libcudf_exception_handler + string format, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd index 75374208172..7d7329d58e9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler @@ -6,14 +6,18 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_durations( const column_view & input, data_type duration_type, - const string & format) except +libcudf_exception_handler + const string & format, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_durations( const column_view & durations, - const string & format) except +libcudf_exception_handler + const string & format, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index 71c866ad211..bc8a752e18b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -1,21 +1,26 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_fixed_point( column_view input, - data_type output_type) except +libcudf_exception_handler + data_type output_type, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_fixed_point( - column_view input) except +libcudf_exception_handler + column_view input, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_fixed_point( column_view input, - data_type decimal_type + data_type decimal_type, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd index 7df6b914458..198b0d8be00 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd @@ -1,20 +1,25 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_floats( column_view strings, - data_type output_type) except +libcudf_exception_handler + data_type output_type, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_floats( - column_view floats) except +libcudf_exception_handler + column_view floats, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_float( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd index 4033ef51480..f8a1b11e096 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd @@ -1,36 +1,45 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_integers( column_view input, - data_type output_type) except +libcudf_exception_handler + data_type output_type, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] from_integers( - column_view integers) except +libcudf_exception_handler + column_view integers, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_integer( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] is_integer( column_view input, - data_type int_type + data_type int_type, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] hex_to_integers( column_view input, - data_type output_type) except + + data_type output_type, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_hex( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_hex( - column_view input) except +libcudf_exception_handler + column_view input, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd index 33f9c798ae6..bff93ab48e9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd @@ -1,18 +1,23 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] ipv4_to_integers( - column_view input) except +libcudf_exception_handler + column_view input, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_ipv4( - column_view integers) except +libcudf_exception_handler + column_view integers, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] is_ipv4( - column_view input + column_view input, + cuda_stream_view stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd index 3d0a677424e..da9f9d5b5a2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd @@ -1,10 +1,12 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \ "cudf::strings" nogil: @@ -12,4 +14,5 @@ cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \ cdef unique_ptr[column] format_list_column( column_view input, string_scalar na_rep, - column_view separators) except +libcudf_exception_handler + column_view separators, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd index 03a14e215e0..37911a4118a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd @@ -1,14 +1,18 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view + cdef extern from "cudf/strings/convert/convert_urls.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] url_encode( - column_view input) except +libcudf_exception_handler + column_view input, + cuda_stream_view stream) except +libcudf_exception_handler cdef unique_ptr[column] url_decode( - column_view input) except +libcudf_exception_handler + column_view input, + cuda_stream_view stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi index 77c09242e9a..de3637afc40 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi @@ -1,9 +1,16 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar -def to_booleans(input: Column, true_string: Scalar) -> Column: ... +def to_booleans( + input: Column, true_string: Scalar, stream: Stream | None = None +) -> Column: ... def from_booleans( - booleans: Column, true_string: Scalar, false_string: Scalar + booleans: Column, + true_string: Scalar, + false_string: Scalar, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx index 1899a3b27cc..3622cca5a9c 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -9,12 +9,14 @@ from pylibcudf.libcudf.strings.convert cimport ( convert_booleans as cpp_convert_booleans, ) from pylibcudf.scalar cimport Scalar +from pylibcudf.utils cimport _get_stream from cython.operator import dereference +from rmm.pylibrmm.stream cimport Stream __all__ = ["from_booleans", "to_booleans"] -cpdef Column to_booleans(Column input, Scalar true_string): +cpdef Column to_booleans(Column input, Scalar true_string, Stream stream=None): """ Returns a new bool column by parsing boolean values from the strings in the provided strings column. @@ -29,6 +31,9 @@ cpdef Column to_booleans(Column input, Scalar true_string): true_string : Scalar String to expect for true. Non-matching strings are false + stream : Stream | None + CUDA stream on which to perform the operation. 
+ Returns ------- Column @@ -38,16 +43,20 @@ cpdef Column to_booleans(Column input, Scalar true_string): cdef const string_scalar* c_true_string = ( true_string.c_obj.get() ) + stream = _get_stream(stream) with nogil: c_result = cpp_convert_booleans.to_booleans( input.view(), - dereference(c_true_string) + dereference(c_true_string), + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_string): +cpdef Column from_booleans( + Column booleans, Scalar true_string, Scalar false_string, Stream stream=None +): """ Returns a new strings column converting the boolean values from the provided column into strings. @@ -65,6 +74,9 @@ cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_str false_string : Scalar String to use for false in the output column. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column @@ -77,12 +89,14 @@ cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_str cdef const string_scalar* c_false_string = ( false_string.c_obj.get() ) + stream = _get_stream(stream) with nogil: c_result = cpp_convert_booleans.from_booleans( booleans.view(), dereference(c_true_string), dereference(c_false_string), + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi index c6857169765..782a57895fd 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi @@ -1,12 +1,22 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType def to_timestamps( - input: Column, timestamp_type: DataType, format: str + input: Column, + timestamp_type: DataType, + format: str, + stream: Stream | None = None, ) -> Column: ... def from_timestamps( - timestamps: Column, format: str, input_strings_names: Column + timestamps: Column, + format: str, + input_strings_names: Column, + stream: Stream | None = None, +) -> Column: ... +def is_timestamp( + input: Column, format: str, stream: Stream | None = None ) -> Column: ... -def is_timestamp(input: Column, format: str) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx index f1cd684166c..9df47def056 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -8,15 +8,18 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport ( convert_datetime as cpp_convert_datetime, ) +from pylibcudf.utils cimport _get_stream from pylibcudf.types import DataType +from rmm.pylibrmm.stream cimport Stream __all__ = ["from_timestamps", "is_timestamp", "to_timestamps"] cpdef Column to_timestamps( Column input, DataType timestamp_type, - str format + str format, + Stream stream=None ): """ Returns a new timestamp column converting a strings column into @@ -35,6 +38,9 @@ cpdef Column to_timestamps( format : str String specifying the timestamp format in strings. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column @@ -42,19 +48,22 @@ cpdef Column to_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() + stream = _get_stream(stream) with nogil: c_result = cpp_convert_datetime.to_timestamps( input.view(), timestamp_type.c_obj, - c_format + c_format, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column from_timestamps( Column timestamps, str format, - Column input_strings_names + Column input_strings_names, + Stream stream=None ): """ Returns a new strings column converting a timestamp column into @@ -73,6 +82,9 @@ cpdef Column from_timestamps( input_strings_names : Column The string names to use for weekdays ("%a", "%A") and months ("%b", "%B"). + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column @@ -80,18 +92,21 @@ cpdef Column from_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() + stream = _get_stream(stream) with nogil: c_result = cpp_convert_datetime.from_timestamps( timestamps.view(), c_format, - input_strings_names.view() + input_strings_names.view(), + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column is_timestamp( Column input, - str format + str format, + Stream stream=None ): """ Verifies the given strings column can be parsed to timestamps @@ -107,6 +122,9 @@ cpdef Column is_timestamp( format : str String specifying the timestamp format in strings. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column @@ -114,10 +132,12 @@ cpdef Column is_timestamp( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() + stream = _get_stream(stream) with nogil: c_result = cpp_convert_datetime.is_timestamp( input.view(), - c_format + c_format, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd index eecdade4ef9..1634c65458e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd @@ -1,17 +1,20 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libcpp.string cimport string from pylibcudf.column cimport Column from pylibcudf.types cimport DataType +from rmm.pylibrmm.stream cimport Stream cpdef Column to_durations( Column input, DataType duration_type, - str format + str format, + Stream stream=* ) cpdef Column from_durations( Column durations, - str format=* + str format=*, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi index a5787a5fe49..90c1b8d84b2 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi @@ -1,9 +1,18 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType def to_durations( - input: Column, duration_type: DataType, format: str + input: Column, + duration_type: DataType, + format: str, + stream: Stream | None = None, +) -> Column: ... +def from_durations( + durations: Column, + format: str | None = None, + stream: Stream | None = None, ) -> Column: ... -def from_durations(durations: Column, format: str | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx index a9654afd00a..686ddeddb81 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -8,6 +8,8 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport ( convert_durations as cpp_convert_durations, ) +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream from pylibcudf.types import DataType @@ -16,7 +18,8 @@ __all__ = ["from_durations", "to_durations"] cpdef Column to_durations( Column input, DataType duration_type, - str format + str format, + Stream stream=None ): """ Returns a new duration column converting a strings column into @@ -35,6 +38,9 @@ cpdef Column to_durations( format : str String specifying the duration format in strings. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column @@ -42,19 +48,22 @@ cpdef Column to_durations( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() + stream = _get_stream(stream) with nogil: c_result = cpp_convert_durations.to_durations( input.view(), duration_type.c_obj, - c_format + c_format, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) cpdef Column from_durations( Column durations, - str format=None + str format=None, + Stream stream=None ): """ Returns a new strings column converting a duration column into @@ -71,12 +80,16 @@ cpdef Column from_durations( The string specifying output format. Default format is "%D days %H:%M:%S". + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column with formatted durations. 
""" cdef unique_ptr[column] c_result + stream = _get_stream(stream) if format is None: format = "%D days %H:%M:%S" @@ -85,7 +98,8 @@ cpdef Column from_durations( with nogil: c_result = cpp_convert_durations.from_durations( durations.view(), - c_format + c_format, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd index 049b9b3fffe..925dd04d30b 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd @@ -1,11 +1,12 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.column cimport Column from pylibcudf.types cimport DataType +from rmm.pylibrmm.stream cimport Stream -cpdef Column to_fixed_point(Column input, DataType output_type) +cpdef Column to_fixed_point(Column input, DataType output_type, Stream stream=*) -cpdef Column from_fixed_point(Column input) +cpdef Column from_fixed_point(Column input, Stream stream=*) -cpdef Column is_fixed_point(Column input, DataType decimal_type=*) +cpdef Column is_fixed_point(Column input, DataType decimal_type=*, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi index 1192d3dfcd6..5a0464f1fb3 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi @@ -1,10 +1,18 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType -def to_fixed_point(input: Column, output_type: DataType) -> Column: ... -def from_fixed_point(input: Column) -> Column: ... +def to_fixed_point( + input: Column, output_type: DataType, stream: Stream | None = None +) -> Column: ... +def from_fixed_point( + input: Column, stream: Stream | None = None +) -> Column: ... def is_fixed_point( - input: Column, decimal_type: DataType | None = None + input: Column, + decimal_type: DataType | None = None, + stream: Stream | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx index 00cbc822f36..367120492b0 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -8,11 +8,14 @@ from pylibcudf.libcudf.strings.convert cimport ( convert_fixed_point as cpp_fixed_point, ) from pylibcudf.types cimport DataType, type_id +from pylibcudf.utils cimport _get_stream + +from rmm.pylibrmm.stream cimport Stream __all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"] -cpdef Column to_fixed_point(Column input, DataType output_type): +cpdef Column to_fixed_point(Column input, DataType output_type, Stream stream=None): """ Returns a new fixed-point column parsing decimal values from the provided strings column. 
@@ -27,22 +30,27 @@ cpdef Column to_fixed_point(Column input, DataType output_type): output_type : DataType Type of fixed-point column to return including the scale value. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column of output_type. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = cpp_fixed_point.to_fixed_point( input.view(), output_type.c_obj, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column from_fixed_point(Column input): +cpdef Column from_fixed_point(Column input, Stream stream=None): """ Returns a new strings column converting the fixed-point values into a strings column. @@ -54,19 +62,25 @@ cpdef Column from_fixed_point(Column input): input : Column Fixed-point column to convert. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_fixed_point.from_fixed_point(input.view()) + c_result = cpp_fixed_point.from_fixed_point(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column is_fixed_point(Column input, DataType decimal_type=None): +cpdef Column is_fixed_point( + Column input, DataType decimal_type=None, Stream stream=None +): """ Returns a boolean column identifying strings in which all characters are valid for conversion to fixed-point. @@ -82,12 +96,16 @@ cpdef Column is_fixed_point(Column input, DataType decimal_type=None): Fixed-point type (with scale) used only for checking overflow. Defaults to Decimal64 + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column of boolean results for each string. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) if decimal_type is None: decimal_type = DataType(type_id.DECIMAL64) @@ -96,6 +114,7 @@ cpdef Column is_fixed_point(Column input, DataType decimal_type=None): c_result = cpp_fixed_point.is_fixed_point( input.view(), decimal_type.c_obj, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi index ddf4042e10d..9f4d9ecf107 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi @@ -1,8 +1,12 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType -def to_floats(strings: Column, output_type: DataType) -> Column: ... -def from_floats(floats: Column) -> Column: ... -def is_float(input: Column) -> Column: ... +def to_floats( + strings: Column, output_type: DataType, stream: Stream | None = None +) -> Column: ... +def from_floats(floats: Column, stream: Stream | None = None) -> Column: ... +def is_float(input: Column, stream: Stream | None = None) -> Column: ... 
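The convert_floats stubs above show the signature pattern this series applies everywhere: each conversion gains a trailing Stream keyword defaulting to None. A minimal usage sketch follows; it assumes pylibcudf is importable as plc, that plc.Column.from_arrow and plc.types are available in this build, and that rmm's Stream() wraps a fresh non-default CUDA stream, so treat it as illustrative rather than as part of this patch series.

import pyarrow as pa
import pylibcudf as plc
from rmm.pylibrmm.stream import Stream

strings = plc.Column.from_arrow(pa.array(["1.5", "2.25", "oops"]))
stream = Stream()  # assumed to wrap a new non-default CUDA stream

# Both the validity check and the conversion are issued on `stream`.
mask = plc.strings.convert.convert_floats.is_float(strings, stream=stream)
floats = plc.strings.convert.convert_floats.to_floats(
    strings, plc.types.DataType(plc.types.TypeId.FLOAT64), stream=stream
)

# Omitting the argument keeps the old behavior: _get_stream(None) resolves
# to the default stream, so existing callers run unchanged.
back = plc.strings.convert.convert_floats.from_floats(floats)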
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx index b5199aac577..a62a12605f9 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -8,10 +8,13 @@ from pylibcudf.libcudf.strings.convert cimport ( convert_floats as cpp_convert_floats, ) from pylibcudf.types cimport DataType +from pylibcudf.utils cimport _get_stream + +from rmm.pylibrmm.stream cimport Stream __all__ = ["from_floats", "is_float", "to_floats"] -cpdef Column to_floats(Column strings, DataType output_type): +cpdef Column to_floats(Column strings, DataType output_type, Stream stream=None): """ Returns a new numeric column by parsing float values from each string in the provided strings column. @@ -26,23 +29,28 @@ cpdef Column to_floats(Column strings, DataType output_type): output_type : DataType Type of float numeric column to return. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column with floats converted from strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = cpp_convert_floats.to_floats( strings.view(), output_type.c_obj, + stream.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column from_floats(Column floats): +cpdef Column from_floats(Column floats, Stream stream=None): """ Returns a new strings column converting the float values from the provided column into strings. @@ -54,20 +62,24 @@ cpdef Column from_floats(Column floats): floats : Column Numeric column to convert. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column with floats as strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_floats.from_floats(floats.view()) + c_result = cpp_convert_floats.from_floats(floats.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column is_float(Column input): +cpdef Column is_float(Column input, Stream stream=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to floats. @@ -79,14 +91,18 @@ cpdef Column is_float(Column input): input : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column of boolean results for each string. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_floats.is_float(input.view()) + c_result = cpp_convert_floats.is_float(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd index eff2e080c27..f678f4b7974 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd @@ -1,17 +1,18 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.column cimport Column from pylibcudf.types cimport DataType +from rmm.pylibrmm.stream cimport Stream -cpdef Column to_integers(Column input, DataType output_type) +cpdef Column to_integers(Column input, DataType output_type, Stream stream=*) -cpdef Column from_integers(Column integers) +cpdef Column from_integers(Column integers, Stream stream=*) -cpdef Column is_integer(Column input, DataType int_type=*) +cpdef Column is_integer(Column input, DataType int_type=*, Stream stream=*) -cpdef Column hex_to_integers(Column input, DataType output_type) +cpdef Column hex_to_integers(Column input, DataType output_type, Stream stream=*) -cpdef Column is_hex(Column input) +cpdef Column is_hex(Column input, Stream stream=*) -cpdef Column integers_to_hex(Column input) +cpdef Column integers_to_hex(Column input, Stream stream=*) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi index b96226fba90..947812fa882 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi @@ -1,11 +1,23 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType -def to_integers(input: Column, output_type: DataType) -> Column: ... -def from_integers(integers: Column) -> Column: ... -def is_integer(input: Column, int_type: DataType | None = None) -> Column: ... -def hex_to_integers(input: Column, output_type: DataType) -> Column: ... -def is_hex(input: Column) -> Column: ... -def integers_to_hex(input: Column) -> Column: ... +def to_integers( + input: Column, output_type: DataType, stream: Stream | None = None +) -> Column: ... +def from_integers( + integers: Column, stream: Stream | None = None +) -> Column: ... +def is_integer( + input: Column, + int_type: DataType | None = None, + stream: Stream | None = None, +) -> Column: ... +def hex_to_integers( + input: Column, output_type: DataType, stream: Stream | None = None +) -> Column: ... +def is_hex(input: Column, stream: Stream | None = None) -> Column: ... +def integers_to_hex(input: Column, stream: Stream | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx index 12984e15ce9..b581b3541e1 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -8,6 +8,8 @@ from pylibcudf.libcudf.strings.convert cimport ( convert_integers as cpp_convert_integers, ) from pylibcudf.types cimport DataType +from pylibcudf.utils cimport _get_stream +from rmm.pylibrmm.stream cimport Stream __all__ = [ "from_integers", @@ -18,7 +20,7 @@ __all__ = [ "to_integers" ] -cpdef Column to_integers(Column input, DataType output_type): +cpdef Column to_integers(Column input, DataType output_type, Stream stream=None): """ Returns a new integer numeric column parsing integer values from the provided strings column. @@ -33,25 +35,30 @@ cpdef Column to_integers(Column input, DataType output_type): output_type : DataType Type of integer numeric column to return. 
+ stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column with integers converted from strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = move( cpp_convert_integers.to_integers( input.view(), - output_type.c_obj + output_type.c_obj, + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column from_integers(Column integers): +cpdef Column from_integers(Column integers, Stream stream=None): """ Returns a new strings column converting the integer values from the provided column into strings. @@ -63,24 +70,29 @@ cpdef Column from_integers(Column integers): integers : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column with integers as strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = move( cpp_convert_integers.from_integers( integers.view(), + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column is_integer(Column input, DataType int_type=None): +cpdef Column is_integer(Column input, DataType int_type=None, Stream stream=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers. @@ -97,18 +109,23 @@ cpdef Column is_integer(Column input, DataType int_type=None): By default, does not check an integer type for underflow or overflow. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column of boolean results for each string. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) if int_type is None: with nogil: c_result = move( cpp_convert_integers.is_integer( input.view(), + stream.view() ) ) else: @@ -116,14 +133,15 @@ cpdef Column is_integer(Column input, DataType int_type=None): c_result = move( cpp_convert_integers.is_integer( input.view(), - int_type.c_obj + int_type.c_obj, + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column hex_to_integers(Column input, DataType output_type): +cpdef Column hex_to_integers(Column input, DataType output_type, Stream stream=None): """ Returns a new integer numeric column parsing hexadecimal values from the provided strings column. @@ -138,25 +156,30 @@ cpdef Column hex_to_integers(Column input, DataType output_type): output_type : DataType Type of integer numeric column to return. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column with integers converted from strings. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = move( cpp_convert_integers.hex_to_integers( input.view(), - output_type.c_obj + output_type.c_obj, + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column is_hex(Column input): +cpdef Column is_hex(Column input, Stream stream=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from hex. @@ -168,24 +191,29 @@ cpdef Column is_hex(Column input): input : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. 
+ Returns ------- Column New column of boolean results for each string. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = move( cpp_convert_integers.is_hex( input.view(), + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column integers_to_hex(Column input): +cpdef Column integers_to_hex(Column input, Stream stream=None): """ Returns a new strings column converting integer columns to hexadecimal characters. @@ -197,18 +225,23 @@ cpdef Column integers_to_hex(Column input): input : Column Integer column to convert to hex. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column with hexadecimal characters. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: c_result = move( cpp_convert_integers.integers_to_hex( input.view(), + stream.view() ) ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi index b017b32598c..a333d7801df 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi @@ -1,7 +1,13 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column -def ipv4_to_integers(input: Column) -> Column: ... -def integers_to_ipv4(integers: Column) -> Column: ... -def is_ipv4(input: Column) -> Column: ... +def ipv4_to_integers( + input: Column, stream: Stream | None = None +) -> Column: ... +def integers_to_ipv4( + integers: Column, stream: Stream | None = None +) -> Column: ... +def is_ipv4(input: Column, stream: Stream | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx index e7c6aae4fa8..c50ed0769bf 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx @@ -1,14 +1,17 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4 +from pylibcudf.utils cimport _get_stream + +from rmm.pylibrmm.stream cimport Stream __all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"] -cpdef Column ipv4_to_integers(Column input): +cpdef Column ipv4_to_integers(Column input, Stream stream=None): """ Converts IPv4 addresses into integers. @@ -19,20 +22,24 @@ cpdef Column ipv4_to_integers(Column input): input : Column Strings instance for this operation + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New uint32 column converted from strings. 
""" cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_ipv4.ipv4_to_integers(input.view()) + c_result = cpp_convert_ipv4.ipv4_to_integers(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column integers_to_ipv4(Column integers): +cpdef Column integers_to_ipv4(Column integers, Stream stream=None): """ Converts integers into IPv4 addresses as strings. @@ -43,20 +50,24 @@ cpdef Column integers_to_ipv4(Column integers): integers : Column Integer (uint32) column to convert. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_ipv4.integers_to_ipv4(integers.view()) + c_result = cpp_convert_ipv4.integers_to_ipv4(integers.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column is_ipv4(Column input): +cpdef Column is_ipv4(Column input, Stream stream=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from IPv4 format. @@ -68,14 +79,18 @@ cpdef Column is_ipv4(Column input): input : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New column of boolean results for each string. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_ipv4.is_ipv4(input.view()) + c_result = cpp_convert_ipv4.is_ipv4(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd index 1ba4272afa2..14eb3adddb0 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd @@ -1,11 +1,13 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar +from rmm.pylibrmm.stream cimport Stream cpdef Column format_list_column( Column input, Scalar na_rep=*, - Column separators=* + Column separators=*, + Stream stream=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi index 6ab3a4183e9..c3994f24787 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi @@ -1,4 +1,6 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -7,4 +9,5 @@ def format_list_column( input: Column, na_rep: Scalar | None = None, separators: Column | None = None, + stream: Stream | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx index e2abe69e519..5c0fa8f6cdc 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx @@ -24,7 +24,8 @@ __all__ = ["format_list_column"] cpdef Column format_list_column( Column input, Scalar na_rep=None, - Column separators=None + Column separators=None, + Stream stream=None ): """ Convert a list column of strings into a formatted strings column. @@ -44,18 +45,22 @@ cpdef Column format_list_column( Strings to use for enclosing list components and separating elements. Default, ``,``, ``[``, ``]`` + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column """ cdef unique_ptr[column] c_result - cdef Stream stream + cdef Stream stream_local + + stream_local = _get_stream(stream) if na_rep is None: - stream = _get_stream(None) na_rep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view()) + cpp_make_string_scalar("".encode(), stream_local.view()) ) cdef const string_scalar* c_na_rep = ( @@ -69,7 +74,8 @@ cpdef Column format_list_column( c_result = cpp_convert_lists.format_list_column( input.view(), dereference(c_na_rep), - separators.view() + separators.view(), + stream_local.view() ) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream_local) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi index 49b8468957c..43e11dc615e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi @@ -1,6 +1,8 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. + +from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column -def url_encode(input: Column) -> Column: ... -def url_decode(input: Column) -> Column: ... +def url_encode(input: Column, stream: Stream | None = None) -> Column: ... +def url_decode(input: Column, stream: Stream | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx index bd5e23bca43..24d08d1c6e6 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx @@ -1,14 +1,17 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport convert_urls as cpp_convert_urls +from pylibcudf.utils cimport _get_stream + +from rmm.pylibrmm.stream cimport Stream __all__ = ["url_decode", "url_encode"] -cpdef Column url_encode(Column input): +cpdef Column url_encode(Column input, Stream stream=None): """ Encodes each string using URL encoding. @@ -19,20 +22,24 @@ cpdef Column url_encode(Column input): input : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column. 
""" cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_urls.url_encode(input.view()) + c_result = cpp_convert_urls.url_encode(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) -cpdef Column url_decode(Column input): +cpdef Column url_decode(Column input, Stream stream=None): """ Decodes each string using URL encoding. @@ -43,14 +50,18 @@ cpdef Column url_decode(Column input): input : Column Strings instance for this operation. + stream : Stream | None + CUDA stream on which to perform the operation. + Returns ------- Column New strings column. """ cdef unique_ptr[column] c_result + stream = _get_stream(stream) with nogil: - c_result = cpp_convert_urls.url_decode(input.view()) + c_result = cpp_convert_urls.url_decode(input.view(), stream.view()) - return Column.from_libcudf(move(c_result)) + return Column.from_libcudf(move(c_result), stream) From 770c542192234ecf1c6471c1174904e0bd6129a1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Sep 2025 13:31:21 -0700 Subject: [PATCH 241/366] Avoid CategoricalColumn constructors in cuDF classic (#19837) Precursor to https://github.com/rapidsai/cudf/issues/18726. We'll want to minimize direct construction of cuDF classic column via their attributes and instead use a pylibcudf in the future Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19837 --- python/cudf/cudf/core/column/categorical.py | 9 +--- python/cudf/cudf/core/column/column.py | 48 +++++---------------- python/cudf/cudf/core/cut.py | 19 ++------ python/cudf/cudf/core/dataframe.py | 13 +----- python/cudf/cudf/core/index.py | 11 +---- python/cudf/cudf/io/parquet.py | 14 ++---- 6 files changed, 23 insertions(+), 91 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index a7bdbac409f..464d63f75f0 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -807,14 +807,7 @@ def _concat( len(cats), cast(cudf.core.column.numerical.NumericalColumn, codes_col), ) - return CategoricalColumn( - data=None, - size=codes_col.size, - dtype=CategoricalDtype(categories=cats), - mask=codes_col.base_mask, - offset=codes_col.offset, - children=(codes_col,), # type: ignore[arg-type] - ) + return codes_col._with_type_metadata(CategoricalDtype(categories=cats)) # type: ignore[return-value] def _with_type_metadata(self: Self, dtype: Dtype) -> Self: if isinstance(dtype, CategoricalDtype): diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 162c62fe2f5..161556391eb 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -73,7 +73,6 @@ is_mixed_with_object_dtype, is_pandas_nullable_extension_dtype, min_signed_type, - min_unsigned_type, np_dtypes_to_pandas_dtypes, ) from cudf.utils.scalar import pa_scalar_to_plc_scalar @@ -1851,43 +1850,18 @@ def astype(self, dtype: DtypeObj, copy: bool = False) -> ColumnBase: def as_categorical_column( self, dtype: CategoricalDtype ) -> CategoricalColumn: - ordered = dtype.ordered - - # Re-label self w.r.t. 
the provided categories if dtype._categories is not None: - cat_col = dtype._categories - codes = self._label_encoding(cats=cat_col) - codes = cudf.core.column.categorical.as_unsigned_codes( - len(cat_col), codes - ) - return cudf.core.column.categorical.CategoricalColumn( - data=None, - size=None, - dtype=dtype, - mask=self.mask, - children=(codes,), - ) - - # Categories must be unique and sorted in ascending order. - cats = self.unique().sort_values() - label_dtype = min_unsigned_type(len(cats)) - labels = self._label_encoding( - cats=cats, dtype=label_dtype, na_sentinel=pa.scalar(1) - ) - # columns include null index in factorization; remove: - if self.has_nulls(): - cats = cats.dropna() - - labels = cudf.core.column.categorical.as_unsigned_codes( - len(cats), labels - ) - return cudf.core.column.categorical.CategoricalColumn( - data=None, - size=None, - dtype=CategoricalDtype(categories=cats, ordered=ordered), - mask=self.mask, - children=(labels,), - ) + # Re-label self w.r.t. the provided categories + codes = self._label_encoding(cats=dtype._categories) + else: + # Compute categories from self + cats = self.unique().sort_values() + codes = self._label_encoding(cats=cats) + if self.has_nulls(): + # TODO: Make dropna shallow copy if there are no nulls? + cats = cats.dropna() + dtype = CategoricalDtype(categories=cats, ordered=dtype.ordered) + return codes.set_mask(self.mask)._with_type_metadata(dtype) # type: ignore[return-value] def as_numerical_column(self, dtype: np.dtype) -> NumericalColumn: raise NotImplementedError diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 767899fef04..fe02bd8b550 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -9,7 +9,6 @@ import cudf from cudf.api.types import is_list_like from cudf.core.column import as_column -from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.core.index import IntervalIndex, interval_range @@ -286,22 +285,12 @@ def cut( # should allow duplicate categories. 
return interval_labels[index_labels] - index_labels = as_unsigned_codes(len(interval_labels), index_labels) # type: ignore[arg-type] - - col = CategoricalColumn( - data=None, - size=index_labels.size, - dtype=cudf.CategoricalDtype( - categories=interval_labels, ordered=ordered - ), - mask=index_labels.base_mask, - offset=index_labels.offset, - children=(index_labels,), + categorical_index = cudf.CategoricalIndex.from_codes( + categories=interval_labels, + codes=index_labels, + ordered=ordered, ) - # we return a categorical index, as we don't have a Categorical method - categorical_index = cudf.CategoricalIndex._from_column(col) - if isinstance(orig_x, (pd.Series, cudf.Series)): # if we have a series input we return a series output res_series = cudf.Series(categorical_index, index=orig_x.index) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4571a94cd03..ab91e1a9a9b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -61,7 +61,6 @@ column_empty, concat_columns, ) -from cudf.core.column.categorical import as_unsigned_codes from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import BooleanMask from cudf.core.dtypes import ( @@ -8969,16 +8968,8 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): def _reassign_categories(categories, cols, col_idxs): for name, idx in zip(cols, col_idxs, strict=True): if idx in categories: - codes = as_unsigned_codes(len(categories[idx]), cols[name]) - cols[name] = CategoricalColumn( - data=None, - size=codes.size, - dtype=CategoricalDtype( - categories=categories[idx], ordered=False - ), - mask=codes.base_mask, - offset=codes.offset, - children=(codes,), + cols[name] = cols[name]._with_type_metadata( + CategoricalDtype(categories=categories[idx], ordered=False) ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 1770338ff0e..e4478aa586b 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -4840,15 +4840,8 @@ def from_codes( """ codes = as_column(codes, dtype=np.dtype(np.int32)) categories = as_column(categories) - cat_col = CategoricalColumn( - data=None, - size=len(codes), - dtype=cudf.CategoricalDtype( - categories=categories, ordered=ordered - ), - offset=0, - null_count=0, - children=(codes,), + cat_col = codes._with_type_metadata( + cudf.CategoricalDtype(categories=categories, ordered=ordered) ) return cls._from_column(cat_col, name=name) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 25a38a4709e..a67335fb8a6 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -24,7 +24,6 @@ from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ColumnBase, as_column, column_empty -from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.core.dataframe import DataFrame from cudf.core.dtypes import ( CategoricalDtype, @@ -1277,17 +1276,10 @@ def _parquet_to_frame( partition_categories[name].index(value), length=_len, ) - codes = as_unsigned_codes( - len(partition_categories[name]), codes - ) - col = CategoricalColumn( - data=None, - size=codes.size, - dtype=CategoricalDtype( + col = codes._with_type_metadata( + CategoricalDtype( categories=partition_categories[name], ordered=False - ), - offset=codes.offset, - children=(codes,), + ) ) else: # Not building categorical columns, so From 
245f94a2919cde7d5a72e00ad5645e5f323b1a6b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 2 Sep 2025 22:32:16 +0200 Subject: [PATCH 242/366] Update Arrow bounds to >=15,<22 (#19592) Updates Arrow bounds to >=15,<22. This makes cuDF compatible with Arrow 20 and 21. Authors: - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/19592 --- .../all_cuda-129_arch-aarch64.yaml | 2 +- .../all_cuda-129_arch-x86_64.yaml | 2 +- .../all_cuda-130_arch-aarch64.yaml | 2 +- .../all_cuda-130_arch-x86_64.yaml | 2 +- conda/recipes/cudf/recipe.yaml | 2 +- conda/recipes/pylibcudf/recipe.yaml | 2 +- cpp/cmake/thirdparty/get_arrow.cmake | 58 ++++++++++---- cpp/tests/CMakeLists.txt | 1 + .../quantiles/percentile_approx_test.cpp | 3 + dependencies.yaml | 6 +- python/cudf/cudf/testing/__init__.py | 1 + python/cudf/cudf/testing/testing.py | 78 +++++++++++++++++++ .../cudf/tests/input_output/test_parquet.py | 16 ++-- python/cudf/pyproject.toml | 4 +- python/pylibcudf/pyproject.toml | 8 +- 15 files changed, 151 insertions(+), 36 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 8fccfe513f8..4a296164cec 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -68,7 +68,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=14.0.0,<20.0.0a0 +- pyarrow>=15.0.0,<22.0.0a0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index bad1763d906..30ac023ca78 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -69,7 +69,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=14.0.0,<20.0.0a0 +- pyarrow>=15.0.0,<22.0.0a0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 322fefc07da..5ab54367559 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -68,7 +68,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=14.0.0,<20.0.0a0 +- pyarrow>=15.0.0,<22.0.0a0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 3e68fbf4d07..906a143b428 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -69,7 +69,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=14.0.0,<20.0.0a0 +- pyarrow>=15.0.0,<22.0.0a0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index 2389ed684c1..98013e84597 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -73,7 +73,7 @@ requirements: - numba-cuda 
>=0.19.1,<0.20.0a0 - numba >=0.60.0,<0.62.0a0 - numpy >=1.23,<3.0a0 - - pyarrow>=14.0.0,<20.0.0a0 + - pyarrow>=15.0.0,<22.0.0a0 - libcudf =${{ version }} - pylibcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} diff --git a/conda/recipes/pylibcudf/recipe.yaml b/conda/recipes/pylibcudf/recipe.yaml index 4273baf5fd3..bfaec91b72c 100644 --- a/conda/recipes/pylibcudf/recipe.yaml +++ b/conda/recipes/pylibcudf/recipe.yaml @@ -77,7 +77,7 @@ requirements: - packaging run_constraints: - numpy >=1.23,<3.0a0 - - pyarrow>=14.0.0,<20.0.0a0 + - pyarrow>=15.0.0,<22.0.0a0 ignore_run_exports: from_package: - cuda-cudart-dev diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 8293f96fb5b..1a52d263ea1 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -23,17 +23,26 @@ include_guard(GLOBAL) # This function finds arrow and sets any additional necessary environment variables. -function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_PARQUET) +function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_PARQUET + ENABLE_COMPUTE +) if(BUILD_STATIC) if(TARGET arrow_static) set(ARROW_FOUND TRUE PARENT_SCOPE ) - set(ARROW_LIBRARIES - arrow_static - PARENT_SCOPE - ) + if(ENABLE_COMPUTE) + set(ARROW_LIBRARIES + arrow_static arrow_compute_static + PARENT_SCOPE + ) + else() + set(ARROW_LIBRARIES + arrow_static + PARENT_SCOPE + ) + endif() return() endif() else() @@ -42,10 +51,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P TRUE PARENT_SCOPE ) - set(ARROW_LIBRARIES - arrow_shared - PARENT_SCOPE - ) + if(ENABLE_COMPUTE) + set(ARROW_LIBRARIES + arrow_shared arrow_compute_shared + PARENT_SCOPE + ) + else() + set(ARROW_LIBRARIES + arrow_shared + PARENT_SCOPE + ) + endif() return() endif() endif() @@ -92,7 +108,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P EXCLUDE_FROM_ALL ${EXCLUDE_FROM_ALL} OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" "ARROW_ACERO ON" - "ARROW_COMPUTE ON" + "ARROW_COMPUTE ${ENABLE_COMPUTE}" "ARROW_IPC ON" "ARROW_DATASET ON" "ARROW_WITH_BACKTRACE ON" @@ -126,9 +142,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P ) if(BUILD_STATIC) - set(ARROW_LIBRARIES arrow_static) + if(ENABLE_COMPUTE) + set(ARROW_LIBRARIES arrow_static arrow_compute_static) + else() + set(ARROW_LIBRARIES arrow_static) + endif() else() - set(ARROW_LIBRARIES arrow_shared) + if(ENABLE_COMPUTE) + set(ARROW_LIBRARIES arrow_shared arrow_compute_shared) + else() + set(ARROW_LIBRARIES arrow_shared) + endif() endif() # Arrow_DIR: set if CPM found Arrow on the system/conda/etc. @@ -248,7 +272,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_P BUILD Arrow VERSION ${VERSION} EXPORT_SET arrow_targets - GLOBAL_TARGETS arrow_shared arrow_static + GLOBAL_TARGETS arrow_shared arrow_static arrow_compute_static arrow_compute_shared NAMESPACE cudf:: FINAL_CODE_BLOCK arrow_code_string ) @@ -357,7 +381,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow) set(CUDF_VERSION_Arrow # This version must be kept in sync with the libarrow version pinned for builds in # dependencies.yaml. 
- 19.0.0 + 21.0.0 CACHE STRING "The version of Arrow to find (or build)" ) endif() @@ -376,7 +400,11 @@ if(NOT DEFINED CUDF_ENABLE_ARROW_PARQUET) set(CUDF_ENABLE_ARROW_PARQUET OFF) endif() +if(NOT DEFINED CUDF_ENABLE_ARROW_COMPUTE) + set(CUDF_ENABLE_ARROW_COMPUTE OFF) +endif() + find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_EXCLUDE_ARROW_FROM_ALL} - ${CUDF_ENABLE_ARROW_PARQUET} + ${CUDF_ENABLE_ARROW_PARQUET} ${CUDF_ENABLE_ARROW_COMPUTE} ) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 63b41e55f51..d0a8763a5b0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -91,6 +91,7 @@ endfunction() # No need to install Arrow libs when only the final test executables are shipped. set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +set(CUDF_ENABLE_ARROW_COMPUTE ON) include(../cmake/thirdparty/get_arrow.cmake) # ################################################################################################## diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index a65f4766159..c5c481ded3b 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -33,12 +33,15 @@ #include #include +#include namespace { std::unique_ptr arrow_percentile_approx(cudf::column_view const& _values, int delta, std::vector const& percentages) { + static auto const _arrow_init_status = arrow::compute::Initialize(); + EXPECT_TRUE(_arrow_init_status.ok()); // sort the incoming values using the same settings that groupby does. // this is a little weak because null_order::AFTER is hardcoded internally to groupby. cudf::table_view t({_values}); diff --git a/dependencies.yaml b/dependencies.yaml index 55c9f7cc446..de0115a9e6d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -511,7 +511,7 @@ dependencies: common: - output_types: [conda] packages: - - pyarrow>=14.0.0,<20.0.0a0 + - pyarrow>=15.0.0,<22.0.0a0 - output_types: [requirements, pyproject] packages: # pyarrow 17.0.0 wheels have a subtle issue around threading that @@ -519,8 +519,8 @@ dependencies: # be highly dependent on the exact build configuration, so we'll just # avoid 17.0.0 for now unless we observe similar issues in future # releases as well. 
- - pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64' - - pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64' + - pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64' + - pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64' cuda_version: specific: - output_types: conda diff --git a/python/cudf/cudf/testing/__init__.py b/python/cudf/cudf/testing/__init__.py index b03e5bf4375..53cdc183c47 100644 --- a/python/cudf/cudf/testing/__init__.py +++ b/python/cudf/cudf/testing/__init__.py @@ -2,6 +2,7 @@ from cudf.testing import narwhals_test_plugin from cudf.testing.testing import ( + assert_arrow_table_equal, assert_eq, assert_frame_equal, assert_groupby_results_equal, diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 048a54a76f8..1b2595072be 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -7,12 +7,55 @@ import cupy as cp import numpy as np import pandas as pd +import pyarrow as pa from pandas import testing as tm import cudf from cudf.core.missing import NA, NaT from cudf.utils.dtypes import CUDF_STRING_DTYPE, is_dtype_obj_numeric +pa_types = pa.types + + +def _is_string_view(dtype: pa.DataType) -> bool: + return hasattr(pa_types, "is_string_view") and pa_types.is_string_view( + dtype + ) + + +def _map_string_view_to_string(dtype: pa.DataType) -> pa.DataType: + """Convert string_view -> string""" + if _is_string_view(dtype): + return pa.string() + if pa_types.is_list(dtype): + return pa.list_(_map_string_view_to_string(dtype.value_type)) + return dtype + + +def _string_view_to_string_schema(schema: pa.Schema) -> pa.Schema: + return pa.schema( + [ + pa.field( + f.name, + _map_string_view_to_string(f.type), + nullable=f.nullable, + metadata=f.metadata, + ) + for f in schema + ], + metadata=schema.metadata, + ) + + +def _string_view_to_string(obj): + """Cast string_view -> string""" + if isinstance(obj, pa.Table): + return pa.Table.from_arrays( + obj.columns, schema=_string_view_to_string_schema(obj.schema) + ) + + return obj + def dtype_can_compare_equal_to_other(dtype): # return True if values of this dtype can compare @@ -92,6 +135,41 @@ def _check_types( ) +def assert_arrow_table_equal(left: pa.Table, right: pa.Table) -> None: + """ + Check if two pyarrow Tables are equal. + + Parameters + ---------- + left : pyarrow.Table + Left table to compare. + right : pyarrow.Table + Right table to compare. + + Raises + ------ + AssertionError + + Notes + ----- + PyArrow 21+ has shifted toward using ``string_view`` + internally in more places, whereas previous versions + used ``string``. This change causes schema equality + checks in cuDF tests to fail. To make our tests stable + and future-proof against Arrow changing representations + over time, we cast all ``string_view`` types to ``string`` + before comparison. 
+ """ + left = _string_view_to_string(left) + right = _string_view_to_string(right) + try: + assert left.equals(right) + except AssertionError: + raise_assert_detail( + "pyarrow.Table", "Arrow Tables are different", left, right + ) + + def assert_column_equal( left, right, diff --git a/python/cudf/cudf/tests/input_output/test_parquet.py b/python/cudf/cudf/tests/input_output/test_parquet.py index c57f91e6c42..98de571ecf9 100644 --- a/python/cudf/cudf/tests/input_output/test_parquet.py +++ b/python/cudf/cudf/tests/input_output/test_parquet.py @@ -29,7 +29,11 @@ ParquetWriter, merge_parquet_filemetadata, ) -from cudf.testing import assert_eq, dataset_generator as dg +from cudf.testing import ( + assert_arrow_table_equal, + assert_eq, + dataset_generator as dg, +) from cudf.testing._utils import TIMEDELTA_TYPES, set_random_null_mask_inplace @@ -1110,7 +1114,7 @@ def test_parquet_reader_struct_basic(tmp_path, data): pa.parquet.write_table(expect, fname) assert os.path.exists(fname) got = cudf.read_parquet(fname) - assert expect.equals(got.to_arrow()) + assert_arrow_table_equal(expect, got.to_arrow()) def select_columns_params(): @@ -1188,7 +1192,7 @@ def test_parquet_reader_struct_select_columns(data, columns): expect = pq.ParquetFile(buff).read(columns=columns) got = cudf.read_parquet(buff, columns=columns) - assert expect.equals(got.to_arrow()) + assert_arrow_table_equal(expect, got.to_arrow()) def test_parquet_reader_struct_los_large(tmp_path): @@ -1205,7 +1209,7 @@ def test_parquet_reader_struct_los_large(tmp_path): pa.parquet.write_table(expect, fname) assert os.path.exists(fname) got = cudf.read_parquet(fname) - assert expect.equals(got.to_arrow()) + assert_arrow_table_equal(expect, got.to_arrow()) @pytest.mark.parametrize( @@ -1243,7 +1247,7 @@ def string_list_gen_wrapped(x, y): pa.parquet.write_table(expect, fname) assert os.path.exists(fname) got = cudf.read_parquet(fname) - assert expect.equals(got.to_arrow()) + assert_arrow_table_equal(expect, got.to_arrow()) def test_parquet_reader_v2(tmp_path, simple_pdf): @@ -2403,7 +2407,7 @@ def test_parquet_writer_list_chunked(tmp_path, store_schema): got = pq.read_table(fname) # compare with pyarrow since pandas doesn't # have a list or struct dtype - assert expect.to_arrow().equals(got) + assert_arrow_table_equal(expect.to_arrow(), got) def test_parquet_nullable_boolean(tmp_path, engine): diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 69c889c0cc8..f97bd6a7eb3 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -29,8 +29,8 @@ dependencies = [ "nvtx>=0.2.1", "packaging", "pandas>=2.0,<2.4.0dev0", - "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", "pylibcudf==25.10.*,>=0.0.0a0", "rich", "rmm==25.10.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index a76bbc36bae..811af529493 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -47,16 +47,16 @@ test = [ "numba-cuda[cu13]>=0.19.1,<0.20.0a0", "numba>=0.60.0,<0.62.0a0", "pandas", - "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", "pytest", "pytest-cov", "pytest-xdist", 
"xxhash", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. pyarrow = [ - "pyarrow>=14.0.0,<20.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=14.0.0,<20.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. numpy = [ "numpy>=1.23,<3.0a0", From 3419b88d6673f79c2abd8803a6a08ca7f594a9f9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Sep 2025 17:01:39 -0700 Subject: [PATCH 243/366] Move test_timedelta/string/sorting/list/datetime.py to new cudf classic directory structure (#19723) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19723 --- python/cudf/cudf/tests/conftest.py | 6 + .../tests/dataframe/indexing/test_setitem.py | 56 +- .../tests/dataframe/methods/test_repeat.py | 18 + .../cudf/tests/dataframe/test_attributes.py | 17 + .../cudf/tests/groupby/test_reductions.py | 90 ++ .../datetimeindex/methods/test_reductions.py | 19 + .../indexes/multiindex/methods/test_repeat.py | 17 + .../timedeltaindex/methods/test_reductions.py | 26 + .../indexes/timedeltaindex/test_binops.py | 195 ++++ .../cudf/tests/private_objects/test_column.py | 107 +++ .../cudf/tests/series/accessors/test_list.py | 33 + .../tests/series/indexing/test_getitem.py | 22 + .../cudf/tests/series/indexing/test_iloc.py | 61 ++ .../tests/series/indexing/test_setitem.py | 434 +++++++++ .../cudf/tests/series/methods/test_argsort.py | 17 + .../cudf/tests/series/methods/test_astype.py | 391 +++++++- .../series/methods/test_nlargest_nsmallest.py | 39 + .../cudf/tests/series/methods/test_repeat.py | 27 + .../tests/series/methods/test_sort_values.py | 29 + .../tests/series/methods/test_to_pandas.py | 19 + .../cudf/tests/series/methods/test_unique.py | 20 + python/cudf/cudf/tests/series/test_binops.py | 851 +++++++++++++++++- .../cudf/tests/series/test_constructors.py | 28 + .../cudf/cudf/tests/series/test_np_ufuncs.py | 48 +- python/cudf/cudf/tests/series/test_repr.py | 20 + python/cudf/cudf/tests/test_categorical.py | 45 - python/cudf/cudf/tests/test_contains.py | 49 - python/cudf/cudf/tests/test_copying.py | 439 --------- python/cudf/cudf/tests/test_datetime.py | 422 --------- python/cudf/cudf/tests/test_decimal.py | 373 -------- python/cudf/cudf/tests/test_list.py | 25 - python/cudf/cudf/tests/test_serialize.py | 32 +- python/cudf/cudf/tests/test_sorting.py | 109 --- python/cudf/cudf/tests/test_string.py | 644 ------------- python/cudf/cudf/tests/test_timedelta.py | 644 ------------- 35 files changed, 2599 insertions(+), 2773 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_repeat.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_reductions.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_repeat.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_reductions.py create mode 100644 python/cudf/cudf/tests/indexes/timedeltaindex/test_binops.py create mode 100644 
python/cudf/cudf/tests/series/methods/test_argsort.py create mode 100644 python/cudf/cudf/tests/series/methods/test_repeat.py create mode 100644 python/cudf/cudf/tests/series/methods/test_sort_values.py delete mode 100644 python/cudf/cudf/tests/test_contains.py delete mode 100644 python/cudf/cudf/tests/test_copying.py delete mode 100644 python/cudf/cudf/tests/test_datetime.py delete mode 100644 python/cudf/cudf/tests/test_decimal.py delete mode 100644 python/cudf/cudf/tests/test_list.py delete mode 100644 python/cudf/cudf/tests/test_sorting.py delete mode 100644 python/cudf/cudf/tests/test_string.py delete mode 100644 python/cudf/cudf/tests/test_timedelta.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index f4157430185..8f6155bfb2c 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -461,6 +461,12 @@ def datetime_types_as_str(request): return request.param +@pytest.fixture +def datetime_types_as_str2(datetime_types_as_str): + """Used for testing cartesian product of datetime_types_as_str""" + return datetime_types_as_str + + @pytest.fixture(params=timedelta_types) def timedelta_types_as_str(request): """ diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py index 0df3cdddb1f..c566be17d4a 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py @@ -105,11 +105,6 @@ def test_setitem_dataframe_series_inplace(index): assert_eq(expected, gdf) -def test_setitem_datetime(): - df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) - assert df.date.dtype.kind == "M" - - def test_listcol_setitem_retain_dtype(): df = cudf.DataFrame( {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]} @@ -123,3 +118,54 @@ def test_listcol_setitem_retain_dtype(): # prior to this fix: https://github.com/rapidsai/cudf/pull/10151/ df2 = df1.copy() assert df2["a"].dtype == df["a"].dtype + + +def test_setitem_datetime(): + df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) + assert df.date.dtype.kind == "M" + + +@pytest.mark.parametrize("scalar", ["a", None]) +def test_string_set_scalar(scalar): + pdf = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + } + ) + gdf = cudf.DataFrame.from_pandas(pdf) + + pdf["b"] = "a" + gdf["b"] = "a" + + assert_eq(pdf["b"], gdf["b"]) + assert_eq(pdf, gdf) + + +def test_dataframe_cow_slice_setitem(): + with cudf.option_context("copy_on_write", True): + df = cudf.DataFrame( + {"a": [10, 11, 12, 13, 14], "b": [20, 30, 40, 50, 60]} + ) + slice_df = df[1:4] + + assert_eq( + slice_df, + cudf.DataFrame( + {"a": [11, 12, 13], "b": [30, 40, 50]}, index=[1, 2, 3] + ), + ) + + slice_df["a"][2] = 1111 + + assert_eq( + slice_df, + cudf.DataFrame( + {"a": [11, 1111, 13], "b": [30, 40, 50]}, index=[1, 2, 3] + ), + ) + assert_eq( + df, + cudf.DataFrame( + {"a": [10, 11, 12, 13, 14], "b": [20, 30, 40, 50, 60]} + ), + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_repeat.py b/python/cudf/cudf/tests/dataframe/methods/test_repeat.py new file mode 100644 index 00000000000..6dbb012c03a --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_repeat.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
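A note on the new repeat test below: pandas defines repeat() on Series and Index but not on DataFrame, which is why the file compares pd.Series.repeat() against a single cudf column. A minimal, pandas-only sketch of the element-wise repeat semantics being relied on (illustrative, not part of the patch):

import numpy as np
import pandas as pd

s = pd.Series([1, 1, 2, 2], name="a")
repeats = np.array([2, 0, 1, 3])
out = s.repeat(repeats)
# Element i is emitted repeats[i] times, and its index label repeats with it.
assert out.tolist() == [1, 1, 2, 2, 2, 2]
assert out.index.tolist() == [0, 0, 2, 3, 3, 3]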
+ +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_repeat_dataframe(): + rng = np.random.default_rng(seed=0) + psr = pd.DataFrame({"a": [1, 1, 2, 2]}) + gsr = cudf.from_pandas(psr) + repeats = rng.integers(10, size=4) + + # pd.DataFrame doesn't have repeat() so as a workaround, we are + # comparing pd.Series.repeat() with cudf.DataFrame.repeat()['a'] + assert_eq(psr["a"].repeat(repeats), gsr.repeat(repeats)["a"]) diff --git a/python/cudf/cudf/tests/dataframe/test_attributes.py b/python/cudf/cudf/tests/dataframe/test_attributes.py index d2cef2d8bdc..3a3c43a15c7 100644 --- a/python/cudf/cudf/tests/dataframe/test_attributes.py +++ b/python/cudf/cudf/tests/dataframe/test_attributes.py @@ -180,3 +180,20 @@ def test_ndim(): pdf = pd.DataFrame({"x": range(5), "y": range(5, 10)}) gdf = cudf.DataFrame.from_pandas(pdf) assert pdf.ndim == gdf.ndim + + +@pytest.mark.parametrize( + "index", + [ + ["a", "b", "c", "d", "e"], + np.array(["a", "b", "c", "d", "e"]), + pd.Index(["a", "b", "c", "d", "e"], name="name"), + ], +) +def test_string_index(index): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame(rng.random(size=(5, 5))) + gdf = cudf.DataFrame.from_pandas(pdf) + pdf.index = index + gdf.index = index + assert_eq(pdf, gdf) diff --git a/python/cudf/cudf/tests/groupby/test_reductions.py b/python/cudf/cudf/tests/groupby/test_reductions.py index f54c0a79337..f4d248f79c4 100644 --- a/python/cudf/cudf/tests/groupby/test_reductions.py +++ b/python/cudf/cudf/tests/groupby/test_reductions.py @@ -985,3 +985,93 @@ def test_group_by_reduce_numeric_only(by, data, groupby_reduction_methods): numeric_only=True ) assert_eq(expected, result) + + +@pytest.mark.parametrize( + "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] +) +def test_string_groupby_key(str_data): + num_keys = 2 + other_data = [1, 2, 3, 4, 5][: len(str_data)] + + pdf = pd.DataFrame( + { + 0: pd.Series(str_data, dtype="str"), + 1: pd.Series(str_data, dtype="str"), + "a": other_data, + } + ) + gdf = cudf.DataFrame( + { + 0: cudf.Series(str_data, dtype="str"), + 1: cudf.Series(str_data, dtype="str"), + "a": other_data, + } + ) + + expect = pdf.groupby(list(range(num_keys)), as_index=False).count() + got = gdf.groupby(list(range(num_keys)), as_index=False).count() + + expect = expect.sort_values([0]).reset_index(drop=True) + got = got.sort_values([0]).reset_index(drop=True) + + assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] +) +@pytest.mark.parametrize("agg", ["count", "max", "min"]) +def test_string_groupby_non_key(str_data, agg): + num_cols = 2 + other_data = [1, 2, 3, 4, 5][: len(str_data)] + + pdf = pd.DataFrame( + { + 0: pd.Series(str_data, dtype="str"), + 1: pd.Series(str_data, dtype="str"), + "a": other_data, + } + ) + gdf = cudf.DataFrame( + { + 0: cudf.Series(str_data, dtype="str"), + 1: cudf.Series(str_data, dtype="str"), + "a": other_data, + } + ) + + expect = getattr(pdf.groupby("a", as_index=False), agg)() + got = getattr(gdf.groupby("a", as_index=False), agg)() + + expect = expect.sort_values(["a"]).reset_index(drop=True) + got = got.sort_values(["a"]).reset_index(drop=True) + + if agg in ["min", "max"] and len(expect) == 0 and len(got) == 0: + for i in range(num_cols): + expect[i] = expect[i].astype("str") + + assert_eq(expect, got, check_dtype=False) + + +def test_string_groupby_key_index(): + str_data = ["a", "b", "c", "d", "e"] + other_data = 
[1, 2, 3, 4, 5] + + pdf = pd.DataFrame( + { + "a": pd.Series(str_data, dtype="str"), + "b": other_data, + } + ) + gdf = cudf.DataFrame( + { + "a": cudf.Series(str_data, dtype="str"), + "b": other_data, + } + ) + + expect = pdf.groupby("a", sort=True).count() + got = gdf.groupby("a", sort=True).count() + + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_reductions.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_reductions.py new file mode 100644 index 00000000000..be62e12edf2 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_reductions.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf + + +@pytest.mark.parametrize( + "method, kwargs", + [["mean", {}], ["std", {}], ["std", {"ddof": 0}]], +) +def test_dti_reduction(method, kwargs): + pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") + cudf_dti = cudf.from_pandas(pd_dti) + + result = getattr(cudf_dti, method)(**kwargs) + expected = getattr(pd_dti, method)(**kwargs) + assert result == expected diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_repeat.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_repeat.py new file mode 100644 index 00000000000..b4ea86c8841 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_repeat.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_repeat_index(): + rng = np.random.default_rng(seed=0) + arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] + psr = pd.MultiIndex.from_arrays(arrays, names=("number", "color")) + gsr = cudf.from_pandas(psr) + repeats = rng.integers(10, size=4) + + assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_reductions.py b/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_reductions.py new file mode 100644 index 00000000000..1e1c9b89166 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/timedeltaindex/methods/test_reductions.py @@ -0,0 +1,26 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. + + +import pandas as pd +import pytest + +import cudf + + +@pytest.mark.parametrize( + "method, kwargs", + [ + ["sum", {}], + ["mean", {}], + ["median", {}], + ["std", {}], + ["std", {"ddof": 0}], + ], +) +def test_tdi_reductions(method, kwargs): + pd_tdi = pd.TimedeltaIndex(["1 day", "2 days", "3 days"]) + cudf_tdi = cudf.from_pandas(pd_tdi) + + result = getattr(pd_tdi, method)(**kwargs) + expected = getattr(cudf_tdi, method)(**kwargs) + assert result == expected diff --git a/python/cudf/cudf/tests/indexes/timedeltaindex/test_binops.py b/python/cudf/cudf/tests/indexes/timedeltaindex/test_binops.py new file mode 100644 index 00000000000..eb23ae5041f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/timedeltaindex/test_binops.py @@ -0,0 +1,195 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
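For reference, the reduction tests above rely on fixed pandas result types that cudf mirrors: DatetimeIndex.mean() yields a Timestamp, DatetimeIndex.std() yields a Timedelta, and TimedeltaIndex reductions stay in timedelta space. A short pandas-only sketch (illustrative only):

import pandas as pd

dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"])
assert isinstance(dti.mean(), pd.Timestamp)
assert isinstance(dti.std(), pd.Timedelta)

tdi = pd.TimedeltaIndex(["1 day", "2 days", "3 days"])
assert tdi.mean() == pd.Timedelta("2 days")
# ddof=0 requests the population standard deviation, which is smaller
# than the default sample estimate (ddof=1) for non-constant data.
assert tdi.std(ddof=0) < tdi.std()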
+ +import datetime + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data_non_overflow", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +def test_timedelta_index_datetime_index_ops( + data_non_overflow, datetime_types_as_str, timedelta_types_as_str +): + gdt = cudf.Index(data_non_overflow, dtype=datetime_types_as_str) + gtd = cudf.Index(data_non_overflow, dtype=timedelta_types_as_str) + + pdt = gdt.to_pandas() + ptd = gtd.to_pandas() + + assert_eq(gdt - gtd, pdt - ptd) + assert_eq(gdt + gtd, pdt + ptd) + + +@pytest.mark.parametrize( + "datetime_data,timedelta_data", + [ + ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), + ([1000000, 200000, None], [1000000, 200000, None]), + ([], []), + ([None], [None]), + ( + [12, 12, 22, 343, 4353534, 435342], + [12, 12, 22, 343, 4353534, 435342], + ), + (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), + (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), + ([1000000, 200000, 3000000], [200000, 34543, 3000000]), + ([1000000, 200000, None], [1000000, 200000, 3000000]), + ([None], [1]), + ( + [12, 12, 22, 343, 4353534, 435342], + [None, 1, 220, 3, 34, 4353423287], + ), + ( + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + ), + ( + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + ), + ( + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ), + ( + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ), + ], +) +def test_timedelta_datetime_index_ops_misc( + datetime_data, + timedelta_data, + datetime_types_as_str, + timedelta_types_as_str, +): + gdt = cudf.Index(datetime_data, dtype=datetime_types_as_str) + gtd = cudf.Index(timedelta_data, dtype=timedelta_types_as_str) + + pdt = gdt.to_pandas() + ptd = gtd.to_pandas() + + assert_eq(gdt - gtd, pdt - ptd) + assert_eq(gdt + gtd, pdt + ptd) + + +@pytest.mark.parametrize( + "data_non_overflow", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +@pytest.mark.parametrize( + "other_scalars", + [ + pd.Timedelta(1513393355.5, unit="s"), + pd.Timedelta(34765, unit="D"), + datetime.timedelta(days=768), + datetime.timedelta(seconds=768), + datetime.timedelta(microseconds=7), + datetime.timedelta(minutes=447), + datetime.timedelta(hours=447), + datetime.timedelta(weeks=734), + np.timedelta64(4, "s"), + np.timedelta64(456, "D"), + np.timedelta64(46, "h"), + np.timedelta64("nat"), + 
np.timedelta64(1, "s"), + np.timedelta64(1, "ms"), + np.timedelta64(1, "us"), + np.timedelta64(1, "ns"), + ], +) +@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning:pandas") +def test_timedelta_index_ops_with_scalars( + request, + data_non_overflow, + other_scalars, + timedelta_types_as_str, + arithmetic_op_method, +): + if arithmetic_op_method not in ("add", "sub", "truediv", "floordiv"): + pytest.skip(f"Test not applicable for {arithmetic_op_method}") + + gtdi = cudf.Index(data=data_non_overflow, dtype=timedelta_types_as_str) + ptdi = gtdi.to_pandas() + + if arithmetic_op_method == "add": + expected = ptdi + other_scalars + actual = gtdi + other_scalars + elif arithmetic_op_method == "sub": + expected = ptdi - other_scalars + actual = gtdi - other_scalars + elif arithmetic_op_method == "truediv": + expected = ptdi / other_scalars + actual = gtdi / other_scalars + elif arithmetic_op_method == "floordiv": + expected = ptdi // other_scalars + actual = gtdi // other_scalars + + assert_eq(expected, actual) + + if arithmetic_op_method == "add": + expected = other_scalars + ptdi + actual = other_scalars + gtdi + elif arithmetic_op_method == "sub": + expected = other_scalars - ptdi + actual = other_scalars - gtdi + elif arithmetic_op_method == "truediv": + expected = other_scalars / ptdi + actual = other_scalars / gtdi + elif arithmetic_op_method == "floordiv": + expected = other_scalars // ptdi + actual = other_scalars // gtdi + + # Division by zero for datetime or timedelta is + # dubiously defined in both pandas (Any // 0 -> 0 in + # pandas) and cuDF (undefined behaviour) + request.applymarker( + pytest.mark.xfail( + condition=( + arithmetic_op_method == "floordiv" + and 0 in ptdi.astype("int") + and np.timedelta64(other_scalars).item() is not None + ), + reason="Related to https://github.com/rapidsai/cudf/issues/5938", + ) + ) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/private_objects/test_column.py b/python/cudf/cudf/tests/private_objects/test_column.py index 4d2a7eaea41..f7c7b0808d4 100644 --- a/python/cudf/cudf/tests/private_objects/test_column.py +++ b/python/cudf/cudf/tests/private_objects/test_column.py @@ -1,4 +1,6 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. 
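The xfail above ("Any // 0 -> 0 in pandas") refers to numpy's timedelta64 semantics, which pandas inherits: floor division by a zero timedelta emits a RuntimeWarning and produces 0 rather than raising. A small numpy-only sketch of that behaviour (illustrative, assuming current numpy semantics):

import warnings

import numpy as np

one = np.timedelta64(1, "s")
zero = np.timedelta64(0, "s")
with warnings.catch_warnings():
    warnings.simplefilter("ignore", RuntimeWarning)
    # No exception is raised; the result is simply 0.
    assert one // zero == 0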
+import sys +from decimal import Decimal import cupy as cp import numpy as np @@ -7,8 +9,16 @@ import pytest from numba import cuda +import rmm + import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.core.buffer import as_buffer from cudf.core.column.column import _can_values_be_equal, as_column +from cudf.core.column.decimal import Decimal32Column, Decimal64Column from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal @@ -639,3 +649,100 @@ def test_build_series_from_nullable_pandas_dtype(pd_dtype, expect_dtype): def test__can_values_be_equal(left, right, expected): assert _can_values_be_equal(left, right) is expected assert _can_values_be_equal(right, left) is expected + + +def test_string_no_children_properties(): + empty_col = cudf.core.column.StringColumn( + as_buffer(rmm.DeviceBuffer(size=0)), + size=0, + dtype=np.dtype("object"), + children=(), + ) + assert empty_col.base_children == () + assert empty_col.base_size == 0 + + assert empty_col.children == () + assert empty_col.size == 0 + + assert sys.getsizeof(empty_col) >= 0 # Accounts for Python GC overhead + + +def test_string_int_to_ipv4(): + gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]).astype( + "uint32" + ) + expected = cudf.Series( + ["0.0.0.0", None, "0.0.0.0", "41.168.0.1", "127.0.0.1", "41.197.0.1"] + ) + + got = cudf.Series._from_column(gsr._column.int2ip()) + + assert_eq(expected, got) + + +def test_string_int_to_ipv4_dtype_fail(numeric_types_as_str): + if numeric_types_as_str == "uint32": + pytest.skip(f"int2ip passes with {numeric_types_as_str}") + gsr = cudf.Series([1, 2, 3, 4, 5]).astype(numeric_types_as_str) + with pytest.raises(TypeError): + gsr._column.int2ip() + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas.", +) +def test_datetime_can_cast_safely(): + sr = cudf.Series( + ["1679-01-01", "2000-01-31", "2261-01-01"], dtype="datetime64[ms]" + ) + assert sr._column.can_cast_safely(np.dtype("datetime64[ns]")) + + sr = cudf.Series( + ["1677-01-01", "2000-01-31", "2263-01-01"], dtype="datetime64[ms]" + ) + + assert sr._column.can_cast_safely(np.dtype("datetime64[ns]")) is False + + +@pytest.mark.parametrize( + "data_", + [ + [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [1], + [-1], + [1, 2, 3, 4], + [42, 17, 41], + [1, 2, None, 4], + [None, None, None], + [], + ], +) +@pytest.mark.parametrize( + "typ_", + [ + pa.decimal128(precision=4, scale=2), + pa.decimal128(precision=5, scale=3), + pa.decimal128(precision=6, scale=4), + ], +) +@pytest.mark.parametrize("col", [Decimal32Column, Decimal64Column]) +def test_round_trip_decimal_column(data_, typ_, col): + pa_arr = pa.array(data_, type=typ_) + col_32 = col.from_arrow(pa_arr) + assert pa_arr.equals(col_32.to_arrow()) + + +def test_from_arrow_max_precision_decimal64(): + with pytest.raises(ValueError): + Decimal64Column.from_arrow( + pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19)) + ) + + +def test_from_arrow_max_precision_decimal32(): + with pytest.raises(ValueError): + Decimal32Column.from_arrow( + pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=10)) + ) diff --git a/python/cudf/cudf/tests/series/accessors/test_list.py b/python/cudf/cudf/tests/series/accessors/test_list.py index 18f86dfd7c2..a6487d496d8 100644 --- a/python/cudf/cudf/tests/series/accessors/test_list.py +++ 
b/python/cudf/cudf/tests/series/accessors/test_list.py @@ -534,3 +534,36 @@ def test_list_methods_setattr(): with pytest.raises(AttributeError): ser.list.a = "b" + + +def test_lists_contains(numeric_types_as_str): + inner_data = np.array([1, 2, 3], dtype=numeric_types_as_str) + + data = cudf.Series([inner_data]) + + contained_scalar = inner_data.dtype.type(2) + not_contained_scalar = inner_data.dtype.type(42) + + assert data.list.contains(contained_scalar)[0] + assert not data.list.contains(not_contained_scalar)[0] + + +def test_lists_contains_datetime(temporal_types_as_str): + inner_data = np.array([1, 2, 3], dtype=temporal_types_as_str) + + unit, _ = np.datetime_data(inner_data.dtype) + + data = cudf.Series([inner_data]) + + contained_scalar = inner_data.dtype.type(2, unit) + not_contained_scalar = inner_data.dtype.type(42, unit) + + assert data.list.contains(contained_scalar)[0] + assert not data.list.contains(not_contained_scalar)[0] + + +def test_lists_contains_bool(): + data = cudf.Series([[True, True, True]]) + + assert data.list.contains(True)[0] + assert not data.list.contains(False)[0] diff --git a/python/cudf/cudf/tests/series/indexing/test_getitem.py b/python/cudf/cudf/tests/series/indexing/test_getitem.py index aecc70335f1..37e0c10618f 100644 --- a/python/cudf/cudf/tests/series/indexing/test_getitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_getitem.py @@ -216,3 +216,25 @@ def test_datetime_getitem_na(): def test_timedelta_getitem_na(): s = cudf.Series([1, 2, None, 3], dtype="timedelta64[ns]") assert s[2] is cudf.NaT + + +def test_string_table_view_creation(): + data = ["hi"] * 25 + [None] * 2027 + psr = pd.Series(data) + gsr = cudf.Series.from_pandas(psr) + + expect = psr[:1] + got = gsr[:1] + + assert_eq(expect, got) + + +def test_string_slice_with_mask(): + actual = cudf.Series(["hi", "hello", None]) + expected = actual[0:3] + + assert actual._column.base_size == 3 + assert_eq(actual._column.base_size, expected._column.base_size) + assert_eq(actual._column.null_count, expected._column.null_count) + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/series/indexing/test_iloc.py b/python/cudf/cudf/tests/series/indexing/test_iloc.py index 00bc0422c7d..838091cff3e 100644 --- a/python/cudf/cudf/tests/series/indexing/test_iloc.py +++ b/python/cudf/cudf/tests/series/indexing/test_iloc.py @@ -1,6 +1,9 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
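test_string_slice_with_mask above depends on cudf slices being zero-copy views: the sliced column keeps the parent's base buffers, so base_size and null_count are unchanged. A condensed sketch of that invariant, reusing only the attributes the test itself touches (illustrative):

import cudf

s = cudf.Series(["hi", "hello", None])
view = s[0:3]
# The full-range slice shares the parent's base buffers instead of copying.
assert view._column.base_size == s._column.base_size == 3
assert view._column.null_count == s._column.null_count == 1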
+import cupy as cp
 import numpy as np
+import pandas as pd
+import pyarrow as pa
 import pytest

 import cudf
@@ -32,3 +35,61 @@ def test_struct_empty_children_slice(indices, values):
     actual = s.iloc[indices]
     expect = cudf.Series(values[indices], index=range(len(values))[indices])
     assert_eq(actual, expect)
+
+
+@pytest.mark.parametrize(
+    "item",
+    [
+        0,
+        2,
+        4,
+        slice(1, 3),
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        [0, 1, 2, 3, 4, 4, 3, 2, 1, 0],
+        np.array([0, 1, 2, 3, 4]),
+        cp.asarray(np.array([0, 1, 2, 3, 4])),
+    ],
+)
+@pytest.mark.parametrize("data", [["a"] * 5, ["a", None] * 3, [None] * 5])
+def test_string_get_item(data, item):
+    ps = pd.Series(data, dtype="str", name="nice name")
+    gs = cudf.Series(data, dtype="str", name="nice name")
+
+    got = gs.iloc[item]
+    if isinstance(got, cudf.Series):
+        got = got.to_arrow()
+
+    if isinstance(item, cp.ndarray):
+        item = cp.asnumpy(item)
+
+    expect = ps.iloc[item]
+    if isinstance(expect, pd.Series):
+        expect = pa.Array.from_pandas(expect)
+        assert pa.Array.equals(expect, got)
+    else:
+        if got is cudf.NA and expect is None:
+            return
+        assert expect == got
+
+
+@pytest.mark.parametrize("bool_", [True, False])
+@pytest.mark.parametrize("data", [["a"], ["a", None], [None]])
+@pytest.mark.parametrize("box", [list, np.array, cp.array])
+def test_string_bool_mask(data, bool_, box):
+    ps = pd.Series(data, dtype="str", name="nice name")
+    gs = cudf.Series(data, dtype="str", name="nice name")
+    item = box([bool_] * len(data))
+
+    got = gs.iloc[item]
+    if isinstance(got, cudf.Series):
+        got = got.to_arrow()
+
+    if isinstance(item, cp.ndarray):
+        item = cp.asnumpy(item)
+
+    expect = ps[item]
+    if isinstance(expect, pd.Series):
+        expect = pa.Array.from_pandas(expect)
+        assert pa.Array.equals(expect, got)
+    else:
+        assert expect == got
diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py
index aba9edcc6c7..2468477bdb7 100644
--- a/python/cudf/cudf/tests/series/indexing/test_setitem.py
+++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
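The copy-on-write tests that follow all reduce to a single rule: with the option enabled, a shallow copy shares buffers only until one side is written, at which point the writer unlinks itself; with it disabled, writes remain visible through every shallow copy. A condensed sketch using the same option_context API (illustrative):

import cudf

with cudf.option_context("copy_on_write", True):
    a = cudf.Series([1, 2, 3])
    b = a.copy(deep=False)  # shares a's buffers for now
    a[0] = 99               # a unlinks itself before writing
    assert b[0] == 1        # b still sees the original data

with cudf.option_context("copy_on_write", False):
    a = cudf.Series([1, 2, 3])
    b = a.copy(deep=False)  # plain shallow copy
    a[0] = 99
    assert b[0] == 99       # the write is visible through b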
+import decimal +import cupy as cp import numpy as np import pandas as pd import pyarrow as pa @@ -7,6 +9,7 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.core.buffer.spill_manager import get_global_manager from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @@ -416,3 +419,434 @@ def test_struct_setitem(data, item): data[1] = item expected = cudf.Series(data) assert sr.to_arrow() == expected.to_arrow() + + +def test_null_copy(): + col = cudf.Series(range(2049)) + col[:] = None + assert len(col) == 2049 + + +@pytest.mark.parametrize( + "copy_on_write, expected", + [ + (True, [1, 2, 3, 4, 5]), + (False, [1, 100, 3, 4, 5]), + ], +) +def test_series_setitem_cow(copy_on_write, expected): + with cudf.option_context("copy_on_write", copy_on_write): + actual = cudf.Series([1, 2, 3, 4, 5]) + new_copy = actual.copy(deep=False) + + actual[1] = 100 + assert_eq(actual, cudf.Series([1, 100, 3, 4, 5])) + assert_eq(new_copy, cudf.Series(expected)) + + +def test_series_setitem_both_slice_cow_on(): + with cudf.option_context("copy_on_write", True): + actual = cudf.Series([1, 2, 3, 4, 5]) + new_copy = actual.copy(deep=False) + + actual[slice(0, 2, 1)] = 100 + assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) + assert_eq(new_copy, cudf.Series([1, 2, 3, 4, 5])) + + new_copy[slice(2, 4, 1)] = 300 + assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) + assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) + + +def test_series_setitem_both_slice_cow_off(): + with cudf.option_context("copy_on_write", False): + actual = cudf.Series([1, 2, 3, 4, 5]) + new_copy = actual.copy(deep=False) + + actual[slice(0, 2, 1)] = 100 + assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) + assert_eq(new_copy, cudf.Series([100, 100, 3, 4, 5])) + + new_copy[slice(2, 4, 1)] = 300 + assert_eq(actual, cudf.Series([100, 100, 300, 300, 5])) + assert_eq(new_copy, cudf.Series([100, 100, 300, 300, 5])) + + +def test_series_setitem_partial_slice_cow_on(): + with cudf.option_context("copy_on_write", True): + actual = cudf.Series([1, 2, 3, 4, 5]) + new_copy = actual.copy(deep=False) + + new_copy[slice(2, 4, 1)] = 300 + assert_eq(actual, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) + + new_slice = actual[2:] + assert ( + new_slice._column.base_data.owner == actual._column.base_data.owner + ) + new_slice[0:2] = 10 + assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) + assert_eq(actual, cudf.Series([1, 2, 3, 4, 5])) + + +def test_series_setitem_partial_slice_cow_off(): + with cudf.option_context("copy_on_write", False): + actual = cudf.Series([1, 2, 3, 4, 5]) + new_copy = actual.copy(deep=False) + + new_copy[slice(2, 4, 1)] = 300 + assert_eq(actual, cudf.Series([1, 2, 300, 300, 5])) + assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) + + new_slice = actual[2:] + # Since COW is off, a slice should point to the same memory + ptr1 = new_slice._column.base_data.get_ptr(mode="read") + ptr2 = actual._column.base_data.get_ptr(mode="read") + assert ptr1 == ptr2 + + new_slice[0:2] = 10 + assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) + assert_eq(actual, cudf.Series([1, 2, 10, 10, 5])) + + +def test_multiple_series_cow(): + with cudf.option_context("copy_on_write", True): + # Verify constructing, modifying, deleting + # multiple copies of a series preserves + # the data appropriately when COW is enabled. 
+        s = cudf.Series([10, 20, 30, 40, 50])
+        s1 = s.copy(deep=False)
+        s2 = s.copy(deep=False)
+        s3 = s.copy(deep=False)
+        s4 = s2.copy(deep=False)
+        s5 = s4.copy(deep=False)
+        s6 = s3.copy(deep=False)
+
+        s1[0:3] = 10000
+        # s1 will be unlinked from the actual data in s
+        # and then modified. All the others should
+        # still contain the original data.
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        for ser in [s, s2, s3, s4, s5, s6]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        s6[0:3] = 3000
+        # s6 will be unlinked from the actual data in s
+        # and then modified. All the others should
+        # still contain the original data.
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50]))
+        for ser in [s2, s3, s4, s5]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        s2[1:4] = 4000
+        # s2 will be unlinked from the actual data in s
+        # and then modified. All the others should
+        # still contain the original data.
+        assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50]))
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50]))
+        for ser in [s3, s4, s5]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        s4[2:4] = 5000
+        # s4 will be unlinked from the actual data in s
+        # and then modified. All the others should
+        # still contain the original data.
+        assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50]))
+        assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50]))
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50]))
+        for ser in [s3, s5]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        s5[2:4] = 6000
+        # s5 will be unlinked from the actual data in s
+        # and then modified. All the others should
+        # still contain the original data.
+        assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50]))
+        assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50]))
+        assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50]))
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50]))
+        for ser in [s3]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        s7 = s5.copy(deep=False)
+        assert_eq(s7, cudf.Series([10, 20, 6000, 6000, 50]))
+        s7[1:3] = 55
+        # Making a copy of s5 (s7) and then modifying it
+        # shouldn't touch data in any of the other series.
+        assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50]))
+
+        assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50]))
+        assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50]))
+        assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50]))
+        assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50]))
+        for ser in [s3]:
+            assert_eq(ser, cudf.Series([10, 20, 30, 40, 50]))
+
+        # Deleting any of the following series objects
+        # shouldn't delete the rest of the weakly
+        # referenced data elsewhere.
+ + del s2 + + assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) + assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50])) + assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) + assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) + assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) + + del s4 + del s1 + + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) + assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) + assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) + assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) + + del s + del s6 + + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) + assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) + assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) + + del s5 + + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) + assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) + + del s3 + assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) + + +def test_series_zero_copy_cow_on(): + with cudf.option_context("copy_on_write", True): + s = cudf.Series([1, 2, 3, 4, 5]) + s1 = s.copy(deep=False) + cp_array = cp.asarray(s) + + # Ensure all original data & zero-copied + # data is same. + assert_eq(s, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(cp_array, cp.array([1, 2, 3, 4, 5])) + + cp_array[0:3] = 10 + # Modifying a zero-copied array should only + # modify `s` and will leave rest of the copies + # untouched. + + assert_eq(s.to_numpy(), np.array([10, 10, 10, 4, 5])) + assert_eq(s, cudf.Series([10, 10, 10, 4, 5])) + assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(cp_array, cp.array([10, 10, 10, 4, 5])) + + s2 = cudf.Series(cp_array) + assert_eq(s2, cudf.Series([10, 10, 10, 4, 5])) + + s3 = s2.copy(deep=False) + cp_array[0] = 20 + # Modifying a zero-copied array should modify + # `s2` and `s` only. Because `cp_array` + # is zero-copy shared with `s` & `s2`. + + assert_eq(s, cudf.Series([20, 10, 10, 4, 5])) + assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(cp_array, cp.array([20, 10, 10, 4, 5])) + assert_eq(s2, cudf.Series([20, 10, 10, 4, 5])) + assert_eq(s3, cudf.Series([10, 10, 10, 4, 5])) + + s4 = cudf.Series([10, 20, 30, 40, 50]) + s5 = cudf.Series(s4) + assert_eq(s5, cudf.Series([10, 20, 30, 40, 50])) + s5[0:2] = 1 + # Modifying `s5` should also modify `s4` + # because they are zero-copied. + assert_eq(s5, cudf.Series([1, 1, 30, 40, 50])) + assert_eq(s4, cudf.Series([1, 1, 30, 40, 50])) + + +def test_series_zero_copy_cow_off(): + is_spill_enabled = get_global_manager() is not None + + with cudf.option_context("copy_on_write", False): + s = cudf.Series([1, 2, 3, 4, 5]) + s1 = s.copy(deep=False) + cp_array = cp.asarray(s) + + # Ensure all original data & zero-copied + # data is same. + assert_eq(s, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) + assert_eq(cp_array, cp.array([1, 2, 3, 4, 5])) + + cp_array[0:3] = 10 + # When COW is off, modifying a zero-copied array + # will need to modify `s` & `s1` since they are + # shallow copied. + + assert_eq(s, cudf.Series([10, 10, 10, 4, 5])) + assert_eq(s1, cudf.Series([10, 10, 10, 4, 5])) + assert_eq(cp_array, cp.array([10, 10, 10, 4, 5])) + + s2 = cudf.Series(cp_array) + assert_eq(s2, cudf.Series([10, 10, 10, 4, 5])) + s3 = s2.copy(deep=False) + cp_array[0] = 20 + + # Modifying `cp_array`, will propagate the changes + # across all Series objects, because they are + # either shallow copied or zero-copied. 
+ + assert_eq(s, cudf.Series([20, 10, 10, 4, 5])) + assert_eq(s1, cudf.Series([20, 10, 10, 4, 5])) + assert_eq(cp_array, cp.array([20, 10, 10, 4, 5])) + if not is_spill_enabled: + # Since spilling might make a copy of the data, we cannot + # expect the two series to be a zero-copy of the cupy array + # when spilling is enabled globally. + assert_eq(s2, cudf.Series([20, 10, 10, 4, 5])) + assert_eq(s3, cudf.Series([20, 10, 10, 4, 5])) + + s4 = cudf.Series([10, 20, 30, 40, 50]) + s5 = cudf.Series(s4) + assert_eq(s5, cudf.Series([10, 20, 30, 40, 50])) + s5[0:2] = 1 + + # Modifying `s5` should also modify `s4` + # because they are zero-copied. + assert_eq(s5, cudf.Series([1, 1, 30, 40, 50])) + assert_eq(s4, cudf.Series([1, 1, 30, 40, 50])) + + +@pytest.mark.parametrize("copy_on_write", [True, False]) +def test_series_str_copy(copy_on_write): + with cudf.option_context("copy_on_write", copy_on_write): + s = cudf.Series(["a", "b", "c", "d", "e"]) + s1 = s.copy(deep=True) + s2 = s.copy(deep=True) + + assert_eq(s, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) + + s[0:3] = "abc" + + assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) + + s2[1:4] = "xyz" + + assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) + assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) + assert_eq(s2, cudf.Series(["a", "xyz", "xyz", "xyz", "e"])) + + +@pytest.mark.parametrize("copy_on_write", [True, False]) +def test_series_cat_copy(copy_on_write): + with cudf.option_context("copy_on_write", copy_on_write): + s = cudf.Series([10, 20, 30, 40, 50], dtype="category") + s1 = s.copy(deep=True) + s2 = s1.copy(deep=True) + s3 = s1.copy(deep=True) + + s[0] = 50 + assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + assert_eq(s2, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + assert_eq(s3, cudf.Series([10, 20, 30, 40, 50], dtype="category")) + + s2[3] = 10 + s3[2:5] = 20 + assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype=s.dtype)) + assert_eq(s2, cudf.Series([10, 20, 30, 10, 50], dtype=s.dtype)) + assert_eq(s3, cudf.Series([10, 20, 20, 20, 20], dtype=s.dtype)) + + +@pytest.mark.parametrize( + "data, dtype, item, to, expect", + [ + # scatter to a single index + ( + ["1", "2", "3"], + cudf.Decimal64Dtype(1, 0), + decimal.Decimal(5), + 1, + ["1", "5", "3"], + ), + ( + ["1.5", "2.5", "3.5"], + cudf.Decimal64Dtype(2, 1), + decimal.Decimal("5.5"), + 1, + ["1.5", "5.5", "3.5"], + ), + ( + ["1.0042", "2.0042", "3.0042"], + cudf.Decimal64Dtype(5, 4), + decimal.Decimal("5.0042"), + 1, + ["1.0042", "5.0042", "3.0042"], + ), + # scatter via boolmask + ( + ["1", "2", "3"], + cudf.Decimal64Dtype(1, 0), + decimal.Decimal(5), + [True, False, True], + ["5", "2", "5"], + ), + ( + ["1.5", "2.5", "3.5"], + cudf.Decimal64Dtype(2, 1), + decimal.Decimal("5.5"), + [True, True, True], + ["5.5", "5.5", "5.5"], + ), + ( + ["1.0042", "2.0042", "3.0042"], + cudf.Decimal64Dtype(5, 4), + decimal.Decimal("5.0042"), + [False, False, True], + ["1.0042", "2.0042", "5.0042"], + ), + # We will allow assigning a decimal with less precision + ( + ["1.00", "2.00", "3.00"], + cudf.Decimal64Dtype(3, 2), + decimal.Decimal(5), + 1, + ["1.00", "5.00", "3.00"], + ), + # But not truncation + ( 
+ ["1", "2", "3"], + cudf.Decimal64Dtype(1, 0), + decimal.Decimal("5.5"), + 1, + pa.ArrowInvalid, + ), + # We will allow for setting scalars into decimal columns + (["1", "2", "3"], cudf.Decimal64Dtype(1, 0), 5, 1, ["1", "5", "3"]), + # But not if it has too many digits to fit the precision + (["1", "2", "3"], cudf.Decimal64Dtype(1, 0), 50, 1, pa.ArrowInvalid), + ], +) +def test_series_setitem_decimal(data, dtype, item, to, expect): + data = cudf.Series([decimal.Decimal(x) for x in data], dtype=dtype) + + if expect is pa.ArrowInvalid: + with pytest.raises(expect): + data[to] = item + return + else: + expect = cudf.Series([decimal.Decimal(x) for x in expect], dtype=dtype) + data[to] = item + assert_eq(data, expect) diff --git a/python/cudf/cudf/tests/series/methods/test_argsort.py b/python/cudf/cudf/tests/series/methods/test_argsort.py new file mode 100644 index 00000000000..377f849a97c --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_argsort.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np + +import cudf + + +def test_series_argsort(numeric_types_as_str, ascending): + sr = cudf.Series([1, 3, 2, 5, 4]).astype(numeric_types_as_str) + res = sr.argsort(ascending=ascending) + + if ascending: + expected = np.argsort(sr.to_numpy(), kind="mergesort") + else: + # -1 multiply works around missing desc sort (may promote to float64) + expected = np.argsort(sr.to_numpy() * np.int8(-1), kind="mergesort") + np.testing.assert_array_equal(expected, res.to_numpy()) diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 2a535478af4..8d51d6226bc 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -1,7 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
- import datetime import zoneinfo +from decimal import Decimal import cupy as cp import numpy as np @@ -10,8 +10,10 @@ import pytest import cudf +from cudf.core.column.decimal import Decimal32Column, Decimal64Column +from cudf.core.column.numerical import NumericalColumn from cudf.testing import assert_eq -from cudf.testing._utils import assert_exceptions_equal +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @pytest.mark.parametrize( @@ -557,6 +559,237 @@ def test_datetime_infer_format(data, timezone, datetime_types_as_str): sr.astype(datetime_types_as_str) +def test_string_astype(all_supported_types_as_str): + if all_supported_types_as_str.startswith( + "int" + ) or all_supported_types_as_str.startswith("uint"): + data = ["1", "2", "3", "4", "5"] + elif all_supported_types_as_str.startswith("float"): + data = [ + "1.0", + "2.0", + "3.0", + "4.0", + None, + "5.0", + "nan", + "-INF", + "NaN", + "inF", + "NAn", + ] + elif all_supported_types_as_str.startswith("bool"): + data = ["True", "False", "True", "False", "False"] + elif all_supported_types_as_str.startswith("datetime64"): + data = [ + "2019-06-04T00:00:00", + "2019-06-04T12:12:12", + "2019-06-03T00:00:00", + "2019-05-04T00:00:00", + "2018-06-04T00:00:00", + "1922-07-21T01:02:03", + ] + elif all_supported_types_as_str.startswith("timedelta64"): + data = [ + "1 days 00:00:00", + "2 days 00:00:00", + "3 days 00:00:00", + ] + elif all_supported_types_as_str in {"str", "category"}: + data = ["ab", "cd", "ef", "gh", "ij"] + ps = pd.Series(data) + gs = cudf.Series(data) + + expect = ps.astype(all_supported_types_as_str) + got = gs.astype(all_supported_types_as_str) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, scale, precision", + [ + (["1.11", "2.22", "3.33"], 2, 3), + (["111", "222", "33"], 0, 3), + (["111000", "22000", "3000"], -3, 3), + ([None, None, None], 0, 5), + ([None, "-2345", None], 0, 5), + ([], 0, 5), + ], +) +@pytest.mark.parametrize( + "decimal_dtype", + [cudf.Decimal128Dtype, cudf.Decimal64Dtype, cudf.Decimal32Dtype], +) +def test_string_to_decimal(data, scale, precision, decimal_dtype): + gs = cudf.Series(data, dtype="str") + fp = gs.astype(decimal_dtype(scale=scale, precision=precision)) + got = fp.astype("str") + assert_eq(gs, got) + + +def test_string_empty_to_decimal(): + gs = cudf.Series(["", "-85", ""], dtype="str") + got = gs.astype(cudf.Decimal64Dtype(scale=0, precision=5)) + expected = cudf.Series( + [0, -85, 0], + dtype=cudf.Decimal64Dtype(scale=0, precision=5), + ) + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "data, scale, precision", + [ + (["1.23", "-2.34", "3.45"], 2, 3), + (["123", "-234", "345"], 0, 3), + (["12300", "-400", "5000.0"], -2, 5), + ([None, None, None], 0, 5), + ([None, "-100", None], 0, 5), + ([], 0, 5), + ], +) +@pytest.mark.parametrize( + "decimal_dtype", + [cudf.Decimal128Dtype, cudf.Decimal32Dtype, cudf.Decimal64Dtype], +) +def test_string_from_decimal(data, scale, precision, decimal_dtype): + decimal_data = [] + for d in data: + if d is None: + decimal_data.append(None) + else: + decimal_data.append(Decimal(d)) + fp = cudf.Series( + decimal_data, + dtype=decimal_dtype(scale=scale, precision=precision), + ) + gs = fp.astype("str") + got = gs.astype(decimal_dtype(scale=scale, precision=precision)) + assert_eq(fp, got) + + +def test_string_empty_astype(all_supported_types_as_str): + data = [] + ps = pd.Series(data, dtype="str") + gs = cudf.Series(data, dtype="str") + + expect = ps.astype(all_supported_types_as_str) + got = 
gs.astype(all_supported_types_as_str) + + assert_eq(expect, got) + + +def test_string_numeric_astype(numeric_and_temporal_types_as_str): + if numeric_and_temporal_types_as_str.startswith("timedelta64"): + pytest.skip( + f"Test not applicable for {numeric_and_temporal_types_as_str}" + ) + if numeric_and_temporal_types_as_str.startswith("bool"): + data = [1, 0, 1, 0, 1] + elif numeric_and_temporal_types_as_str.startswith( + "int" + ) or numeric_and_temporal_types_as_str.startswith("uint"): + data = [1, 2, 3, 4, 5] + elif numeric_and_temporal_types_as_str.startswith("float"): + data = [1.0, 2.0, 3.0, 4.0, 5.0] + elif numeric_and_temporal_types_as_str.startswith("datetime64"): + # pandas rounds the output format based on the data + # Use numpy instead + # but fix '2011-01-01T00:00:00' -> '2011-01-01 00:00:00' + data = [1000000001, 2000000001, 3000000001, 4000000001, 5000000001] + ps = np.asarray(data, dtype=numeric_and_temporal_types_as_str).astype( + str + ) + ps = np.array([i.replace("T", " ") for i in ps]) + + if not numeric_and_temporal_types_as_str.startswith("datetime64"): + ps = pd.Series(data, dtype=numeric_and_temporal_types_as_str) + + gs = cudf.Series(data, dtype=numeric_and_temporal_types_as_str) + + expect = pd.Series(ps.astype("str")) + got = gs.astype("str") + + assert_eq(expect, got) + + +def test_string_empty_numeric_astype(numeric_and_temporal_types_as_str): + data = [] + + ps = pd.Series(data, dtype=numeric_and_temporal_types_as_str) + gs = cudf.Series(data, dtype=numeric_and_temporal_types_as_str) + + expect = ps.astype("str") + got = gs.astype("str") + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data,dtype", + [ + (["0.1", "10.2", "10.876"], "float"), + (["-0.1", "10.2", "+10.876"], "float"), + (["1", "10.2", "10.876"], "float32"), + (["+123", "6344556789", "0"], "int"), + (["+123", "6344556789", "0"], "uint64"), + (["+123", "6344556789", "0"], "float"), + (["0.1", "-10.2", "10.876", None], "float"), + ], +) +@pytest.mark.parametrize("obj_type", [None, "str", "category"]) +def test_string_typecast(data, obj_type, dtype): + psr = pd.Series(data, dtype=obj_type) + gsr = cudf.Series(data, dtype=obj_type) + + expect = psr.astype(dtype=dtype) + actual = gsr.astype(dtype=dtype) + assert_eq(expect, actual) + + +@pytest.mark.parametrize( + "data,dtype", + [ + (["0.1", "10.2", "10.876"], "int"), + (["1", "10.2", "+10.876"], "int"), + (["abc", "1", "2", " "], "int"), + (["0.1", "10.2", "10.876"], "uint64"), + (["1", "10.2", "+10.876"], "uint64"), + (["abc", "1", "2", " "], "uint64"), + ([" ", "0.1", "2"], "float"), + ([""], "int"), + ([""], "uint64"), + ([" "], "float"), + (["\n"], "int"), + (["\n"], "uint64"), + (["0.1", "-10.2", "10.876", None], "int"), + (["0.1", "-10.2", "10.876", None], "uint64"), + (["0.1", "-10.2", "10.876", None, "ab"], "float"), + (["+", "-"], "float"), + (["+", "-"], "int"), + (["+", "-"], "uint64"), + (["1++++", "--2"], "float"), + (["1++++", "--2"], "int"), + (["1++++", "--2"], "uint64"), + (["++++1", "--2"], "float"), + (["++++1", "--2"], "int"), + (["++++1", "--2"], "uint64"), + ], +) +@pytest.mark.parametrize("obj_type", [None, "str", "category"]) +def test_string_typecast_error(data, obj_type, dtype): + psr = pd.Series(data, dtype=obj_type) + gsr = cudf.Series(data, dtype=obj_type) + + assert_exceptions_equal( + lfunc=psr.astype, + rfunc=gsr.astype, + lfunc_args_and_kwargs=([dtype],), + rfunc_args_and_kwargs=([dtype],), + ) + + @pytest.mark.parametrize("unit", ["ns", "us"]) def test_astype_aware_to_aware(unit): ser = cudf.Series( @@ 
-859,3 +1092,157 @@ def test_series_astype_null_categorical(): expect = cudf.Series([None, None, None], dtype="int32") got = sr.astype("int32") assert_eq(expect, got) + + +@pytest.mark.parametrize("precision, scale", [(7, 2), (11, 4), (18, 9)]) +def test_typecast_from_float_to_decimal( + request, float_types_as_str, precision, scale +): + to_dtype = cudf.Decimal64Dtype(precision, scale) + data = cudf.Series( + [ + 14.12302, + 97938.2, + np.nan, + 0.0, + -8.302014, + np.nan, + 94.31304, + -112.2314, + 0.3333333, + np.nan, + ] + ) + request.applymarker( + pytest.mark.xfail( + float_types_as_str == "float32" and to_dtype.precision > 12, + reason="https://github.com/rapidsai/cudf/issues/14169", + ) + ) + got = data.astype(float_types_as_str) + + pa_arr = got.to_arrow().cast( + pa.decimal128(to_dtype.precision, to_dtype.scale) + ) + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) + + got = got.astype(to_dtype) + + assert_eq(got, expected) + + +@pytest.mark.parametrize("precision, scale", [(9, 3), (11, 4), (18, 9)]) +def test_typecast_from_int_to_decimal(integer_types_as_str, precision, scale): + to_dtype = cudf.Decimal64Dtype(precision, scale) + data = cudf.Series( + [ + 14.12302, + 38.2, + np.nan, + 0.0, + -8.302014, + np.nan, + 94.31304, + np.nan, + -112.2314, + 0.3333333, + np.nan, + ] + ) + got = data.astype(integer_types_as_str) + + pa_arr = ( + got.to_arrow() + .cast("float64") + .cast(pa.decimal128(to_dtype.precision, to_dtype.scale)) + ) + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) + + got = got.astype(to_dtype) + + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "from_dtype", + [ + cudf.Decimal64Dtype(7, 2), + cudf.Decimal64Dtype(11, 4), + cudf.Decimal64Dtype(18, 10), + cudf.Decimal32Dtype(7, 2), + cudf.Decimal32Dtype(5, 3), + cudf.Decimal32Dtype(9, 5), + ], +) +@pytest.mark.parametrize( + "to_dtype", + [ + cudf.Decimal64Dtype(7, 2), + cudf.Decimal64Dtype(18, 10), + cudf.Decimal64Dtype(11, 4), + cudf.Decimal32Dtype(7, 2), + cudf.Decimal32Dtype(9, 5), + cudf.Decimal32Dtype(5, 3), + ], +) +def test_typecast_to_from_decimal(from_dtype, to_dtype): + data = cudf.Series( + [ + 14.12309, + 2.343942, + np.nan, + 0.0, + -8.302082, + np.nan, + 94.31308, + -112.2364, + -8.029972, + np.nan, + ] + ) + if from_dtype.scale > to_dtype.MAX_PRECISION: + pytest.skip( + "This is supposed to overflow because the representation value in " + "the source exceeds the max representable in destination dtype." 
+ ) + s = data.astype(from_dtype) + + pa_arr = s.to_arrow().cast( + pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False + ) + if isinstance(to_dtype, cudf.Decimal32Dtype): + expected = cudf.Series._from_column(Decimal32Column.from_arrow(pa_arr)) + elif isinstance(to_dtype, cudf.Decimal64Dtype): + expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) + + with expect_warning_if(to_dtype.scale < s.dtype.scale, UserWarning): + got = s.astype(to_dtype) + + assert_eq(got, expected) + + +@pytest.mark.parametrize("precision, scale", [(7, 2), (11, 4), (17, 10)]) +def test_typecast_from_decimal(precision, scale, signed_integer_types_as_str): + from_dtype = cudf.Decimal64Dtype(precision, scale) + data = cudf.Series( + [ + 14.12309, + 2.343942, + np.nan, + 0.0, + -8.302082, + np.nan, + 94.31308, + -112.2364, + -8.029972, + np.nan, + ] + ) + got = data.astype(from_dtype) + pa_arr = got.to_arrow().cast(signed_integer_types_as_str, safe=False) + + got = got.astype(signed_integer_types_as_str) + expected = cudf.Series._from_column(NumericalColumn.from_arrow(pa_arr)) + + assert_eq(got, expected) + assert_eq(got.dtype, expected.dtype) diff --git a/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py b/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py index 329c7f96602..3a60e7abdf9 100644 --- a/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py +++ b/python/cudf/cudf/tests/series/methods/test_nlargest_nsmallest.py @@ -1,8 +1,10 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import pandas as pd import pytest import cudf +from cudf.testing import assert_eq from cudf.testing._utils import ( assert_exceptions_equal, ) @@ -16,3 +18,40 @@ def test_series_nlargest_nsmallest_str_error(attr): assert_exceptions_equal( getattr(gs, attr), getattr(ps, attr), ([], {"n": 1}), ([], {"n": 1}) ) + + +@pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]]) +@pytest.mark.parametrize("n", [-100, -2, 0, 1, 4]) +def test_series_nlargest(data, n): + """Indirectly tests Series.sort_values()""" + sr = cudf.Series(data) + psr = pd.Series(data) + assert_eq(sr.nlargest(n), psr.nlargest(n)) + assert_eq(sr.nlargest(n, keep="last"), psr.nlargest(n, keep="last")) + + assert_exceptions_equal( + lfunc=psr.nlargest, + rfunc=sr.nlargest, + lfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), + rfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), + ) + + +@pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]]) +@pytest.mark.parametrize("n", [-100, -2, 0, 1, 4]) +def test_series_nsmallest(data, n): + """Indirectly tests Series.sort_values()""" + sr = cudf.Series(data) + psr = pd.Series(data) + assert_eq(sr.nsmallest(n), psr.nsmallest(n)) + assert_eq( + sr.nsmallest(n, keep="last").sort_index(), + psr.nsmallest(n, keep="last").sort_index(), + ) + + assert_exceptions_equal( + lfunc=psr.nsmallest, + rfunc=sr.nsmallest, + lfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), + rfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), + ) diff --git a/python/cudf/cudf/tests/series/methods/test_repeat.py b/python/cudf/cudf/tests/series/methods/test_repeat.py new file mode 100644 index 00000000000..dcf87331857 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_repeat.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
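test_typecast_from_decimal above builds its expected values with pyarrow's unsafe cast, which discards the fractional part of each decimal instead of refusing the conversion the way a safe cast does. A small sketch of the difference (illustrative; assumes pyarrow's decimal-to-integer cast semantics):

from decimal import Decimal

import pyarrow as pa

arr = pa.array([Decimal("2.34"), Decimal("94.31")], type=pa.decimal128(4, 2))

# The default safe cast refuses to drop the fractional digits...
try:
    arr.cast(pa.int64())
except pa.ArrowInvalid:
    pass

# ...while safe=False truncates them away.
assert arr.cast(pa.int64(), safe=False).to_pylist() == [2, 94]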
+ +import numpy as np +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_repeat(all_supported_types_as_str): + rng = np.random.default_rng(seed=0) + arr = rng.random(10) * 10 + repeats = rng.integers(10, size=10) + psr = pd.Series(arr).astype(all_supported_types_as_str) + gsr = cudf.from_pandas(psr) + + assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) + + +def test_repeat_scalar(numeric_types_as_str): + rng = np.random.default_rng(seed=0) + arr = rng.random(10) * 10 + repeats = 10 + psr = pd.Series(arr).astype(numeric_types_as_str) + gsr = cudf.from_pandas(psr) + + assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) diff --git a/python/cudf/cudf/tests/series/methods/test_sort_values.py b/python/cudf/cudf/tests/series/methods/test_sort_values.py new file mode 100644 index 00000000000..1ce6a55087f --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_sort_values.py @@ -0,0 +1,29 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", [["z", "1", "a"], ["c", None, "b"], [None] * 3] +) +def test_string_sort(data, ascending): + ps = pd.Series(data, dtype="str", name="nice name") + gs = cudf.Series(data, dtype="str", name="nice name") + + expect = ps.sort_values(ascending=ascending) + got = gs.sort_values(ascending=ascending) + + assert_eq(expect, got) + + +def test_series_sort_values_ignore_index(ignore_index): + gsr = cudf.Series([1, 3, 5, 2, 4]) + psr = gsr.to_pandas() + + expect = psr.sort_values(ignore_index=ignore_index) + got = gsr.sort_values(ignore_index=ignore_index) + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_to_pandas.py b/python/cudf/cudf/tests/series/methods/test_to_pandas.py index 9013675c191..7c7ca5c1ff4 100644 --- a/python/cudf/cudf/tests/series/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/series/methods/test_to_pandas.py @@ -248,3 +248,22 @@ def test_to_from_pandas_nulls(nulls): got = gdf_data.to_pandas() assert_eq(expect, got) + + +@pytest.mark.parametrize("data", [["a"], ["a", None], [None]]) +def test_string_export(data): + ps = pd.Series(data, dtype="str", name="nice name") + gs = cudf.Series(data, dtype="str", name="nice name") + + expect = ps + got = gs.to_pandas() + assert_eq(expect, got) + + expect = np.array(ps) + got = gs.to_numpy() + assert_eq(expect, got) + + expect = pa.Array.from_pandas(ps) + got = gs.to_arrow() + + assert pa.Array.equals(expect, got) diff --git a/python/cudf/cudf/tests/series/methods/test_unique.py b/python/cudf/cudf/tests/series/methods/test_unique.py index 8346388ad78..257c57aa1e7 100644 --- a/python/cudf/cudf/tests/series/methods/test_unique.py +++ b/python/cudf/cudf/tests/series/methods/test_unique.py @@ -84,3 +84,23 @@ def test_series_nunique(request, nan_as_null, dropna): expect = pd_series.nunique(dropna=dropna) got = cudf_series.nunique(dropna=dropna) assert expect == got + + +@pytest.mark.parametrize( + "item", + [ + ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], + ["a", "a", "a", "a", "A"], + ["A"], + ["abc", "xyz", None, "ab", "123"], + [None, None, "abc", None, "abc"], + ], +) +def test_string_unique(item): + ps = pd.Series(item) + gs = cudf.Series(item) + # Pandas `unique` returns a numpy array + pres = pd.Series(ps.unique()) + # cudf returns a cudf.Series + gres = gs.unique() + assert_eq(pres, gres) diff --git a/python/cudf/cudf/tests/series/test_binops.py b/python/cudf/cudf/tests/series/test_binops.py index 
1c288a48e90..d24dab0a012 100644 --- a/python/cudf/cudf/tests/series/test_binops.py +++ b/python/cudf/cudf/tests/series/test_binops.py @@ -1,13 +1,16 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import datetime +import decimal import operator +import cupy as cp +import numpy as np import pandas as pd import pytest import cudf -from cudf.testing._utils import ( - assert_exceptions_equal, -) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.mark.parametrize( @@ -17,19 +20,839 @@ "sr2", [pd.Series([], dtype="float64"), pd.Series(["a", "a", "c", "z", "A"])], ) +def test_series_error_equality(sr1, sr2, comparison_op): + gsr1 = cudf.from_pandas(sr1) + gsr2 = cudf.from_pandas(sr2) + + assert_exceptions_equal( + comparison_op, comparison_op, ([sr1, sr2],), ([gsr1, gsr2],) + ) + + @pytest.mark.parametrize( - "op", + "data,other", [ - operator.eq, - operator.ne, - operator.lt, - operator.gt, - operator.le, - operator.ge, + ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), + ([1000000, 200000, None], [1000000, 200000, None]), + ([], []), + ([None], [None]), + ( + [12, 12, 22, 343, 4353534, 435342], + [12, 12, 22, 343, 4353534, 435342], + ), + ([1000000, 200000, 3000000], [200000, 34543, 3000000]), + ([1000000, 200000, None], [1000000, 200000, 3000000]), + ([None], [1]), + ( + [12, 12, 22, 343, 4353534, 435342], + [None, 1, 220, 3, 34, 4353423287], + ), + (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), + (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), ], ) -def test_series_error_equality(sr1, sr2, op): - gsr1 = cudf.from_pandas(sr1) - gsr2 = cudf.from_pandas(sr2) +def test_timedelta_ops_misc_inputs( + data, other, timedelta_types_as_str, binary_op_method +): + if binary_op_method in {"mul", "rmul", "pow", "rpow"}: + pytest.skip(f"Test not applicable for {binary_op_method}") + gsr = cudf.Series(data, dtype=timedelta_types_as_str) + other_gsr = cudf.Series(other, dtype=timedelta_types_as_str) + + psr = gsr.to_pandas() + other_psr = other_gsr.to_pandas() + + expected = getattr(psr, binary_op_method)(other_psr) + actual = getattr(gsr, binary_op_method)(other_gsr) + if binary_op_method in ("eq", "lt", "gt", "le", "ge"): + actual = actual.fillna(False) + elif binary_op_method == "ne": + actual = actual.fillna(True) + + if binary_op_method == "floordiv": + expected[actual.isna().to_pandas()] = np.nan + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "datetime_data,timedelta_data", + [ + ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), + ([1000000, 200000, None], [1000000, 200000, None]), + ([], []), + ([None], [None]), + ( + [12, 12, 22, 343, 4353534, 435342], + [12, 12, 22, 343, 4353534, 435342], + ), + ([1000000, 200000, 3000000], [200000, 34543, 3000000]), + ([1000000, 200000, None], [1000000, 200000, 3000000]), + ([None], [1]), + ( + [12, 12, 22, 343, 4353534, 435342], + [None, 1, 220, 3, 34, 4353423287], + ), + (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), + (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), + ( + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + ), + ( + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + ), + ( + [11, 1132324, 2322323111, 23341, 2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ), + ( + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 
2343.241, 23432.4, 23234], + ), + ], +) +@pytest.mark.parametrize("ops", ["add", "sub"]) +def test_timedelta_ops_datetime_inputs( + datetime_types_as_str, + timedelta_types_as_str, + datetime_data, + timedelta_data, + ops, +): + gsr_datetime = cudf.Series(datetime_data, dtype=datetime_types_as_str) + gsr_timedelta = cudf.Series(timedelta_data, dtype=timedelta_types_as_str) + + psr_datetime = gsr_datetime.to_pandas() + psr_timedelta = gsr_timedelta.to_pandas() + + expected = getattr(psr_datetime, ops)(psr_timedelta) + actual = getattr(gsr_datetime, ops)(gsr_timedelta) + + assert_eq(expected, actual) + + if ops == "add": + expected = getattr(psr_timedelta, ops)(psr_datetime) + actual = getattr(gsr_timedelta, ops)(gsr_datetime) + + assert_eq(expected, actual) + elif ops == "sub": + assert_exceptions_equal( + lfunc=operator.sub, + rfunc=operator.sub, + lfunc_args_and_kwargs=([psr_timedelta, psr_datetime],), + rfunc_args_and_kwargs=([gsr_timedelta, gsr_datetime],), + ) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame( + { + "A": pd.Series(pd.date_range("2012-1-1", periods=3, freq="D")), + "B": pd.Series( + pd.timedelta_range(start="1 day", periods=3, freq="D") + ), + } + ), + pd.DataFrame( + { + "A": pd.Series( + pd.date_range("1994-1-1", periods=10, freq="D") + ), + "B": pd.Series( + pd.timedelta_range(start="1 day", periods=10, freq="D") + ), + } + ), + ], +) +@pytest.mark.parametrize("op", ["add", "sub"]) +def test_timedelta_dataframe_ops(df, op): + pdf = df + gdf = cudf.from_pandas(pdf) + + if op == "add": + pdf["C"] = pdf["A"] + pdf["B"] + gdf["C"] = gdf["A"] + gdf["B"] + elif op == "sub": + pdf["C"] = pdf["A"] - pdf["B"] + gdf["C"] = gdf["A"] - gdf["B"] + + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +@pytest.mark.parametrize( + "other_scalars", + [ + datetime.timedelta(days=768), + datetime.timedelta(seconds=768), + datetime.timedelta(microseconds=7), + datetime.timedelta(minutes=447), + datetime.timedelta(hours=447), + datetime.timedelta(weeks=734), + np.timedelta64(4, "s"), + np.timedelta64(456, "D"), + np.timedelta64(46, "h"), + np.timedelta64("nat"), + np.timedelta64(1, "s"), + np.timedelta64(1, "ms"), + np.timedelta64(1, "us"), + np.timedelta64(1, "ns"), + ], +) +def test_timedelta_series_ops_with_scalars( + data, other_scalars, timedelta_types_as_str, arithmetic_op_method, request +): + if arithmetic_op_method in { + "mul", + "rmul", + "rtruediv", + "pow", + "rpow", + "radd", + "rsub", + "rfloordiv", + "rmod", + }: + pytest.skip(f"Test not applicable for {arithmetic_op_method}") + gsr = cudf.Series(data=data, dtype=timedelta_types_as_str) + psr = gsr.to_pandas() + + if arithmetic_op_method == "add": + expected = psr + other_scalars + actual = gsr + other_scalars + elif arithmetic_op_method == "sub": + expected = psr - other_scalars + actual = gsr - other_scalars + elif arithmetic_op_method == "truediv": + expected = psr / other_scalars + actual = gsr / other_scalars + elif arithmetic_op_method == "floordiv": + expected = psr // other_scalars + actual = gsr // 
other_scalars + elif arithmetic_op_method == "mod": + expected = psr % other_scalars + actual = gsr % other_scalars + + assert_eq(expected, actual) + + if arithmetic_op_method == "add": + expected = other_scalars + psr + actual = other_scalars + gsr + elif arithmetic_op_method == "sub": + expected = other_scalars - psr + actual = other_scalars - gsr + elif arithmetic_op_method == "truediv": + expected = other_scalars / psr + actual = other_scalars / gsr + elif arithmetic_op_method == "floordiv": + expected = other_scalars // psr + actual = other_scalars // gsr + elif arithmetic_op_method == "mod": + expected = other_scalars % psr + actual = other_scalars % gsr + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "reverse", + [ + False, + pytest.param( + True, + marks=pytest.mark.xfail( + strict=True, + reason=( + "timedelta modulo by zero is dubiously defined in " + "both pandas and cuDF " + "(see https://github.com/rapidsai/cudf/issues/5938)" + ), + ), + ), + ], +) +def test_timedelta_series_mod_with_scalar_zero(reverse): + gsr = cudf.Series(data=[0.2434], dtype=np.timedelta64(1, "ns")) + psr = gsr.to_pandas() + scalar = datetime.timedelta(days=768) + if reverse: + expected = scalar % psr + actual = scalar % gsr + else: + expected = psr % scalar + actual = gsr % scalar + assert_eq(expected, actual) + + +def test_timedelta_invalid_ops(): + sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") + psr = sr.to_pandas() + + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([psr, 1],), + rfunc_args_and_kwargs=([sr, 1],), + ) + + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([psr, "a"],), + rfunc_args_and_kwargs=([sr, "a"],), + ) + + dt_sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + dt_psr = dt_sr.to_pandas() + + assert_exceptions_equal( + lfunc=operator.mod, + rfunc=operator.mod, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.mod, + rfunc=operator.mod, + lfunc_args_and_kwargs=([psr, "a"],), + rfunc_args_and_kwargs=([sr, "a"],), + check_exception_type=False, + ) + + assert_exceptions_equal( + lfunc=operator.gt, + rfunc=operator.gt, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.lt, + rfunc=operator.lt, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.ge, + rfunc=operator.ge, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.le, + rfunc=operator.le, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.truediv, + rfunc=operator.truediv, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.floordiv, + rfunc=operator.floordiv, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.mul, + rfunc=operator.mul, + lfunc_args_and_kwargs=([psr, dt_psr],), + rfunc_args_and_kwargs=([sr, dt_sr],), + ) + + assert_exceptions_equal( + lfunc=operator.mul, + rfunc=operator.mul, + lfunc_args_and_kwargs=([psr, psr],), + rfunc_args_and_kwargs=([sr, sr],), + check_exception_type=False, + ) + + assert_exceptions_equal( + lfunc=operator.xor, + 
rfunc=operator.xor, + lfunc_args_and_kwargs=([psr, psr],), + rfunc_args_and_kwargs=([sr, sr],), + ) + + +@pytest.mark.parametrize("op", [operator.add, operator.sub]) +def test_timdelta_binop_tz_timestamp(op): + s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") + pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") + with pytest.raises(NotImplementedError): + op(s, pd_tz_timestamp) + date_tz_scalar = datetime.datetime.now(datetime.timezone.utc) + with pytest.raises(NotImplementedError): + op(s, date_tz_scalar) + + +def test_timedelta_series_cmpops_pandas_compatibility(comparison_op): + gsr1 = cudf.Series( + data=[123, 456, None, 321, None], dtype="timedelta64[ns]" + ) + psr1 = gsr1.to_pandas() + + gsr2 = cudf.Series( + data=[123, 456, 789, None, None], dtype="timedelta64[ns]" + ) + psr2 = gsr2.to_pandas() + + expect = comparison_op(psr1, psr2) + with cudf.option_context("mode.pandas_compatible", True): + got = comparison_op(gsr1, gsr2) + + assert_eq(expect, got) + + +def test_string_equality(): + data1 = ["b", "c", "d", "a", "c"] + data2 = ["a", None, "c", "a", "c"] + + ps1 = pd.Series(data1) + ps2 = pd.Series(data2) + gs1 = cudf.Series(data1) + gs2 = cudf.Series(data2) + + expect = ps1 == ps2 + got = gs1 == gs2 + + assert_eq(expect, got.fillna(False)) + + expect = ps1 == "m" + got = gs1 == "m" + + assert_eq(expect, got.fillna(False)) + + ps1 = pd.Series(["a"]) + gs1 = cudf.Series(["a"]) + + expect = ps1 == "m" + got = gs1 == "m" + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "lhs", + [ + ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], + ["abc", "xyz", "a", "ab", "123", "097"], + ], +) +@pytest.mark.parametrize( + "rhs", + [ + ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], + ["a", "a", "a", "a", "A", "z"], + ], +) +def test_string_binary_op_add(lhs, rhs): + pds = pd.Series(lhs) + pd.Series(rhs) + gds = cudf.Series(lhs) + cudf.Series(rhs) + + assert_eq(pds, gds) + + +def test_concatenate_rows_of_lists(): + pser = pd.Series([["a", "a"], ["b"], ["c"]]) + gser = cudf.Series([["a", "a"], ["b"], ["c"]]) + + expect = pser + pser + got = gser + gser + + assert_eq(expect, got) + + +def test_concatenate_list_with_nonlist(): + gser1 = cudf.Series([["a", "c"], ["b", "d"], ["c", "d"]]) + gser2 = cudf.Series(["a", "b", "c"]) + with pytest.raises(TypeError): + gser1 + gser2 + + +def test_datetime_series_binops_pandas( + datetime_types_as_str, datetime_types_as_str2 +): + dti = pd.date_range("20010101", "20020215", freq="400h", name="times") + pd_data_1 = pd.Series(dti) + pd_data_2 = pd_data_1 + gdf_data_1 = cudf.Series(pd_data_1).astype(datetime_types_as_str) + gdf_data_2 = cudf.Series(pd_data_2).astype(datetime_types_as_str2) + assert_eq(pd_data_1, gdf_data_1.astype("datetime64[ns]")) + assert_eq(pd_data_2, gdf_data_2.astype("datetime64[ns]")) + assert_eq(pd_data_1 < pd_data_2, gdf_data_1 < gdf_data_2) + assert_eq(pd_data_1 > pd_data_2, gdf_data_1 > gdf_data_2) + assert_eq(pd_data_1 == pd_data_2, gdf_data_1 == gdf_data_2) + assert_eq(pd_data_1 <= pd_data_2, gdf_data_1 <= gdf_data_2) + assert_eq(pd_data_1 >= pd_data_2, gdf_data_1 >= gdf_data_2) + + +def test_datetime_series_binops_numpy( + datetime_types_as_str, datetime_types_as_str2 +): + dti = pd.date_range("20010101", "20020215", freq="400h", name="times") + pd_data_1 = pd.Series(dti) + pd_data_2 = pd_data_1 + gdf_data_1 = cudf.Series(pd_data_1).astype(datetime_types_as_str) + gdf_data_2 = cudf.Series(pd_data_2).astype(datetime_types_as_str2) + np_data_1 = np.array(pd_data_1).astype(datetime_types_as_str) + np_data_2 = 
np.array(pd_data_2).astype(datetime_types_as_str2) + np.testing.assert_equal(np_data_1, gdf_data_1.to_numpy()) + np.testing.assert_equal(np_data_2, gdf_data_2.to_numpy()) + np.testing.assert_equal( + np.less(np_data_1, np_data_2), (gdf_data_1 < gdf_data_2).to_numpy() + ) + np.testing.assert_equal( + np.greater(np_data_1, np_data_2), (gdf_data_1 > gdf_data_2).to_numpy() + ) + np.testing.assert_equal( + np.equal(np_data_1, np_data_2), (gdf_data_1 == gdf_data_2).to_numpy() + ) + np.testing.assert_equal( + np.less_equal(np_data_1, np_data_2), + (gdf_data_1 <= gdf_data_2).to_numpy(), + ) + np.testing.assert_equal( + np.greater_equal(np_data_1, np_data_2), + (gdf_data_1 >= gdf_data_2).to_numpy(), + ) + + +@pytest.mark.parametrize( + "data", + [ + pd.date_range("20010101", "20020215", freq="400h", name="times"), + pd.date_range( + "20010101", freq="243434324423423234ns", name="times", periods=10 + ), + ], +) +def test_dt_ops(data): + pd_data = pd.Series(data) + gdf_data = cudf.Series(data) + + assert_eq(pd_data == pd_data, gdf_data == gdf_data) + assert_eq(pd_data < pd_data, gdf_data < gdf_data) + assert_eq(pd_data > pd_data, gdf_data > gdf_data) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4, 10, 100, 20000], + [None] * 7, + [10, 20, 30, None, 100, 200, None], + [3223.234, 342.2332, 23423.23, 3343.23324, 23432.2323, 242.23, 233], + ], +) +@pytest.mark.parametrize( + "other", + [ + [1, 2, 3, 4, 10, 100, 20000], + [None] * 7, + [10, 20, 30, None, 100, 200, None], + [3223.234, 342.2332, 23423.23, 3343.23324, 23432.2323, 242.23, 233], + datetime.datetime(1993, 6, 22, 13, 30), + datetime.datetime(2005, 1, 22, 10, 00), + np.datetime64("2005-02"), + np.datetime64("2005-02-25"), + np.datetime64("2005-02-25T03:30"), + np.datetime64("nat"), + # TODO: https://github.com/pandas-dev/pandas/issues/52295 + ], +) +def test_datetime_subtract( + data, other, datetime_types_as_str, datetime_types_as_str2 +): + gsr = cudf.Series(data, dtype=datetime_types_as_str) + psr = gsr.to_pandas() + + if isinstance(other, np.datetime64): + gsr_other = other + psr_other = other + else: + gsr_other = cudf.Series(other, dtype=datetime_types_as_str2) + psr_other = gsr_other.to_pandas() + + expected = psr - psr_other + actual = gsr - gsr_other + + assert_eq(expected, actual) + + expected = psr_other - psr + actual = gsr_other - gsr + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data", + [ + [1000000, 200000, 3000000], + [1000000, 200000, None], + [], + [None], + [None, None, None, None, None], + [12, 12, 22, 343, 4353534, 435342], + np.array([10, 20, 30, None, 100]), + cp.asarray([10, 20, 30, 100]), + [1000000, 200000, 3000000], + [1000000, 200000, None], + [1], + [12, 11, 232, 223432411, 2343241, 234324, 23234], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], + [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], + ], +) +@pytest.mark.parametrize( + "other_scalars", + [ + datetime.timedelta(days=768), + datetime.timedelta(seconds=768), + datetime.timedelta(microseconds=7), + datetime.timedelta(minutes=447), + datetime.timedelta(hours=447), + datetime.timedelta(weeks=734), + np.timedelta64(4, "s"), + np.timedelta64(456, "D"), + np.timedelta64(46, "h"), + np.timedelta64("nat"), + np.timedelta64(1, "s"), + np.timedelta64(1, "ms"), + np.timedelta64(1, "us"), + np.timedelta64(1, "ns"), + ], +) +@pytest.mark.parametrize("op", ["add", "sub"]) +def test_datetime_series_ops_with_scalars( + data, other_scalars, datetime_types_as_str, op +): 
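+    # Illustrative sketch, using values drawn from the parametrization
+    # above: with data=[1] and dtype="datetime64[s]", the "add" branch
+    # expects
+    #     cudf.Series([1], dtype="datetime64[s]") + np.timedelta64(4, "s")
+    # to match pandas and yield 1970-01-01 00:00:05, i.e. the scalar
+    # timedelta is broadcast across the whole datetime column.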
+ gsr = cudf.Series(data=data, dtype=datetime_types_as_str) + psr = gsr.to_pandas() + + if op == "add": + expected = psr + other_scalars + actual = gsr + other_scalars + elif op == "sub": + expected = psr - other_scalars + actual = gsr - other_scalars + + assert_eq(expected, actual) + + if op == "add": + expected = other_scalars + psr + actual = other_scalars + gsr + + assert_eq(expected, actual) + + elif op == "sub": + assert_exceptions_equal( + lfunc=operator.sub, + rfunc=operator.sub, + lfunc_args_and_kwargs=([other_scalars, psr],), + rfunc_args_and_kwargs=([other_scalars, gsr],), + ) + + +@pytest.mark.parametrize("data", ["20110101", "20120101", "20130101"]) +@pytest.mark.parametrize("other_scalars", ["20110101", "20120101", "20130101"]) +def test_datetime_series_cmpops_with_scalars( + data, other_scalars, datetime_types_as_str, comparison_op +): + gsr = cudf.Series(data=data, dtype=datetime_types_as_str) + psr = gsr.to_pandas() + + expect = comparison_op(psr, other_scalars) + got = comparison_op(gsr, other_scalars) + + assert_eq(expect, got) + + +def test_datetime_invalid_ops(): + sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + psr = sr.to_pandas() + + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), + rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), + ) + + assert_exceptions_equal( + lfunc=operator.truediv, + rfunc=operator.truediv, + lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), + rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), + ) + + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([psr, psr],), + rfunc_args_and_kwargs=([sr, sr],), + ) + + assert_exceptions_equal( + lfunc=operator.floordiv, + rfunc=operator.floordiv, + lfunc_args_and_kwargs=([psr, psr],), + rfunc_args_and_kwargs=([sr, sr],), + ) + + assert_exceptions_equal( + lfunc=operator.floordiv, + rfunc=operator.floordiv, + lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), + rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), + ) + + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([psr, 1],), + rfunc_args_and_kwargs=([sr, 1],), + ) + + assert_exceptions_equal( + lfunc=operator.truediv, + rfunc=operator.truediv, + lfunc_args_and_kwargs=([psr, "a"],), + rfunc_args_and_kwargs=([sr, "a"],), + ) + + assert_exceptions_equal( + lfunc=operator.mul, + rfunc=operator.mul, + lfunc_args_and_kwargs=([psr, 1],), + rfunc_args_and_kwargs=([sr, 1],), + ) + + +def test_datetime_binop_tz_timestamp(comparison_op): + s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") + pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") + with pytest.raises(NotImplementedError): + comparison_op(s, pd_tz_timestamp) + + date_scalar = datetime.datetime.now(datetime.timezone.utc) + with pytest.raises(NotImplementedError): + comparison_op(s, date_scalar) + + +def test_datetime_series_cmpops_pandas_compatibility(comparison_op): + data1 = ["20110101", "20120101", None, "20140101", None] + data2 = ["20110101", "20120101", "20130101", None, None] + gsr1 = cudf.Series(data=data1, dtype="datetime64[ns]") + psr1 = gsr1.to_pandas() + + gsr2 = cudf.Series(data=data2, dtype="datetime64[ns]") + psr2 = gsr2.to_pandas() + + expect = comparison_op(psr1, psr2) + with cudf.option_context("mode.pandas_compatible", True): + got = comparison_op(gsr1, gsr2) + + assert_eq(expect, 
got) + + +def test_decimal_overflow(): + s = cudf.Series( + [decimal.Decimal("0.0009384233522166997927180531650178250")] + ) + result = s * s + assert_eq(cudf.Decimal128Dtype(precision=38, scale=37), result.dtype) + + s = cudf.Series([1, 2], dtype=cudf.Decimal128Dtype(precision=38, scale=0)) + result = s * decimal.Decimal("1.0") + assert_eq(cudf.Decimal128Dtype(precision=38, scale=1), result.dtype) + + +def test_decimal_binop_upcast_operands(): + ser1 = cudf.Series([0.51, 1.51, 2.51]).astype(cudf.Decimal64Dtype(18, 2)) + ser2 = cudf.Series([0.90, 0.96, 0.99]).astype(cudf.Decimal128Dtype(19, 2)) + result = ser1 + ser2 + expected = cudf.Series([1.41, 2.47, 3.50]).astype( + cudf.Decimal128Dtype(20, 2) + ) + assert_eq(result, expected) + + +def test_categorical_compare_ordered(): + cat1 = pd.Categorical( + ["a", "a", "b", "c", "a"], categories=["a", "b", "c"], ordered=True + ) + pdsr1 = pd.Series(cat1) + sr1 = cudf.Series(cat1) + cat2 = pd.Categorical( + ["a", "b", "a", "c", "b"], categories=["a", "b", "c"], ordered=True + ) + pdsr2 = pd.Series(cat2) + sr2 = cudf.Series(cat2) + + # test equal + out = sr1 == sr1 + assert out.dtype == np.bool_ + assert type(out[0]) is np.bool_ + assert np.all(out.to_numpy()) + assert np.all(pdsr1 == pdsr1) + + # test inequality + out = sr1 != sr1 + assert not np.any(out.to_numpy()) + assert not np.any(pdsr1 != pdsr1) + + assert pdsr1.cat.ordered + assert sr1.cat.ordered + + # test using ordered operators + np.testing.assert_array_equal(pdsr1 < pdsr2, (sr1 < sr2).to_numpy()) + np.testing.assert_array_equal(pdsr1 > pdsr2, (sr1 > sr2).to_numpy()) + + +def test_categorical_binary_add(): + cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) + pdsr = pd.Series(cat) + sr = cudf.Series(cat) - assert_exceptions_equal(op, op, ([sr1, sr2],), ([gsr1, gsr2],)) + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([pdsr, pdsr],), + rfunc_args_and_kwargs=([sr, sr],), + ) diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index e9ccee3a3ae..6ad6a1a41cb 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -1361,6 +1361,34 @@ def test_timezone_pyarrow_array(): assert_eq(result, expected) +def test_string_ingest(one_dimensional_array_types): + expect = ["a", "a", "b", "c", "a"] + data = one_dimensional_array_types(expect) + got = cudf.Series(data) + assert got.dtype == np.dtype("object") + assert len(got) == 5 + for idx, val in enumerate(expect): + assert expect[idx] == got[idx] + + +def test_decimal_invalid_precision(): + with pytest.raises(pa.ArrowInvalid): + cudf.Series([10, 20, 30], dtype=cudf.Decimal64Dtype(2, 2)) + + with pytest.raises(pa.ArrowInvalid): + cudf.Series([decimal.Decimal("300")], dtype=cudf.Decimal64Dtype(2, 1)) + + +@pytest.mark.parametrize( + "input_obj", [[decimal.Decimal(1), cudf.NA, decimal.Decimal(3)]] +) +def test_series_construction_decimals_with_nulls(input_obj): + expect = pa.array(input_obj, from_pandas=True) + got = cudf.Series(input_obj).to_arrow() + + assert expect.equals(got) + + @pytest.mark.parametrize( "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"] ) diff --git a/python/cudf/cudf/tests/series/test_np_ufuncs.py b/python/cudf/cudf/tests/series/test_np_ufuncs.py index c43f73b7e1f..450bc06dd46 100644 --- a/python/cudf/cudf/tests/series/test_np_ufuncs.py +++ b/python/cudf/cudf/tests/series/test_np_ufuncs.py @@ -1,5 +1,5 @@ # 
Copyright (c) 2025, NVIDIA CORPORATION.
-
+import datetime
 import operator
 from functools import reduce
 
@@ -208,3 +208,49 @@ def test_ufunc_cudf_series_error_with_out_kwarg():
     cudf_s3 = cudf.Series(data=[0, 0, 0, 0])
     with pytest.raises(TypeError):
         np.add(x1=cudf_s1, x2=cudf_s2, out=cudf_s3)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1000000, 200000, 3000000],
+        [1000000, 200000, None],
+        [],
+        [None],
+        [None, None, None, None, None],
+        [12, 12, 22, 343, 4353534, 435342],
+        np.array([10, 20, 30, None, 100]),
+        cp.asarray([10, 20, 30, 100]),
+        [1000000, 200000, 3000000],
+        [1000000, 200000, None],
+        [1],
+        [12, 11, 232, 223432411, 2343241, 234324, 23234],
+        [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234],
+        [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323],
+        [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234],
+    ],
+)
+@pytest.mark.parametrize(
+    "scalar",
+    [
+        datetime.timedelta(days=768),
+        datetime.timedelta(seconds=768),
+        datetime.timedelta(microseconds=7),
+        np.timedelta64("nat"),
+        np.timedelta64(1, "s"),
+        np.timedelta64(1, "ms"),
+        np.timedelta64(1, "us"),
+        np.timedelta64(1, "ns"),
+    ],
+)
+@pytest.mark.parametrize("op", [np.add, np.subtract])
+def test_datetime_series_ops_with_scalars_misc(
+    data, scalar, datetime_types_as_str, op
+):
+    gsr = cudf.Series(data=data, dtype=datetime_types_as_str)
+    psr = gsr.to_pandas()
+
+    expect = op(psr, scalar)
+    got = op(gsr, scalar)
+
+    assert_eq(expect, got)
diff --git a/python/cudf/cudf/tests/series/test_repr.py b/python/cudf/cudf/tests/series/test_repr.py
index 2e1e9888ff4..1bf01cd94da 100644
--- a/python/cudf/cudf/tests/series/test_repr.py
+++ b/python/cudf/cudf/tests/series/test_repr.py
@@ -519,3 +519,23 @@ def test_empty_series_name():
     gs = cudf.from_pandas(ps)
 
     assert repr(ps) == repr(gs)
+
+
+@pytest.mark.parametrize("item", [0, slice(0, 1)])
+@pytest.mark.parametrize("data", [["a"], ["a", None], [None]])
+def test_string_repr(data, item, request):
+    if data == [None]:
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="Missing value repr should be <NA> instead of None",
+            )
+        )
+    ps = pd.Series(data, dtype="str", name="nice name")
+    gs = cudf.Series(data, dtype="str", name="nice name")
+
+    got_out = gs.iloc[item]
+    expect_out = ps.iloc[item]
+
+    expect = str(expect_out)
+    got = str(got_out)
+    assert expect == got
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index c5e2f05fcd9..bfa21b35bbd 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -111,51 +111,6 @@ def test_categorical_compare_unordered():
     )
 
 
-def test_categorical_compare_ordered():
-    cat1 = pd.Categorical(
-        ["a", "a", "b", "c", "a"], categories=["a", "b", "c"], ordered=True
-    )
-    pdsr1 = pd.Series(cat1)
-    sr1 = cudf.Series(cat1)
-    cat2 = pd.Categorical(
-        ["a", "b", "a", "c", "b"], categories=["a", "b", "c"], ordered=True
-    )
-    pdsr2 = pd.Series(cat2)
-    sr2 = cudf.Series(cat2)
-
-    # test equal
-    out = sr1 == sr1
-    assert out.dtype == np.bool_
-    assert type(out[0]) is np.bool_
-    assert np.all(out.to_numpy())
-    assert np.all(pdsr1 == pdsr1)
-
-    # test inequality
-    out = sr1 != sr1
-    assert not np.any(out.to_numpy())
-    assert not np.any(pdsr1 != pdsr1)
-
-    assert pdsr1.cat.ordered
-    assert sr1.cat.ordered
-
-    # test using ordered operators
-    np.testing.assert_array_equal(pdsr1 < pdsr2, (sr1 < sr2).to_numpy())
-    np.testing.assert_array_equal(pdsr1 > pdsr2, (sr1 > sr2).to_numpy())
-
-
-def test_categorical_binary_add():
-    cat = pd.Categorical(["a", "a",
"b", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([pdsr, pdsr],), - rfunc_args_and_kwargs=([sr, sr],), - ) - - def test_categorical_element_indexing(): """ Element indexing to a cat column must give the underlying object diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py deleted file mode 100644 index 7db558335fe..00000000000 --- a/python/cudf/cudf/tests/test_contains.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. - - -import numpy as np -import pytest - -import cudf -from cudf import Series -from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -def test_lists_contains(dtype): - dtype = cudf.dtype(dtype) - inner_data = np.array([1, 2, 3], dtype=dtype) - - data = Series([inner_data]) - - contained_scalar = inner_data.dtype.type(2) - not_contained_scalar = inner_data.dtype.type(42) - - assert data.list.contains(contained_scalar)[0] - assert not data.list.contains(not_contained_scalar)[0] - - -@pytest.mark.parametrize("dtype", DATETIME_TYPES + TIMEDELTA_TYPES) -def test_lists_contains_datetime(dtype): - dtype = cudf.dtype(dtype) - inner_data = np.array([1, 2, 3]) - - unit, _ = np.datetime_data(dtype) - - data = Series([inner_data]) - - contained_scalar = inner_data.dtype.type(2) - not_contained_scalar = inner_data.dtype.type(42) - - assert data.list.contains(contained_scalar)[0] - assert not data.list.contains(not_contained_scalar)[0] - - -def test_lists_contains_bool(): - data = Series([[True, True, True]]) - - contained_scalar = True - not_contained_scalar = False - - assert data.list.contains(contained_scalar)[0] - assert not data.list.contains(not_contained_scalar)[0] diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py deleted file mode 100644 index dc19c52715a..00000000000 --- a/python/cudf/cudf/tests/test_copying.py +++ /dev/null @@ -1,439 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- -import cupy as cp -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import Series -from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing import assert_eq -from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES - -pytestmark = pytest.mark.spilling - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + OTHER_TYPES) -def test_repeat(dtype): - rng = np.random.default_rng(seed=0) - arr = rng.random(10) * 10 - repeats = rng.integers(10, size=10) - psr = pd.Series(arr).astype(dtype) - gsr = cudf.from_pandas(psr) - - assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) - - -def test_repeat_index(): - rng = np.random.default_rng(seed=0) - arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] - psr = pd.MultiIndex.from_arrays(arrays, names=("number", "color")) - gsr = cudf.from_pandas(psr) - repeats = rng.integers(10, size=4) - - assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) - - -def test_repeat_dataframe(): - rng = np.random.default_rng(seed=0) - psr = pd.DataFrame({"a": [1, 1, 2, 2]}) - gsr = cudf.from_pandas(psr) - repeats = rng.integers(10, size=4) - - # pd.DataFrame doesn't have repeat() so as a workaround, we are - # comparing pd.Series.repeat() with cudf.DataFrame.repeat()['a'] - assert_eq(psr["a"].repeat(repeats), gsr.repeat(repeats)["a"]) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES) -def test_repeat_scalar(dtype): - rng = np.random.default_rng(seed=0) - arr = rng.random(10) * 10 - repeats = 10 - psr = pd.Series(arr).astype(dtype) - gsr = cudf.from_pandas(psr) - - assert_eq(psr.repeat(repeats), gsr.repeat(repeats)) - - -def test_null_copy(): - col = Series(np.arange(2049)) - col[:] = None - assert len(col) == 2049 - - -def test_series_setitem_cow_on(): - with cudf.option_context("copy_on_write", True): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - actual[1] = 100 - assert_eq(actual, cudf.Series([1, 100, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([1, 2, 3, 4, 5])) - - -def test_series_setitem_cow_off(): - with cudf.option_context("copy_on_write", False): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - actual[1] = 100 - assert_eq(actual, cudf.Series([1, 100, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([1, 100, 3, 4, 5])) - - -def test_series_setitem_both_slice_cow_on(): - with cudf.option_context("copy_on_write", True): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - actual[slice(0, 2, 1)] = 100 - assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([1, 2, 3, 4, 5])) - - new_copy[slice(2, 4, 1)] = 300 - assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) - - -def test_series_setitem_both_slice_cow_off(): - with cudf.option_context("copy_on_write", False): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - actual[slice(0, 2, 1)] = 100 - assert_eq(actual, cudf.Series([100, 100, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([100, 100, 3, 4, 5])) - - new_copy[slice(2, 4, 1)] = 300 - assert_eq(actual, cudf.Series([100, 100, 300, 300, 5])) - assert_eq(new_copy, cudf.Series([100, 100, 300, 300, 5])) - - -def test_series_setitem_partial_slice_cow_on(): - with cudf.option_context("copy_on_write", True): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - new_copy[slice(2, 4, 1)] = 300 - assert_eq(actual, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(new_copy, cudf.Series([1, 2, 
300, 300, 5])) - - new_slice = actual[2:] - assert ( - new_slice._column.base_data.owner == actual._column.base_data.owner - ) - new_slice[0:2] = 10 - assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) - assert_eq(actual, cudf.Series([1, 2, 3, 4, 5])) - - -def test_series_setitem_partial_slice_cow_off(): - with cudf.option_context("copy_on_write", False): - actual = cudf.Series([1, 2, 3, 4, 5]) - new_copy = actual.copy(deep=False) - - new_copy[slice(2, 4, 1)] = 300 - assert_eq(actual, cudf.Series([1, 2, 300, 300, 5])) - assert_eq(new_copy, cudf.Series([1, 2, 300, 300, 5])) - - new_slice = actual[2:] - # Since COW is off, a slice should point to the same memory - ptr1 = new_slice._column.base_data.get_ptr(mode="read") - ptr2 = actual._column.base_data.get_ptr(mode="read") - assert ptr1 == ptr2 - - new_slice[0:2] = 10 - assert_eq(new_slice, cudf.Series([10, 10, 5], index=[2, 3, 4])) - assert_eq(actual, cudf.Series([1, 2, 10, 10, 5])) - - -def test_multiple_series_cow(): - with cudf.option_context("copy_on_write", True): - # Verify constructing, modifying, deleting - # multiple copies of a series preserves - # the data appropriately when COW is enabled. - s = cudf.Series([10, 20, 30, 40, 50]) - s1 = s.copy(deep=False) - s2 = s.copy(deep=False) - s3 = s.copy(deep=False) - s4 = s2.copy(deep=False) - s5 = s4.copy(deep=False) - s6 = s3.copy(deep=False) - - s1[0:3] = 10000 - # s1 will be unlinked from actual data in s, - # and then modified. Rest all should - # contain the original data. - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - for ser in [s, s2, s3, s4, s5, s6]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - s6[0:3] = 3000 - # s6 will be unlinked from actual data in s, - # and then modified. Rest all should - # contain the original data. - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - for ser in [s2, s3, s4, s5]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - s2[1:4] = 4000 - # s2 will be unlinked from actual data in s, - # and then modified. Rest all should - # contain the original data. - assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50])) - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - for ser in [s3, s4, s5]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - s4[2:4] = 5000 - # s4 will be unlinked from actual data in s, - # and then modified. Rest all should - # contain the original data. - assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50])) - assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50])) - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - for ser in [s3, s5]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - s5[2:4] = 6000 - # s5 will be unlinked from actual data in s, - # and then modified. Rest all should - # contain the original data. - assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) - assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50])) - assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50])) - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - for ser in [s3]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - s7 = s5.copy(deep=False) - assert_eq(s7, cudf.Series([10, 20, 6000, 6000, 50])) - s7[1:3] = 55 - # Making a copy of s5, i.e., s7 and modifying shouldn't - # be touching/modifying data in other series. 
- assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50])) - assert_eq(s2, cudf.Series([10, 4000, 4000, 4000, 50])) - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - for ser in [s3]: - assert_eq(ser, cudf.Series([10, 20, 30, 40, 50])) - - # Deleting any of the following series objects - # shouldn't delete rest of the weekly referenced data - # elsewhere. - - del s2 - - assert_eq(s1, cudf.Series([10000, 10000, 10000, 40, 50])) - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) - assert_eq(s4, cudf.Series([10, 20, 5000, 5000, 50])) - assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - del s4 - del s1 - - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) - assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) - assert_eq(s6, cudf.Series([3000, 3000, 3000, 40, 50])) - assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - del s - del s6 - - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) - assert_eq(s5, cudf.Series([10, 20, 6000, 6000, 50])) - assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - del s5 - - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50])) - assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - del s3 - assert_eq(s7, cudf.Series([10, 55, 55, 6000, 50])) - - -def test_series_zero_copy_cow_on(): - with cudf.option_context("copy_on_write", True): - s = cudf.Series([1, 2, 3, 4, 5]) - s1 = s.copy(deep=False) - cp_array = cp.asarray(s) - - # Ensure all original data & zero-copied - # data is same. - assert_eq(s, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(cp_array, cp.array([1, 2, 3, 4, 5])) - - cp_array[0:3] = 10 - # Modifying a zero-copied array should only - # modify `s` and will leave rest of the copies - # untouched. - - assert_eq(s.to_numpy(), np.array([10, 10, 10, 4, 5])) - assert_eq(s, cudf.Series([10, 10, 10, 4, 5])) - assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(cp_array, cp.array([10, 10, 10, 4, 5])) - - s2 = cudf.Series(cp_array) - assert_eq(s2, cudf.Series([10, 10, 10, 4, 5])) - - s3 = s2.copy(deep=False) - cp_array[0] = 20 - # Modifying a zero-copied array should modify - # `s2` and `s` only. Because `cp_array` - # is zero-copy shared with `s` & `s2`. - - assert_eq(s, cudf.Series([20, 10, 10, 4, 5])) - assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(cp_array, cp.array([20, 10, 10, 4, 5])) - assert_eq(s2, cudf.Series([20, 10, 10, 4, 5])) - assert_eq(s3, cudf.Series([10, 10, 10, 4, 5])) - - s4 = cudf.Series([10, 20, 30, 40, 50]) - s5 = cudf.Series(s4) - assert_eq(s5, cudf.Series([10, 20, 30, 40, 50])) - s5[0:2] = 1 - # Modifying `s5` should also modify `s4` - # because they are zero-copied. - assert_eq(s5, cudf.Series([1, 1, 30, 40, 50])) - assert_eq(s4, cudf.Series([1, 1, 30, 40, 50])) - - -def test_series_zero_copy_cow_off(): - is_spill_enabled = get_global_manager() is not None - - with cudf.option_context("copy_on_write", False): - s = cudf.Series([1, 2, 3, 4, 5]) - s1 = s.copy(deep=False) - cp_array = cp.asarray(s) - - # Ensure all original data & zero-copied - # data is same. - assert_eq(s, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(s1, cudf.Series([1, 2, 3, 4, 5])) - assert_eq(cp_array, cp.array([1, 2, 3, 4, 5])) - - cp_array[0:3] = 10 - # When COW is off, modifying a zero-copied array - # will need to modify `s` & `s1` since they are - # shallow copied. 
- - assert_eq(s, cudf.Series([10, 10, 10, 4, 5])) - assert_eq(s1, cudf.Series([10, 10, 10, 4, 5])) - assert_eq(cp_array, cp.array([10, 10, 10, 4, 5])) - - s2 = cudf.Series(cp_array) - assert_eq(s2, cudf.Series([10, 10, 10, 4, 5])) - s3 = s2.copy(deep=False) - cp_array[0] = 20 - - # Modifying `cp_array`, will propagate the changes - # across all Series objects, because they are - # either shallow copied or zero-copied. - - assert_eq(s, cudf.Series([20, 10, 10, 4, 5])) - assert_eq(s1, cudf.Series([20, 10, 10, 4, 5])) - assert_eq(cp_array, cp.array([20, 10, 10, 4, 5])) - if not is_spill_enabled: - # Since spilling might make a copy of the data, we cannot - # expect the two series to be a zero-copy of the cupy array - # when spilling is enabled globally. - assert_eq(s2, cudf.Series([20, 10, 10, 4, 5])) - assert_eq(s3, cudf.Series([20, 10, 10, 4, 5])) - - s4 = cudf.Series([10, 20, 30, 40, 50]) - s5 = cudf.Series(s4) - assert_eq(s5, cudf.Series([10, 20, 30, 40, 50])) - s5[0:2] = 1 - - # Modifying `s5` should also modify `s4` - # because they are zero-copied. - assert_eq(s5, cudf.Series([1, 1, 30, 40, 50])) - assert_eq(s4, cudf.Series([1, 1, 30, 40, 50])) - - -@pytest.mark.parametrize("copy_on_write", [True, False]) -def test_series_str_copy(copy_on_write): - with cudf.option_context("copy_on_write", copy_on_write): - s = cudf.Series(["a", "b", "c", "d", "e"]) - s1 = s.copy(deep=True) - s2 = s.copy(deep=True) - - assert_eq(s, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) - - s[0:3] = "abc" - - assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "b", "c", "d", "e"])) - - s2[1:4] = "xyz" - - assert_eq(s, cudf.Series(["abc", "abc", "abc", "d", "e"])) - assert_eq(s1, cudf.Series(["a", "b", "c", "d", "e"])) - assert_eq(s2, cudf.Series(["a", "xyz", "xyz", "xyz", "e"])) - - -@pytest.mark.parametrize("copy_on_write", [True, False]) -def test_series_cat_copy(copy_on_write): - with cudf.option_context("copy_on_write", copy_on_write): - s = cudf.Series([10, 20, 30, 40, 50], dtype="category") - s1 = s.copy(deep=True) - s2 = s1.copy(deep=True) - s3 = s1.copy(deep=True) - - s[0] = 50 - assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - assert_eq(s2, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - assert_eq(s3, cudf.Series([10, 20, 30, 40, 50], dtype="category")) - - s2[3] = 10 - s3[2:5] = 20 - assert_eq(s, cudf.Series([50, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s1, cudf.Series([10, 20, 30, 40, 50], dtype=s.dtype)) - assert_eq(s2, cudf.Series([10, 20, 30, 10, 50], dtype=s.dtype)) - assert_eq(s3, cudf.Series([10, 20, 20, 20, 20], dtype=s.dtype)) - - -def test_dataframe_cow_slice_setitem(): - with cudf.option_context("copy_on_write", True): - df = cudf.DataFrame( - {"a": [10, 11, 12, 13, 14], "b": [20, 30, 40, 50, 60]} - ) - slice_df = df[1:4] - - assert_eq( - slice_df, - cudf.DataFrame( - {"a": [11, 12, 13], "b": [30, 40, 50]}, index=[1, 2, 3] - ), - ) - - slice_df["a"][2] = 1111 - - assert_eq( - slice_df, - cudf.DataFrame( - {"a": [11, 1111, 13], "b": [30, 40, 50]}, index=[1, 2, 3] - ), - ) - assert_eq( - df, - cudf.DataFrame( - {"a": [10, 11, 12, 13, 14], "b": [20, 30, 40, 50, 60]} - ), - ) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py deleted file mode 100644 index 
b1678889eff..00000000000 --- a/python/cudf/cudf/tests/test_datetime.py +++ /dev/null @@ -1,422 +0,0 @@ -# Copyright (c) 2019-2025, NVIDIA CORPORATION. - -import datetime -import operator - -import cupy as cp -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf import Series -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_VERSION, -) -from cudf.testing import assert_eq -from cudf.testing._utils import ( - DATETIME_TYPES, - assert_exceptions_equal, -) - - -@pytest.fixture( - params=[ - operator.lt, - operator.gt, - operator.le, - operator.ge, - operator.eq, - operator.ne, - ] -) -def op(request): - return request.param - - -@pytest.fixture( - params=[ - pd.date_range("20010101", "20020215", freq="400h", name="times"), - pd.date_range( - "20010101", freq="243434324423423234ns", name="times", periods=10 - ), - ] -) -def data(request): - return request.param - - -@pytest.mark.parametrize( - "lhs_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -@pytest.mark.parametrize( - "rhs_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_datetime_series_binops_pandas(lhs_dtype, rhs_dtype): - pd_data_1 = pd.Series( - pd.date_range("20010101", "20020215", freq="400h", name="times") - ) - pd_data_2 = pd.Series( - pd.date_range("20010101", "20020215", freq="401h", name="times") - ) - gdf_data_1 = Series(pd_data_1).astype(lhs_dtype) - gdf_data_2 = Series(pd_data_2).astype(rhs_dtype) - assert_eq(pd_data_1, gdf_data_1.astype("datetime64[ns]")) - assert_eq(pd_data_2, gdf_data_2.astype("datetime64[ns]")) - assert_eq(pd_data_1 < pd_data_2, gdf_data_1 < gdf_data_2) - assert_eq(pd_data_1 > pd_data_2, gdf_data_1 > gdf_data_2) - assert_eq(pd_data_1 == pd_data_2, gdf_data_1 == gdf_data_2) - assert_eq(pd_data_1 <= pd_data_2, gdf_data_1 <= gdf_data_2) - assert_eq(pd_data_1 >= pd_data_2, gdf_data_1 >= gdf_data_2) - - -@pytest.mark.parametrize( - "lhs_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -@pytest.mark.parametrize( - "rhs_dtype", - ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"], -) -def test_datetime_series_binops_numpy(lhs_dtype, rhs_dtype): - pd_data_1 = pd.Series( - pd.date_range("20010101", "20020215", freq="400h", name="times") - ) - pd_data_2 = pd.Series( - pd.date_range("20010101", "20020215", freq="401h", name="times") - ) - gdf_data_1 = Series(pd_data_1).astype(lhs_dtype) - gdf_data_2 = Series(pd_data_2).astype(rhs_dtype) - np_data_1 = np.array(pd_data_1).astype(lhs_dtype) - np_data_2 = np.array(pd_data_2).astype(rhs_dtype) - np.testing.assert_equal(np_data_1, gdf_data_1.to_numpy()) - np.testing.assert_equal(np_data_2, gdf_data_2.to_numpy()) - np.testing.assert_equal( - np.less(np_data_1, np_data_2), (gdf_data_1 < gdf_data_2).to_numpy() - ) - np.testing.assert_equal( - np.greater(np_data_1, np_data_2), (gdf_data_1 > gdf_data_2).to_numpy() - ) - np.testing.assert_equal( - np.equal(np_data_1, np_data_2), (gdf_data_1 == gdf_data_2).to_numpy() - ) - np.testing.assert_equal( - np.less_equal(np_data_1, np_data_2), - (gdf_data_1 <= gdf_data_2).to_numpy(), - ) - np.testing.assert_equal( - np.greater_equal(np_data_1, np_data_2), - (gdf_data_1 >= gdf_data_2).to_numpy(), - ) - - -def test_dt_ops(data): - pd_data = pd.Series(data) - gdf_data = Series(data) - - assert_eq(pd_data == pd_data, gdf_data == gdf_data) - assert_eq(pd_data < pd_data, gdf_data < gdf_data) - assert_eq(pd_data > pd_data, gdf_data > gdf_data) - - 
-@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas.", -) -def test_datetime_can_cast_safely(): - sr = cudf.Series( - ["1679-01-01", "2000-01-31", "2261-01-01"], dtype="datetime64[ms]" - ) - assert sr._column.can_cast_safely(np.dtype("datetime64[ns]")) - - sr = cudf.Series( - ["1677-01-01", "2000-01-31", "2263-01-01"], dtype="datetime64[ms]" - ) - - assert sr._column.can_cast_safely(np.dtype("datetime64[ns]")) is False - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 10, 100, 20000], - [None] * 7, - [10, 20, 30, None, 100, 200, None], - [3223.234, 342.2332, 23423.23, 3343.23324, 23432.2323, 242.23, 233], - ], -) -@pytest.mark.parametrize( - "other", - [ - [1, 2, 3, 4, 10, 100, 20000], - [None] * 7, - [10, 20, 30, None, 100, 200, None], - [3223.234, 342.2332, 23423.23, 3343.23324, 23432.2323, 242.23, 233], - datetime.datetime(1993, 6, 22, 13, 30), - datetime.datetime(2005, 1, 22, 10, 00), - np.datetime64("2005-02"), - np.datetime64("2005-02-25"), - np.datetime64("2005-02-25T03:30"), - np.datetime64("nat"), - # TODO: https://github.com/pandas-dev/pandas/issues/52295 - ], -) -@pytest.mark.parametrize("data_dtype", DATETIME_TYPES) -@pytest.mark.parametrize("other_dtype", DATETIME_TYPES) -def test_datetime_subtract(data, other, data_dtype, other_dtype): - gsr = cudf.Series(data, dtype=data_dtype) - psr = gsr.to_pandas() - - if isinstance(other, np.datetime64): - gsr_other = other - psr_other = other - else: - gsr_other = cudf.Series(other, dtype=other_dtype) - psr_other = gsr_other.to_pandas() - - expected = psr - psr_other - actual = gsr - gsr_other - - assert_eq(expected, actual) - - expected = psr_other - psr - actual = gsr_other - gsr - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ], -) -@pytest.mark.parametrize( - "other_scalars", - [ - datetime.timedelta(days=768), - datetime.timedelta(seconds=768), - datetime.timedelta(microseconds=7), - datetime.timedelta(minutes=447), - datetime.timedelta(hours=447), - datetime.timedelta(weeks=734), - np.timedelta64(4, "s"), - np.timedelta64(456, "D"), - np.timedelta64(46, "h"), - np.timedelta64("nat"), - np.timedelta64(1, "s"), - np.timedelta64(1, "ms"), - np.timedelta64(1, "us"), - np.timedelta64(1, "ns"), - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -@pytest.mark.parametrize( - "op", - ["add", "sub"], -) -def test_datetime_series_ops_with_scalars(data, other_scalars, dtype, op): - gsr = cudf.Series(data=data, dtype=dtype) - psr = gsr.to_pandas() - - if op == "add": - expected = psr + other_scalars - actual = gsr + other_scalars - elif op == "sub": - expected = psr - other_scalars - actual = gsr - other_scalars - - assert_eq(expected, actual) - - if op == "add": - expected = other_scalars + psr - actual = other_scalars + gsr - - assert_eq(expected, actual) - - elif op == "sub": - assert_exceptions_equal( - lfunc=operator.sub, - rfunc=operator.sub, - lfunc_args_and_kwargs=([other_scalars, psr],), - rfunc_args_and_kwargs=([other_scalars, 
gsr],), - ) - - -@pytest.mark.parametrize("data", ["20110101", "20120101", "20130101"]) -@pytest.mark.parametrize("other_scalars", ["20110101", "20120101", "20130101"]) -@pytest.mark.parametrize( - "dtype", - ["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"], -) -def test_datetime_series_cmpops_with_scalars(data, other_scalars, dtype, op): - gsr = cudf.Series(data=data, dtype=dtype) - psr = gsr.to_pandas() - - expect = op(psr, other_scalars) - got = op(gsr, other_scalars) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ], -) -@pytest.mark.parametrize( - "scalar", - [ - datetime.timedelta(days=768), - datetime.timedelta(seconds=768), - datetime.timedelta(microseconds=7), - np.timedelta64("nat"), - np.timedelta64(1, "s"), - np.timedelta64(1, "ms"), - np.timedelta64(1, "us"), - np.timedelta64(1, "ns"), - ], -) -@pytest.mark.parametrize("dtype", DATETIME_TYPES) -@pytest.mark.parametrize("op", [np.add, np.subtract]) -def test_datetime_series_ops_with_scalars_misc(data, scalar, dtype, op): - gsr = cudf.Series(data=data, dtype=dtype) - psr = gsr.to_pandas() - - expect = op(psr, scalar) - got = op(gsr, scalar) - - assert_eq(expect, got) - - -def test_datetime_invalid_ops(): - sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - psr = sr.to_pandas() - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), - rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), - ) - - assert_exceptions_equal( - lfunc=operator.truediv, - rfunc=operator.truediv, - lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), - rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), - ) - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([psr, psr],), - rfunc_args_and_kwargs=([sr, sr],), - ) - - assert_exceptions_equal( - lfunc=operator.floordiv, - rfunc=operator.floordiv, - lfunc_args_and_kwargs=([psr, psr],), - rfunc_args_and_kwargs=([sr, sr],), - ) - - assert_exceptions_equal( - lfunc=operator.floordiv, - rfunc=operator.floordiv, - lfunc_args_and_kwargs=([psr, pd.Timestamp(1513393355.5, unit="s")],), - rfunc_args_and_kwargs=([sr, pd.Timestamp(1513393355.5, unit="s")],), - ) - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([psr, 1],), - rfunc_args_and_kwargs=([sr, 1],), - ) - - assert_exceptions_equal( - lfunc=operator.truediv, - rfunc=operator.truediv, - lfunc_args_and_kwargs=([psr, "a"],), - rfunc_args_and_kwargs=([sr, "a"],), - ) - - assert_exceptions_equal( - lfunc=operator.mul, - rfunc=operator.mul, - lfunc_args_and_kwargs=([psr, 1],), - rfunc_args_and_kwargs=([sr, 1],), - ) - - -def test_datetime_binop_tz_timestamp(op): - s = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") - with pytest.raises(NotImplementedError): - op(s, pd_tz_timestamp) - - date_scalar = 
datetime.datetime.now(datetime.timezone.utc) - with pytest.raises(NotImplementedError): - op(s, date_scalar) - - -def test_datetime_series_cmpops_pandas_compatibility(op): - data1 = ["20110101", "20120101", None, "20140101", None] - data2 = ["20110101", "20120101", "20130101", None, None] - gsr1 = cudf.Series(data=data1, dtype="datetime64[ns]") - psr1 = gsr1.to_pandas() - - gsr2 = cudf.Series(data=data2, dtype="datetime64[ns]") - psr2 = gsr2.to_pandas() - - expect = op(psr1, psr2) - with cudf.option_context("mode.pandas_compatible", True): - got = op(gsr1, gsr2) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "method, kwargs", - [["mean", {}], ["std", {}], ["std", {"ddof": 0}]], -) -def test_dti_reduction(method, kwargs): - pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") - cudf_dti = cudf.from_pandas(pd_dti) - - result = getattr(cudf_dti, method)(**kwargs) - expected = getattr(pd_dti, method)(**kwargs) - assert result == expected diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py deleted file mode 100644 index 2cb16f71011..00000000000 --- a/python/cudf/cudf/tests/test_decimal.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright (c) 2021-2025, NVIDIA CORPORATION. - -import decimal -from decimal import Decimal - -import numpy as np -import pyarrow as pa -import pytest - -import cudf -from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn -from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype -from cudf.testing import assert_eq -from cudf.testing._utils import ( - FLOAT_TYPES, - INTEGER_TYPES, - SIGNED_TYPES, - expect_warning_if, -) - - -@pytest.mark.parametrize( - "data_", - [ - [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [1], - [-1], - [1, 2, 3, 4], - [42, 17, 41], - [1, 2, None, 4], - [None, None, None], - [], - ], -) -@pytest.mark.parametrize( - "typ_", - [ - pa.decimal128(precision=4, scale=2), - pa.decimal128(precision=5, scale=3), - pa.decimal128(precision=6, scale=4), - ], -) -@pytest.mark.parametrize("col", [Decimal32Column, Decimal64Column]) -def test_round_trip_decimal_column(data_, typ_, col): - pa_arr = pa.array(data_, type=typ_) - col_32 = col.from_arrow(pa_arr) - assert pa_arr.equals(col_32.to_arrow()) - - -def test_from_arrow_max_precision_decimal64(): - with pytest.raises(ValueError): - Decimal64Column.from_arrow( - pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19)) - ) - - -def test_from_arrow_max_precision_decimal32(): - with pytest.raises(ValueError): - Decimal32Column.from_arrow( - pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=10)) - ) - - -@pytest.mark.parametrize("from_dtype", FLOAT_TYPES) -@pytest.mark.parametrize( - "to_dtype", - [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)], -) -def test_typecast_from_float_to_decimal(request, from_dtype, to_dtype): - data = cudf.Series( - [ - 14.12302, - 97938.2, - np.nan, - 0.0, - -8.302014, - np.nan, - 94.31304, - -112.2314, - 0.3333333, - np.nan, - ] - ) - request.applymarker( - pytest.mark.xfail( - from_dtype == np.dtype("float32") and to_dtype.precision > 12, - reason="https://github.com/rapidsai/cudf/issues/14169", - ) - ) - got = data.astype(from_dtype) - - pa_arr = got.to_arrow().cast( - pa.decimal128(to_dtype.precision, to_dtype.scale) - ) - expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) - - got = got.astype(to_dtype) - - assert_eq(got, expected) - - 
-@pytest.mark.parametrize("from_dtype", INTEGER_TYPES) -@pytest.mark.parametrize( - "to_dtype", - [Decimal64Dtype(9, 3), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)], -) -def test_typecast_from_int_to_decimal(from_dtype, to_dtype): - data = cudf.Series( - [ - 14.12302, - 38.2, - np.nan, - 0.0, - -8.302014, - np.nan, - 94.31304, - np.nan, - -112.2314, - 0.3333333, - np.nan, - ] - ) - got = data.astype(from_dtype) - - pa_arr = ( - got.to_arrow() - .cast("float64") - .cast(pa.decimal128(to_dtype.precision, to_dtype.scale)) - ) - expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) - - got = got.astype(to_dtype) - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "from_dtype", - [ - Decimal64Dtype(7, 2), - Decimal64Dtype(11, 4), - Decimal64Dtype(18, 10), - Decimal32Dtype(7, 2), - Decimal32Dtype(5, 3), - Decimal32Dtype(9, 5), - ], -) -@pytest.mark.parametrize( - "to_dtype", - [ - Decimal64Dtype(7, 2), - Decimal64Dtype(18, 10), - Decimal64Dtype(11, 4), - Decimal32Dtype(7, 2), - Decimal32Dtype(9, 5), - Decimal32Dtype(5, 3), - ], -) -def test_typecast_to_from_decimal(from_dtype, to_dtype): - data = cudf.Series( - [ - 14.12309, - 2.343942, - np.nan, - 0.0, - -8.302082, - np.nan, - 94.31308, - -112.2364, - -8.029972, - np.nan, - ] - ) - if from_dtype.scale > to_dtype.MAX_PRECISION: - pytest.skip( - "This is supposed to overflow because the representation value in " - "the source exceeds the max representable in destination dtype." - ) - s = data.astype(from_dtype) - - pa_arr = s.to_arrow().cast( - pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False - ) - if isinstance(to_dtype, Decimal32Dtype): - expected = cudf.Series._from_column(Decimal32Column.from_arrow(pa_arr)) - elif isinstance(to_dtype, Decimal64Dtype): - expected = cudf.Series._from_column(Decimal64Column.from_arrow(pa_arr)) - - with expect_warning_if(to_dtype.scale < s.dtype.scale, UserWarning): - got = s.astype(to_dtype) - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "from_dtype", - [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(17, 10)], -) -@pytest.mark.parametrize("to_dtype", SIGNED_TYPES) -def test_typecast_from_decimal(from_dtype, to_dtype): - data = cudf.Series( - [ - 14.12309, - 2.343942, - np.nan, - 0.0, - -8.302082, - np.nan, - 94.31308, - -112.2364, - -8.029972, - np.nan, - ] - ) - got = data.astype(from_dtype) - pa_arr = got.to_arrow().cast(to_dtype, safe=False) - - got = got.astype(to_dtype) - expected = cudf.Series._from_column(NumericalColumn.from_arrow(pa_arr)) - - assert_eq(got, expected) - assert_eq(got.dtype, expected.dtype) - - -@pytest.mark.parametrize( - "data, dtype, item, to, expect", - [ - # scatter to a single index - ( - ["1", "2", "3"], - Decimal64Dtype(1, 0), - Decimal(5), - 1, - ["1", "5", "3"], - ), - ( - ["1.5", "2.5", "3.5"], - Decimal64Dtype(2, 1), - Decimal("5.5"), - 1, - ["1.5", "5.5", "3.5"], - ), - ( - ["1.0042", "2.0042", "3.0042"], - Decimal64Dtype(5, 4), - Decimal("5.0042"), - 1, - ["1.0042", "5.0042", "3.0042"], - ), - # scatter via boolmask - ( - ["1", "2", "3"], - Decimal64Dtype(1, 0), - Decimal(5), - [True, False, True], - ["5", "2", "5"], - ), - ( - ["1.5", "2.5", "3.5"], - Decimal64Dtype(2, 1), - Decimal("5.5"), - [True, True, True], - ["5.5", "5.5", "5.5"], - ), - ( - ["1.0042", "2.0042", "3.0042"], - Decimal64Dtype(5, 4), - Decimal("5.0042"), - [False, False, True], - ["1.0042", "2.0042", "5.0042"], - ), - # We will allow assigning a decimal with less precision - ( - ["1.00", "2.00", "3.00"], - Decimal64Dtype(3, 2), - 
Decimal(5), - 1, - ["1.00", "5.00", "3.00"], - ), - # But not truncation - ( - ["1", "2", "3"], - Decimal64Dtype(1, 0), - Decimal("5.5"), - 1, - pa.lib.ArrowInvalid, - ), - # We will allow for setting scalars into decimal columns - (["1", "2", "3"], Decimal64Dtype(1, 0), 5, 1, ["1", "5", "3"]), - # But not if it has too many digits to fit the precision - (["1", "2", "3"], Decimal64Dtype(1, 0), 50, 1, pa.lib.ArrowInvalid), - ], -) -def test_series_setitem_decimal(data, dtype, item, to, expect): - data = cudf.Series([Decimal(x) for x in data], dtype=dtype) - - if expect is pa.lib.ArrowInvalid: - with pytest.raises(expect): - data[to] = item - return - else: - expect = cudf.Series([Decimal(x) for x in expect], dtype=dtype) - data[to] = item - assert_eq(data, expect) - - -@pytest.mark.parametrize( - "input_obj", [[decimal.Decimal(1), cudf.NA, decimal.Decimal(3)]] -) -def test_series_construction_with_nulls(input_obj): - expect = pa.array(input_obj, from_pandas=True) - got = cudf.Series(input_obj).to_arrow() - - assert expect == got - - -@pytest.mark.parametrize( - "data", - [ - [(["1", "2", "3"], cudf.Decimal64Dtype(1, 0))], - [ - (["1", "2", "3"], cudf.Decimal64Dtype(1, 0)), - (["1.0", "2.0", "3.0"], cudf.Decimal64Dtype(2, 1)), - (["10.1", "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), - ], - [ - (["1", None, "3"], cudf.Decimal64Dtype(1, 0)), - (["1.0", "2.0", None], cudf.Decimal64Dtype(2, 1)), - ([None, "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), - ], - ], -) -def test_serialize_decimal_columns(data): - df = cudf.DataFrame( - { - str(i): cudf.Series( - [Decimal(x) if x is not None else x for x in values], - dtype=dtype, - ) - for i, (values, dtype) in enumerate(data) - } - ) - recreated = df.__class__.deserialize(*df.serialize()) - assert_eq(recreated, df) - - -def test_decimal_invalid_precision(): - with pytest.raises(pa.ArrowInvalid): - _ = cudf.Series([10, 20, 30], dtype=cudf.Decimal64Dtype(2, 2)) - - with pytest.raises(pa.ArrowInvalid): - _ = cudf.Series([Decimal("300")], dtype=cudf.Decimal64Dtype(2, 1)) - - -def test_decimal_overflow(): - s = cudf.Series([Decimal("0.0009384233522166997927180531650178250")]) - result = s * s - assert_eq(cudf.Decimal128Dtype(precision=38, scale=37), result.dtype) - - s = cudf.Series([1, 2], dtype=cudf.Decimal128Dtype(precision=38, scale=0)) - result = s * Decimal("1.0") - assert_eq(cudf.Decimal128Dtype(precision=38, scale=1), result.dtype) - - -def test_decimal_binop_upcast_operands(): - ser1 = cudf.Series([0.51, 1.51, 2.51]).astype(cudf.Decimal64Dtype(18, 2)) - ser2 = cudf.Series([0.90, 0.96, 0.99]).astype(cudf.Decimal128Dtype(19, 2)) - result = ser1 + ser2 - expected = cudf.Series([1.41, 2.47, 3.50]).astype( - cudf.Decimal128Dtype(20, 2) - ) - assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py deleted file mode 100644 index 039bb7cbf9c..00000000000 --- a/python/cudf/cudf/tests/test_list.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- - -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - - -def test_concatenate_rows_of_lists(): - pdf = pd.DataFrame({"val": [["a", "a"], ["b"], ["c"]]}) - gdf = cudf.from_pandas(pdf) - - expect = pdf["val"] + pdf["val"] - got = gdf["val"] + gdf["val"] - - assert_eq(expect, got) - - -def test_concatenate_list_with_nonlist(): - with pytest.raises(TypeError): - gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]}) - gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) - gdf1["A"] + gdf2["A"] diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 1c508307e32..59609158728 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -1,5 +1,5 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. - +import decimal import itertools import pickle @@ -440,3 +440,33 @@ def test_serialize_column_types_preserved(columns): expected = cudf.DataFrame([[10, 11]], columns=columns()) result = cudf.DataFrame.deserialize(*expected.serialize()) assert_eq(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [(["1", "2", "3"], cudf.Decimal64Dtype(1, 0))], + [ + (["1", "2", "3"], cudf.Decimal64Dtype(1, 0)), + (["1.0", "2.0", "3.0"], cudf.Decimal64Dtype(2, 1)), + (["10.1", "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), + ], + [ + (["1", None, "3"], cudf.Decimal64Dtype(1, 0)), + (["1.0", "2.0", None], cudf.Decimal64Dtype(2, 1)), + ([None, "20.2", "30.3"], cudf.Decimal64Dtype(3, 1)), + ], + ], +) +def test_serialize_decimal_columns(data): + df = cudf.DataFrame( + { + str(i): cudf.Series( + [decimal.Decimal(x) if x is not None else x for x in values], + dtype=dtype, + ) + for i, (values, dtype) in enumerate(data) + } + ) + recreated = df.__class__.deserialize(*df.serialize()) + assert_eq(recreated, df) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py deleted file mode 100644 index 2ee6904ef4d..00000000000 --- a/python/cudf/cudf/tests/test_sorting.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- - -import numpy as np -import pandas as pd -import pytest - -from cudf import Series -from cudf.testing import assert_eq -from cudf.testing._utils import ( - assert_exceptions_equal, -) - - -@pytest.fixture(params=[2, 257]) -def nelem(request): - return request.param - - -@pytest.fixture( - params=[ - np.int32, - np.int64, - np.uint32, - np.uint64, - np.float32, - np.float64, - ] -) -def dtype(request): - return request.param - - -@pytest.fixture(params=[slice(1, None), slice(None, -1), slice(1, -1)]) -def sliceobj(request): - return request.param - - -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_series_sort_values_ignore_index(ignore_index): - gsr = Series([1, 3, 5, 2, 4]) - psr = gsr.to_pandas() - - expect = psr.sort_values(ignore_index=ignore_index) - got = gsr.sort_values(ignore_index=ignore_index) - assert_eq(expect, got) - - -@pytest.mark.parametrize("asc", [True, False]) -def test_series_argsort(nelem, dtype, asc): - rng = np.random.default_rng(seed=0) - sr = Series((100 * rng.random(nelem)).astype(dtype)) - res = sr.argsort(ascending=asc) - - if asc: - expected = np.argsort(sr.to_numpy(), kind="mergesort") - else: - # -1 multiply works around missing desc sort (may promote to float64) - expected = np.argsort(sr.to_numpy() * np.int8(-1), kind="mergesort") - np.testing.assert_array_equal(expected, res.to_numpy()) - - -@pytest.mark.parametrize("asc", [True, False]) -def test_series_sort_index(nelem, asc): - rng = np.random.default_rng(seed=0) - sr = Series(100 * rng.random(nelem)) - psr = sr.to_pandas() - - expected = psr.sort_index(ascending=asc) - got = sr.sort_index(ascending=asc) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]]) -@pytest.mark.parametrize("n", [-100, -50, -12, -2, 0, 1, 2, 3, 4, 7]) -def test_series_nlargest(data, n): - """Indirectly tests Series.sort_values()""" - sr = Series(data) - psr = pd.Series(data) - assert_eq(sr.nlargest(n), psr.nlargest(n)) - assert_eq(sr.nlargest(n, keep="last"), psr.nlargest(n, keep="last")) - - assert_exceptions_equal( - lfunc=psr.nlargest, - rfunc=sr.nlargest, - lfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), - rfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), - ) - - -@pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]]) -@pytest.mark.parametrize("n", [-100, -50, -12, -2, 0, 1, 2, 3, 4, 9]) -def test_series_nsmallest(data, n): - """Indirectly tests Series.sort_values()""" - sr = Series(data) - psr = pd.Series(data) - assert_eq(sr.nsmallest(n), psr.nsmallest(n)) - assert_eq( - sr.nsmallest(n, keep="last").sort_index(), - psr.nsmallest(n, keep="last").sort_index(), - ) - - assert_exceptions_equal( - lfunc=psr.nsmallest, - rfunc=sr.nsmallest, - lfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), - rfunc_args_and_kwargs=([], {"n": 3, "keep": "what"}), - ) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py deleted file mode 100644 index f04c636757f..00000000000 --- a/python/cudf/cudf/tests/test_string.py +++ /dev/null @@ -1,644 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -from decimal import Decimal -from sys import getsizeof - -import cupy -import numpy as np -import pandas as pd -import pyarrow as pa -import pytest - -import rmm - -import cudf -from cudf.core.buffer import as_buffer -from cudf.core.column.string import StringColumn -from cudf.core.index import Index -from cudf.testing import assert_eq -from cudf.testing._utils import ( - DATETIME_TYPES, - NUMERIC_TYPES, - assert_exceptions_equal, -) -from cudf.utils import dtypes as dtypeutils - - -@pytest.fixture( - params=[ - ["AbC", "de", "FGHI", "j", "kLm"], - ["nOPq", None, "RsT", None, "uVw"], - [None, None, None, None, None], - ], - ids=["no_nulls", "some_nulls", "all_nulls"], -) -def data(request): - return request.param - - -@pytest.fixture( - params=[None, [10, 11, 12, 13, 14]], ids=["None_index", "Set_index"] -) -def index(request): - return request.param - - -@pytest.fixture -def ps_gs(data, index): - ps = pd.Series(data, index=index, dtype="str", name="nice name") - gs = cudf.Series(data, index=index, dtype="str", name="nice name") - return (ps, gs) - - -@pytest.mark.parametrize("construct", [list, np.array, pd.Series, pa.array]) -def test_string_ingest(construct): - expect = ["a", "a", "b", "c", "a"] - data = construct(expect) - got = cudf.Series(data) - assert got.dtype == np.dtype("object") - assert len(got) == 5 - for idx, val in enumerate(expect): - assert expect[idx] == got[idx] - - -def test_string_export(ps_gs): - ps, gs = ps_gs - - expect = ps - got = gs.to_pandas() - assert_eq(expect, got) - - expect = np.array(ps) - got = gs.to_numpy() - assert_eq(expect, got) - - expect = pa.Array.from_pandas(ps) - got = gs.to_arrow() - - assert pa.Array.equals(expect, got) - - -@pytest.mark.parametrize( - "item", - [ - 0, - 2, - 4, - slice(1, 3), - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 1, 2, 3, 4, 4, 3, 2, 1, 0], - np.array([0, 1, 2, 3, 4]), - cupy.asarray(np.array([0, 1, 2, 3, 4])), - ], -) -def test_string_get_item(ps_gs, item): - ps, gs = ps_gs - - got = gs.iloc[item] - if isinstance(got, cudf.Series): - got = got.to_arrow() - - if isinstance(item, cupy.ndarray): - item = cupy.asnumpy(item) - - expect = ps.iloc[item] - if isinstance(expect, pd.Series): - expect = pa.Array.from_pandas(expect) - pa.Array.equals(expect, got) - else: - if got is cudf.NA and expect is None: - return - assert expect == got - - -@pytest.mark.parametrize( - "item", - [ - [True] * 5, - [False] * 5, - np.array([True] * 5), - np.array([False] * 5), - cupy.asarray(np.array([True] * 5)), - cupy.asarray(np.array([False] * 5)), - np.random.default_rng(seed=0) - .integers(0, 2, 5) - .astype("bool") - .tolist(), - np.random.default_rng(seed=0).integers(0, 2, 5).astype("bool"), - cupy.asarray( - np.random.default_rng(seed=0).integers(0, 2, 5).astype("bool") - ), - ], -) -def test_string_bool_mask(ps_gs, item): - ps, gs = ps_gs - - got = gs.iloc[item] - if isinstance(got, cudf.Series): - got = got.to_arrow() - - if isinstance(item, cupy.ndarray): - item = cupy.asnumpy(item) - - expect = ps[item] - if isinstance(expect, pd.Series): - expect = pa.Array.from_pandas(expect) - pa.Array.equals(expect, got) - else: - assert expect == got - - -@pytest.mark.parametrize("item", [0, slice(1, 3), slice(5)]) -def test_string_repr(ps_gs, item): - ps, gs = ps_gs - - got_out = gs.iloc[item] - expect_out = ps.iloc[item] - - expect = str(expect_out) - got = str(got_out) - - if got_out is not cudf.NA and len(got_out) > 1: - expect = expect.replace("None", "") - - assert expect == got or (expect == "None" and got == "") - - 
-@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + ["bool", "object", "str"] -) -def test_string_astype(dtype): - if ( - dtype.startswith("int") - or dtype.startswith("uint") - or dtype.startswith("long") - ): - data = ["1", "2", "3", "4", "5"] - elif dtype.startswith("float"): - data = [ - "1.0", - "2.0", - "3.0", - "4.0", - None, - "5.0", - "nan", - "-INF", - "NaN", - "inF", - "NAn", - ] - elif dtype.startswith("bool"): - data = ["True", "False", "True", "False", "False"] - elif dtype.startswith("datetime64"): - data = [ - "2019-06-04T00:00:00", - "2019-06-04T12:12:12", - "2019-06-03T00:00:00", - "2019-05-04T00:00:00", - "2018-06-04T00:00:00", - "1922-07-21T01:02:03", - ] - elif dtype == "str" or dtype == "object": - data = ["ab", "cd", "ef", "gh", "ij"] - ps = pd.Series(data) - gs = cudf.Series(data) - - expect = ps.astype(dtype) - got = gs.astype(dtype) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data, scale, precision", - [ - (["1.11", "2.22", "3.33"], 2, 3), - (["111", "222", "33"], 0, 3), - (["111000", "22000", "3000"], -3, 3), - ([None, None, None], 0, 5), - ([None, "-2345", None], 0, 5), - ([], 0, 5), - ], -) -@pytest.mark.parametrize( - "decimal_dtype", - [cudf.Decimal128Dtype, cudf.Decimal64Dtype, cudf.Decimal32Dtype], -) -def test_string_to_decimal(data, scale, precision, decimal_dtype): - gs = cudf.Series(data, dtype="str") - fp = gs.astype(decimal_dtype(scale=scale, precision=precision)) - got = fp.astype("str") - assert_eq(gs, got) - - -def test_string_empty_to_decimal(): - gs = cudf.Series(["", "-85", ""], dtype="str") - got = gs.astype(cudf.Decimal64Dtype(scale=0, precision=5)) - expected = cudf.Series( - [0, -85, 0], - dtype=cudf.Decimal64Dtype(scale=0, precision=5), - ) - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data, scale, precision", - [ - (["1.23", "-2.34", "3.45"], 2, 3), - (["123", "-234", "345"], 0, 3), - (["12300", "-400", "5000.0"], -2, 5), - ([None, None, None], 0, 5), - ([None, "-100", None], 0, 5), - ([], 0, 5), - ], -) -@pytest.mark.parametrize( - "decimal_dtype", - [cudf.Decimal128Dtype, cudf.Decimal32Dtype, cudf.Decimal64Dtype], -) -def test_string_from_decimal(data, scale, precision, decimal_dtype): - decimal_data = [] - for d in data: - if d is None: - decimal_data.append(None) - else: - decimal_data.append(Decimal(d)) - fp = cudf.Series( - decimal_data, - dtype=decimal_dtype(scale=scale, precision=precision), - ) - gs = fp.astype("str") - got = gs.astype(decimal_dtype(scale=scale, precision=precision)) - assert_eq(fp, got) - - -@pytest.mark.parametrize( - "dtype", NUMERIC_TYPES + DATETIME_TYPES + ["bool", "object", "str"] -) -def test_string_empty_astype(dtype): - data = [] - ps = pd.Series(data, dtype="str") - gs = cudf.Series(data, dtype="str") - - expect = ps.astype(dtype) - got = gs.astype(dtype) - - assert_eq(expect, got) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES + ["bool"]) -def test_string_numeric_astype(dtype): - if dtype.startswith("bool"): - data = [1, 0, 1, 0, 1] - elif ( - dtype.startswith("int") - or dtype.startswith("uint") - or dtype.startswith("long") - ): - data = [1, 2, 3, 4, 5] - elif dtype.startswith("float"): - data = [1.0, 2.0, 3.0, 4.0, 5.0] - elif dtype.startswith("datetime64"): - # pandas rounds the output format based on the data - # Use numpy instead - # but fix '2011-01-01T00:00:00' -> '2011-01-01 00:00:00' - data = [1000000001, 2000000001, 3000000001, 4000000001, 5000000001] - ps = np.asarray(data, dtype=dtype).astype(str) - ps = 
np.array([i.replace("T", " ") for i in ps]) - - if not dtype.startswith("datetime64"): - ps = pd.Series(data, dtype=dtype) - - gs = cudf.Series(data, dtype=dtype) - - expect = pd.Series(ps.astype("str")) - got = gs.astype("str") - - assert_eq(expect, got) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES + ["bool"]) -def test_string_empty_numeric_astype(dtype): - data = [] - - if dtype.startswith("datetime64"): - ps = pd.Series(data, dtype="datetime64[ns]") - else: - ps = pd.Series(data, dtype=dtype) - gs = cudf.Series(data, dtype=dtype) - - expect = ps.astype("str") - got = gs.astype("str") - - assert_eq(expect, got) - - -@pytest.mark.parametrize("ascending", [True, False]) -def test_string_sort(ps_gs, ascending): - ps, gs = ps_gs - - expect = ps.sort_values(ascending=ascending) - got = gs.sort_values(ascending=ascending) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] -) -@pytest.mark.parametrize("num_keys", [1, 2, 3]) -def test_string_groupby_key(str_data, num_keys): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_keys): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - - expect = pdf.groupby(list(range(num_keys)), as_index=False).count() - got = gdf.groupby(list(range(num_keys)), as_index=False).count() - - expect = expect.sort_values([0]).reset_index(drop=True) - got = got.sort_values([0]).reset_index(drop=True) - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize( - "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] -) -@pytest.mark.parametrize("num_cols", [1, 2, 3]) -@pytest.mark.parametrize("agg", ["count", "max", "min"]) -def test_string_groupby_non_key(str_data, num_cols, agg): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_cols): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - - expect = getattr(pdf.groupby("a", as_index=False), agg)() - got = getattr(gdf.groupby("a", as_index=False), agg)() - - expect = expect.sort_values(["a"]).reset_index(drop=True) - got = got.sort_values(["a"]).reset_index(drop=True) - - if agg in ["min", "max"] and len(expect) == 0 and len(got) == 0: - for i in range(num_cols): - expect[i] = expect[i].astype("str") - - assert_eq(expect, got, check_dtype=False) - - -def test_string_groupby_key_index(): - str_data = ["a", "b", "c", "d", "e"] - other_data = [1, 2, 3, 4, 5] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - pdf["a"] = pd.Series(str_data, dtype="str") - gdf["a"] = cudf.Series(str_data, dtype="str") - pdf["b"] = other_data - gdf["b"] = other_data - - expect = pdf.groupby("a", sort=True).count() - got = gdf.groupby("a", sort=True).count() - - assert_eq(expect, got, check_dtype=False) - - -@pytest.mark.parametrize("scalar", ["a", None]) -def test_string_set_scalar(scalar): - pdf = pd.DataFrame() - pdf["a"] = [1, 2, 3, 4, 5] - gdf = cudf.DataFrame.from_pandas(pdf) - - pdf["b"] = "a" - gdf["b"] = "a" - - assert_eq(pdf["b"], gdf["b"]) - assert_eq(pdf, gdf) - - -def test_string_index(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame(rng.random(size=(5, 5))) - gdf = cudf.DataFrame.from_pandas(pdf) - stringIndex = ["a", "b", "c", "d", "e"] - pdf.index = stringIndex - 
gdf.index = stringIndex - assert_eq(pdf, gdf) - stringIndex = np.array(["a", "b", "c", "d", "e"]) - pdf.index = stringIndex - gdf.index = stringIndex - assert_eq(pdf, gdf) - stringIndex = Index(["a", "b", "c", "d", "e"], name="name") - pdf.index = stringIndex.to_pandas() - gdf.index = stringIndex - assert_eq(pdf, gdf) - stringIndex = cudf.Index._from_column( - cudf.core.column.as_column(["a", "b", "c", "d", "e"]), name="name" - ) - pdf.index = stringIndex.to_pandas() - gdf.index = stringIndex - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "item", - [ - ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], - ["a", "a", "a", "a", "A"], - ["A"], - ["abc", "xyz", None, "ab", "123"], - [None, None, "abc", None, "abc"], - ], -) -def test_string_unique(item): - ps = pd.Series(item) - gs = cudf.Series(item) - # Pandas `unique` returns a numpy array - pres = pd.Series(ps.unique()) - # cudf returns a cudf.Series - gres = gs.unique() - assert_eq(pres, gres) - - -def test_string_equality(): - data1 = ["b", "c", "d", "a", "c"] - data2 = ["a", None, "c", "a", "c"] - - ps1 = pd.Series(data1) - ps2 = pd.Series(data2) - gs1 = cudf.Series(data1) - gs2 = cudf.Series(data2) - - expect = ps1 == ps2 - got = gs1 == gs2 - - assert_eq(expect, got.fillna(False)) - - expect = ps1 == "m" - got = gs1 == "m" - - assert_eq(expect, got.fillna(False)) - - ps1 = pd.Series(["a"]) - gs1 = cudf.Series(["a"]) - - expect = ps1 == "m" - got = gs1 == "m" - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "lhs", - [ - ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], - ["abc", "xyz", "a", "ab", "123", "097"], - ], -) -@pytest.mark.parametrize( - "rhs", - [ - ["Cbe", "cbe", "CbeD", "Cb", "ghi", "Cb"], - ["a", "a", "a", "a", "A", "z"], - ], -) -def test_string_binary_op_add(lhs, rhs): - pds = pd.Series(lhs) + pd.Series(rhs) - gds = cudf.Series(lhs) + cudf.Series(rhs) - - assert_eq(pds, gds) - - -def test_string_no_children_properties(): - empty_col = StringColumn( - as_buffer(rmm.DeviceBuffer(size=0)), - size=0, - dtype=np.dtype("object"), - children=(), - ) - assert empty_col.base_children == () - assert empty_col.base_size == 0 - - assert empty_col.children == () - assert empty_col.size == 0 - - assert getsizeof(empty_col) >= 0 # Accounts for Python GC overhead - - -def test_string_table_view_creation(): - data = ["hi"] * 25 + [None] * 2027 - psr = pd.Series(data) - gsr = cudf.Series.from_pandas(psr) - - expect = psr[:1] - got = gsr[:1] - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data,dtype", - [ - (["0.1", "10.2", "10.876"], "float"), - (["-0.1", "10.2", "+10.876"], "float"), - (["1", "10.2", "10.876"], "float32"), - (["+123", "6344556789", "0"], "int"), - (["+123", "6344556789", "0"], "uint64"), - (["+123", "6344556789", "0"], "float"), - (["0.1", "-10.2", "10.876", None], "float"), - ], -) -@pytest.mark.parametrize("obj_type", [None, "str", "category"]) -def test_string_typecast(data, obj_type, dtype): - psr = pd.Series(data, dtype=obj_type) - gsr = cudf.Series(data, dtype=obj_type) - - expect = psr.astype(dtype=dtype) - actual = gsr.astype(dtype=dtype) - assert_eq(expect, actual) - - -@pytest.mark.parametrize( - "data,dtype", - [ - (["0.1", "10.2", "10.876"], "int"), - (["1", "10.2", "+10.876"], "int"), - (["abc", "1", "2", " "], "int"), - (["0.1", "10.2", "10.876"], "uint64"), - (["1", "10.2", "+10.876"], "uint64"), - (["abc", "1", "2", " "], "uint64"), - ([" ", "0.1", "2"], "float"), - ([""], "int"), - ([""], "uint64"), - ([" "], "float"), - (["\n"], "int"), - (["\n"], "uint64"), - (["0.1", "-10.2", "10.876", 
None], "int"), - (["0.1", "-10.2", "10.876", None], "uint64"), - (["0.1", "-10.2", "10.876", None, "ab"], "float"), - (["+", "-"], "float"), - (["+", "-"], "int"), - (["+", "-"], "uint64"), - (["1++++", "--2"], "float"), - (["1++++", "--2"], "int"), - (["1++++", "--2"], "uint64"), - (["++++1", "--2"], "float"), - (["++++1", "--2"], "int"), - (["++++1", "--2"], "uint64"), - ], -) -@pytest.mark.parametrize("obj_type", [None, "str", "category"]) -def test_string_typecast_error(data, obj_type, dtype): - psr = pd.Series(data, dtype=obj_type) - gsr = cudf.Series(data, dtype=obj_type) - - assert_exceptions_equal( - lfunc=psr.astype, - rfunc=gsr.astype, - lfunc_args_and_kwargs=([dtype],), - rfunc_args_and_kwargs=([dtype],), - ) - - -def test_string_int_to_ipv4(): - gsr = cudf.Series([0, None, 0, 698875905, 2130706433, 700776449]).astype( - "uint32" - ) - expected = cudf.Series( - ["0.0.0.0", None, "0.0.0.0", "41.168.0.1", "127.0.0.1", "41.197.0.1"] - ) - - got = cudf.Series._from_column(gsr._column.int2ip()) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"uint32"})) -) -def test_string_int_to_ipv4_dtype_fail(dtype): - gsr = cudf.Series([1, 2, 3, 4, 5]).astype(dtype) - with pytest.raises(TypeError): - gsr._column.int2ip() - - -def test_string_slice_with_mask(): - actual = cudf.Series(["hi", "hello", None]) - expected = actual[0:3] - - assert actual._column.base_size == 3 - assert_eq(actual._column.base_size, expected._column.base_size) - assert_eq(actual._column.null_count, expected._column.null_count) - - assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py deleted file mode 100644 index 833035858be..00000000000 --- a/python/cudf/cudf/tests/test_timedelta.py +++ /dev/null @@ -1,644 +0,0 @@ -# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
- -import datetime -import operator - -import cupy as cp -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.testing import _utils as utils, assert_eq -from cudf.testing._utils import assert_exceptions_equal - - -@pytest.fixture( - params=[ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ] -) -def data_non_overflow(request): - return request.param - - -@pytest.fixture(params=utils.TIMEDELTA_TYPES) -def timedelta_dtype(request): - return request.param - - -@pytest.mark.parametrize( - "data,other", - [ - ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), - ([1000000, 200000, None], [1000000, 200000, None]), - ([], []), - ([None], [None]), - ([None, None, None, None, None], [None, None, None, None, None]), - ( - [12, 12, 22, 343, 4353534, 435342], - [12, 12, 22, 343, 4353534, 435342], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ([1000000, 200000, 3000000], [200000, 34543, 3000000]), - ([1000000, 200000, None], [1000000, 200000, 3000000]), - ([None], [1]), - ( - [12, 12, 22, 343, 4353534, 435342], - [None, 1, 220, 3, 34, 4353423287], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ], -) -@pytest.mark.parametrize( - "ops", - [ - "eq", - "ne", - "lt", - "gt", - "le", - "ge", - "add", - "radd", - "sub", - "rsub", - "floordiv", - "truediv", - "mod", - ], -) -def test_timedelta_ops_misc_inputs(data, other, timedelta_dtype, ops): - gsr = cudf.Series(data, dtype=timedelta_dtype) - other_gsr = cudf.Series(other, dtype=timedelta_dtype) - - psr = gsr.to_pandas() - other_psr = other_gsr.to_pandas() - - expected = getattr(psr, ops)(other_psr) - actual = getattr(gsr, ops)(other_gsr) - if ops in ("eq", "lt", "gt", "le", "ge"): - actual = actual.fillna(False) - elif ops == "ne": - actual = actual.fillna(True) - - if ops == "floordiv": - expected[actual.isna().to_pandas()] = np.nan - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "datetime_data,timedelta_data", - [ - ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), - ([1000000, 200000, None], [1000000, 200000, None]), - ([], []), - ([None], [None]), - ([None, None, None, None, None], [None, None, None, None, None]), - ( - [12, 12, 22, 343, 4353534, 435342], - [12, 12, 22, 343, 4353534, 435342], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ([1000000, 200000, 3000000], [200000, 34543, 3000000]), - ([1000000, 200000, None], [1000000, 200000, 3000000]), - ([None], [1]), - ( - [12, 12, 22, 343, 4353534, 435342], - [None, 1, 220, 3, 34, 4353423287], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ( - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - ), - ( - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 
23234], - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - ), - ( - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ), - ( - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ), - ], -) -@pytest.mark.parametrize("datetime_dtype", utils.DATETIME_TYPES) -@pytest.mark.parametrize( - "ops", - ["add", "sub"], -) -def test_timedelta_ops_datetime_inputs( - datetime_data, timedelta_data, datetime_dtype, timedelta_dtype, ops -): - gsr_datetime = cudf.Series(datetime_data, dtype=datetime_dtype) - gsr_timedelta = cudf.Series(timedelta_data, dtype=timedelta_dtype) - - psr_datetime = gsr_datetime.to_pandas() - psr_timedelta = gsr_timedelta.to_pandas() - - expected = getattr(psr_datetime, ops)(psr_timedelta) - actual = getattr(gsr_datetime, ops)(gsr_timedelta) - - assert_eq(expected, actual) - - if ops == "add": - expected = getattr(psr_timedelta, ops)(psr_datetime) - actual = getattr(gsr_timedelta, ops)(gsr_datetime) - - assert_eq(expected, actual) - elif ops == "sub": - assert_exceptions_equal( - lfunc=operator.sub, - rfunc=operator.sub, - lfunc_args_and_kwargs=([psr_timedelta, psr_datetime],), - rfunc_args_and_kwargs=([gsr_timedelta, gsr_datetime],), - ) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame( - { - "A": pd.Series(pd.date_range("2012-1-1", periods=3, freq="D")), - "B": pd.Series([pd.Timedelta(days=i) for i in range(3)]), - } - ), - pd.DataFrame( - { - "A": pd.Series( - pd.date_range("1994-1-1", periods=50, freq="D") - ), - "B": pd.Series([pd.Timedelta(days=i) for i in range(50)]), - } - ), - ], -) -@pytest.mark.parametrize("op", ["add", "sub"]) -def test_timedelta_dataframe_ops(df, op): - pdf = df - gdf = cudf.from_pandas(pdf) - - if op == "add": - pdf["C"] = pdf["A"] + pdf["B"] - gdf["C"] = gdf["A"] + gdf["B"] - elif op == "sub": - pdf["C"] = pdf["A"] - pdf["B"] - gdf["C"] = gdf["A"] - gdf["B"] - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize( - "data", - [ - [1000000, 200000, 3000000], - [1000000, 200000, None], - [], - [None], - [None, None, None, None, None], - [12, 12, 22, 343, 4353534, 435342], - np.array([10, 20, 30, None, 100]), - cp.asarray([10, 20, 30, 100]), - [1000000, 200000, 3000000], - [1000000, 200000, None], - [1], - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [1.321, 1132.324, 23223231.11, 233.41, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ], -) -@pytest.mark.parametrize( - "other_scalars", - [ - datetime.timedelta(days=768), - datetime.timedelta(seconds=768), - datetime.timedelta(microseconds=7), - datetime.timedelta(minutes=447), - datetime.timedelta(hours=447), - datetime.timedelta(weeks=734), - np.timedelta64(4, "s"), - np.timedelta64(456, "D"), - np.timedelta64(46, "h"), - np.timedelta64("nat"), - np.timedelta64(1, "s"), - np.timedelta64(1, "ms"), - np.timedelta64(1, "us"), - np.timedelta64(1, "ns"), - ], -) -@pytest.mark.parametrize( - "op", - [ - "add", - "sub", - "truediv", - "mod", - "floordiv", - ], -) -def test_timedelta_series_ops_with_scalars( - data, other_scalars, timedelta_dtype, op -): - gsr = cudf.Series(data=data, dtype=timedelta_dtype) - psr = gsr.to_pandas() - - if op == "add": - expected = psr + other_scalars - actual = gsr + other_scalars - elif op == "sub": - expected = psr - other_scalars - actual = gsr - other_scalars - elif op == "truediv": - expected = psr / other_scalars - actual = gsr / other_scalars - elif op == 
"floordiv": - expected = psr // other_scalars - actual = gsr // other_scalars - elif op == "mod": - expected = psr % other_scalars - actual = gsr % other_scalars - - assert_eq(expected, actual) - - if op == "add": - expected = other_scalars + psr - actual = other_scalars + gsr - elif op == "sub": - expected = other_scalars - psr - actual = other_scalars - gsr - elif op == "truediv": - expected = other_scalars / psr - actual = other_scalars / gsr - elif op == "floordiv": - expected = other_scalars // psr - actual = other_scalars // gsr - elif op == "mod": - expected = other_scalars % psr - actual = other_scalars % gsr - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "reverse", - [ - False, - pytest.param( - True, - marks=pytest.mark.xfail( - strict=True, - reason=( - "timedelta modulo by zero is dubiously defined in " - "both pandas and cuDF " - "(see https://github.com/rapidsai/cudf/issues/5938)" - ), - ), - ), - ], -) -def test_timedelta_series_mod_with_scalar_zero(reverse): - gsr = cudf.Series(data=[0.2434], dtype=np.timedelta64(1, "ns")) - psr = gsr.to_pandas() - scalar = datetime.timedelta(days=768) - if reverse: - expected = scalar % psr - actual = scalar % gsr - else: - expected = psr % scalar - actual = gsr % scalar - assert_eq(expected, actual) - - -@pytest.mark.parametrize("datetime_dtype", utils.DATETIME_TYPES) -def test_timedelta_index_datetime_index_ops( - data_non_overflow, datetime_dtype, timedelta_dtype -): - gdt = cudf.Index(data_non_overflow, dtype=datetime_dtype) - gtd = cudf.Index(data_non_overflow, dtype=timedelta_dtype) - - pdt = gdt.to_pandas() - ptd = gtd.to_pandas() - - assert_eq(gdt - gtd, pdt - ptd) - assert_eq(gdt + gtd, pdt + ptd) - - -@pytest.mark.parametrize( - "datetime_data,timedelta_data", - [ - ([1000000, 200000, 3000000], [1000000, 200000, 3000000]), - ([1000000, 200000, None], [1000000, 200000, None]), - ([], []), - ([None], [None]), - ([None, None, None, None, None], [None, None, None, None, None]), - ( - [12, 12, 22, 343, 4353534, 435342], - [12, 12, 22, 343, 4353534, 435342], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ([1000000, 200000, 3000000], [200000, 34543, 3000000]), - ([1000000, 200000, None], [1000000, 200000, 3000000]), - ([None], [1]), - ( - [12, 12, 22, 343, 4353534, 435342], - [None, 1, 220, 3, 34, 4353423287], - ), - (np.array([10, 20, 30, None, 100]), np.array([10, 20, 30, None, 100])), - (cp.asarray([10, 20, 30, 100]), cp.asarray([10, 20, 30, 100])), - ( - [12, 11, 232, 223432411, 2343241, 234324, 23234], - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - ), - ( - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - ), - ( - [11, 1132324, 2322323111, 23341, 2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ), - ( - [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323], - [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234], - ), - ], -) -@pytest.mark.parametrize("datetime_dtype", utils.DATETIME_TYPES) -def test_timedelta_datetime_index_ops_misc( - datetime_data, timedelta_data, datetime_dtype, timedelta_dtype -): - gdt = cudf.Index(datetime_data, dtype=datetime_dtype) - gtd = cudf.Index(timedelta_data, dtype=timedelta_dtype) - - pdt = gdt.to_pandas() - ptd = gtd.to_pandas() - - assert_eq(gdt - gtd, pdt - ptd) - assert_eq(gdt + gtd, pdt + ptd) - - -@pytest.mark.parametrize( - "other_scalars", - [ - pd.Timedelta(1513393355.5, unit="s"), 
- pd.Timedelta(34765, unit="D"), - datetime.timedelta(days=768), - datetime.timedelta(seconds=768), - datetime.timedelta(microseconds=7), - datetime.timedelta(minutes=447), - datetime.timedelta(hours=447), - datetime.timedelta(weeks=734), - np.timedelta64(4, "s"), - np.timedelta64(456, "D"), - np.timedelta64(46, "h"), - np.timedelta64("nat"), - np.timedelta64(1, "s"), - np.timedelta64(1, "ms"), - np.timedelta64(1, "us"), - np.timedelta64(1, "ns"), - ], -) -@pytest.mark.parametrize( - "op", - [ - "add", - "sub", - "truediv", - "floordiv", - ], -) -@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning:pandas") -def test_timedelta_index_ops_with_scalars( - request, data_non_overflow, other_scalars, timedelta_dtype, op -): - gtdi = cudf.Index(data=data_non_overflow, dtype=timedelta_dtype) - ptdi = gtdi.to_pandas() - - if op == "add": - expected = ptdi + other_scalars - actual = gtdi + other_scalars - elif op == "sub": - expected = ptdi - other_scalars - actual = gtdi - other_scalars - elif op == "truediv": - expected = ptdi / other_scalars - actual = gtdi / other_scalars - elif op == "floordiv": - expected = ptdi // other_scalars - actual = gtdi // other_scalars - - assert_eq(expected, actual) - - if op == "add": - expected = other_scalars + ptdi - actual = other_scalars + gtdi - elif op == "sub": - expected = other_scalars - ptdi - actual = other_scalars - gtdi - elif op == "truediv": - expected = other_scalars / ptdi - actual = other_scalars / gtdi - elif op == "floordiv": - expected = other_scalars // ptdi - actual = other_scalars // gtdi - - # Division by zero for datetime or timedelta is - # dubiously defined in both pandas (Any // 0 -> 0 in - # pandas) and cuDF (undefined behaviour) - request.applymarker( - pytest.mark.xfail( - condition=( - op == "floordiv" - and 0 in ptdi.astype("int") - and np.timedelta64(other_scalars).item() is not None - ), - reason="Related to https://github.com/rapidsai/cudf/issues/5938", - ) - ) - assert_eq(expected, actual) - - -def test_timedelta_invalid_ops(): - sr = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") - psr = sr.to_pandas() - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([psr, 1],), - rfunc_args_and_kwargs=([sr, 1],), - ) - - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([psr, "a"],), - rfunc_args_and_kwargs=([sr, "a"],), - ) - - dt_sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") - dt_psr = dt_sr.to_pandas() - - assert_exceptions_equal( - lfunc=operator.mod, - rfunc=operator.mod, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.mod, - rfunc=operator.mod, - lfunc_args_and_kwargs=([psr, "a"],), - rfunc_args_and_kwargs=([sr, "a"],), - check_exception_type=False, - ) - - assert_exceptions_equal( - lfunc=operator.gt, - rfunc=operator.gt, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.lt, - rfunc=operator.lt, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.ge, - rfunc=operator.ge, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.le, - rfunc=operator.le, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.truediv, - 
rfunc=operator.truediv, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.floordiv, - rfunc=operator.floordiv, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.mul, - rfunc=operator.mul, - lfunc_args_and_kwargs=([psr, dt_psr],), - rfunc_args_and_kwargs=([sr, dt_sr],), - ) - - assert_exceptions_equal( - lfunc=operator.mul, - rfunc=operator.mul, - lfunc_args_and_kwargs=([psr, psr],), - rfunc_args_and_kwargs=([sr, sr],), - check_exception_type=False, - ) - - assert_exceptions_equal( - lfunc=operator.xor, - rfunc=operator.xor, - lfunc_args_and_kwargs=([psr, psr],), - rfunc_args_and_kwargs=([sr, sr],), - ) - - -@pytest.mark.parametrize("op", [operator.add, operator.sub]) -def test_timdelta_binop_tz_timestamp(op): - s = cudf.Series([1, 2, 3], dtype="timedelta64[ns]") - pd_tz_timestamp = pd.Timestamp("1970-01-01 00:00:00.000000001", tz="utc") - with pytest.raises(NotImplementedError): - op(s, pd_tz_timestamp) - date_tz_scalar = datetime.datetime.now(datetime.timezone.utc) - with pytest.raises(NotImplementedError): - op(s, date_tz_scalar) - - -@pytest.mark.parametrize( - "op", - [ - operator.lt, - operator.gt, - operator.le, - operator.ge, - operator.eq, - operator.ne, - ], -) -def test_timedelta_series_cmpops_pandas_compatibility(op): - gsr1 = cudf.Series( - data=[123, 456, None, 321, None], dtype="timedelta64[ns]" - ) - psr1 = gsr1.to_pandas() - - gsr2 = cudf.Series( - data=[123, 456, 789, None, None], dtype="timedelta64[ns]" - ) - psr2 = gsr2.to_pandas() - - expect = op(psr1, psr2) - with cudf.option_context("mode.pandas_compatible", True): - got = op(gsr1, gsr2) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "method, kwargs", - [ - ["sum", {}], - ["mean", {}], - ["median", {}], - ["std", {}], - ["std", {"ddof": 0}], - ], -) -def test_tdi_reductions(method, kwargs): - pd_tdi = pd.TimedeltaIndex(["1 day", "2 days", "3 days"]) - cudf_tdi = cudf.from_pandas(pd_tdi) - - result = getattr(pd_tdi, method)(**kwargs) - expected = getattr(cudf_tdi, method)(**kwargs) - assert result == expected From 57f59474a04f98dafc63f7ffa40bab39d9141829 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Sep 2025 18:35:25 -0700 Subject: [PATCH 244/366] Avoid more direct construction of cuDF classic columns (#19858) Precursor to https://github.com/rapidsai/cudf/issues/18726. 
We'll want to minimize direct construction of cuDF classic column via their attributes and instead use a pylibcudf in the future Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) URL: https://github.com/rapidsai/cudf/pull/19858 --- python/cudf/cudf/core/column/categorical.py | 132 ++++-------------- python/cudf/cudf/core/column/column.py | 4 +- python/cudf/cudf/core/column/datetime.py | 8 +- python/cudf/cudf/core/column/interval.py | 30 +--- python/cudf/cudf/core/column/struct.py | 10 +- python/cudf/cudf/core/series.py | 6 +- .../cudf/pandas/scripts/conftest-patch.py | 7 - 7 files changed, 45 insertions(+), 152 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 464d63f75f0..3b23265d2c3 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -231,13 +231,7 @@ def __setitem__(self, key, value): value = value.codes codes = self.codes codes[key] = value - out = type(self)( - data=self.data, - size=codes.size, - dtype=self.dtype, - mask=codes.base_mask, - children=(codes,), - ) + out = codes._with_type_metadata(self.dtype) self._mimic_inplace(out, inplace=True) def _fill( @@ -261,14 +255,8 @@ def _fill( return result def slice(self, start: int, stop: int, stride: int | None = None) -> Self: - codes = self.codes.slice(start, stop, stride) - return type(self)( - data=self.data, # type: ignore[arg-type] - size=codes.size, - dtype=self.dtype, - mask=codes.base_mask, - offset=codes.offset, - children=(codes,), + return self.codes.slice(start, stop, stride)._with_type_metadata( # type: ignore[return-value] + self.dtype ) def _reduce( @@ -328,23 +316,12 @@ def _normalize_binop_operand( codes = column.as_column( self._encode(other), length=len(self), dtype=self.codes.dtype ) - return type(self)( - data=None, - size=self.size, - dtype=self.dtype, - mask=self.base_mask, - children=(codes,), # type: ignore[arg-type] - ) + return codes._with_type_metadata(self.dtype) def sort_values(self, ascending: bool = True, na_position="last") -> Self: - codes = self.codes.sort_values(ascending, na_position) - return type(self)( - data=self.data, # type: ignore[arg-type] - size=codes.size, - dtype=self.dtype, - mask=codes.base_mask, - children=(codes,), - ) + return self.codes.sort_values( # type: ignore[return-value] + ascending, na_position + )._with_type_metadata(self.dtype) def element_indexing(self, index: int) -> ScalarLike: val = self.codes.element_indexing(index) @@ -439,15 +416,7 @@ def data_array_view( return self.codes.data_array_view(mode=mode) def unique(self) -> Self: - codes = self.codes.unique() - return type(self)( - data=self.data, # type: ignore[arg-type] - size=codes.size, - dtype=self.dtype, - mask=codes.base_mask, - offset=codes.offset, - children=(codes,), - ) + return self.codes.unique()._with_type_metadata(self.dtype) # type: ignore[return-value] def _cast_self_and_other_for_where( self, other: ScalarLike | ColumnBase, inplace: bool @@ -595,18 +564,11 @@ def find_and_replace( replacement_col = catmap._data["index"].astype(replaced.codes.dtype) replaced_codes = column.as_column(replaced.codes) - output = replaced_codes.replace(to_replace_col, replacement_col) - codes = as_unsigned_codes(len(new_cats["cats"]), output) # type: ignore[arg-type] - - result = type(self)( - data=self.data, # type: ignore[arg-type] - size=codes.size, - dtype=CategoricalDtype( + new_codes = 
replaced_codes.replace(to_replace_col, replacement_col) + result = new_codes._with_type_metadata( + CategoricalDtype( categories=new_cats["cats"], ordered=self.dtype.ordered - ), - mask=codes.base_mask, - offset=codes.offset, - children=(codes,), + ) ) if result.dtype != self.dtype: warnings.warn( @@ -617,7 +579,7 @@ def find_and_replace( "instead.", FutureWarning, ) - return result + return result # type: ignore[return-value] def isnull(self) -> ColumnBase: """ @@ -700,7 +662,7 @@ def as_categorical_column(self, dtype: CategoricalDtype) -> Self: if isinstance( self.categories.dtype, cudf.StructDtype ) and isinstance(dtype.categories.dtype, cudf.IntervalDtype): - codes = self.codes + return self._with_type_metadata(dtype) else: # Otherwise if both categories are of different Column types, # return a column full of nulls. @@ -712,15 +674,7 @@ def as_categorical_column(self, dtype: CategoricalDtype) -> Self: dtype=self.codes.dtype, ), ) - codes = as_unsigned_codes(len(dtype.categories), codes) - return type(self)( - data=self.data, # type: ignore[arg-type] - size=self.size, - dtype=dtype, - mask=self.base_mask, - offset=self.offset, - children=(codes,), - ) + return codes._with_type_metadata(dtype) # type: ignore[return-value] return self.set_categories( new_categories=dtype.categories, ordered=bool(dtype.ordered) @@ -813,12 +767,12 @@ def _with_type_metadata(self: Self, dtype: Dtype) -> Self: if isinstance(dtype, CategoricalDtype): return type(self)( data=self.data, # type: ignore[arg-type] - size=self.codes.size, + size=self.size, dtype=dtype, - mask=self.codes.base_mask, - offset=self.codes.offset, - null_count=self.codes.null_count, - children=(self.codes,), + mask=self.base_mask, + offset=self.offset, + null_count=self.null_count, + children=self.base_children, # type: ignore[arg-type] ) return self @@ -847,15 +801,8 @@ def set_categories( "new_categories must have the same " "number of items as old categories" ) - out_col = type(self)( - data=self.data, # type: ignore[arg-type] - size=self.size, - dtype=CategoricalDtype( - categories=new_categories, ordered=ordered - ), - mask=self.base_mask, - offset=self.offset, - children=(self.codes,), + return self._with_type_metadata( + CategoricalDtype(categories=new_categories, ordered=ordered) ) else: out_col = self @@ -870,16 +817,10 @@ def set_categories( dtype=self.codes.dtype, ), ) - new_codes = as_unsigned_codes(len(new_categories), new_codes) - out_col = type(self)( - data=self.data, # type: ignore[arg-type] - size=self.size, - dtype=CategoricalDtype( + out_col = new_codes._with_type_metadata( # type: ignore[assignment] + CategoricalDtype( categories=new_categories, ordered=ordered - ), - mask=self.base_mask, - offset=self.offset, - children=(new_codes,), + ) ) elif ( not out_col._categories_equal(new_categories, ordered=True) @@ -967,16 +908,8 @@ def _set_categories( new_codes = cast( cudf.core.column.numerical.NumericalColumn, df._data["new_codes"] ) - - # codes can't have masks, so take mask out before moving in - new_codes = as_unsigned_codes(len(new_cats), new_codes) - return type(self)( - data=self.data, # type: ignore[arg-type] - size=new_codes.size, - dtype=CategoricalDtype(categories=new_cats, ordered=ordered), - mask=new_codes.base_mask, - offset=new_codes.offset, - children=(new_codes,), + return new_codes._with_type_metadata( # type: ignore[return-value] + CategoricalDtype(categories=new_cats, ordered=ordered) ) def add_categories(self, new_categories: Any) -> Self: @@ -1056,13 +989,6 @@ def remove_unused_categories(self) -> 
Self: def as_ordered(self, ordered: bool) -> Self: if self.dtype.ordered == ordered: return self - return type(self)( - data=self.data, # type: ignore[arg-type] - size=self.size, - dtype=CategoricalDtype( - categories=self.categories, ordered=ordered - ), - mask=self.base_mask, - offset=self.offset, - children=self.children, + return self._with_type_metadata( + CategoricalDtype(categories=self.categories, ordered=ordered) ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 161556391eb..091e6a81c32 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2325,9 +2325,7 @@ def reduce(self, reduction_op: str, **kwargs) -> ScalarLike: new_dtype = type(col_dtype)(precision, scale) result_col = result_col.astype(new_dtype) elif isinstance(col_dtype, IntervalDtype): - result_col = type(self).from_struct_column( # type: ignore[attr-defined] - result_col, closed=col_dtype.closed - ) + result_col = result_col._with_type_metadata(col_dtype) return result_col.element_indexing(0) @acquire_spill_lock() diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 8a0e5e10a15..0cc1402c141 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -908,10 +908,6 @@ def tz_convert(self, tz: str | None) -> DatetimeColumn: elif tz == str(self.dtype.tz): return self.copy() utc_time = self._utc_time - return type(self)( - data=utc_time.base_data, # type: ignore[arg-type] - dtype=pd.DatetimeTZDtype(self.time_unit, tz), - mask=utc_time.base_mask, - size=utc_time.size, - offset=utc_time.offset, + return utc_time._with_type_metadata( + pd.DatetimeTZDtype(self.time_unit, tz) ) diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 1aab299e7ad..4fae8d77d88 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -71,36 +71,8 @@ def to_arrow(self) -> pa.Array: struct_arrow = pa.array([], typ.storage_type) return pa.ExtensionArray.from_storage(typ, struct_arrow) - @classmethod - def from_struct_column( - cls, - struct_column: StructColumn, - closed: Literal["left", "right", "both", "neither"] = "right", - ) -> Self: - first_field_name = next(iter(struct_column.dtype.fields.keys())) - return cls( - data=None, - size=struct_column.size, - dtype=IntervalDtype( - struct_column.dtype.fields[first_field_name], closed - ), - mask=struct_column.base_mask, - offset=struct_column.offset, - null_count=struct_column.null_count, - children=struct_column.base_children, # type: ignore[arg-type] - ) - def copy(self, deep: bool = True) -> Self: - struct_copy = super().copy(deep=deep) - return IntervalColumn( # type: ignore[return-value] - data=None, - size=struct_copy.size, - dtype=IntervalDtype(self.dtype.subtype, self.dtype.closed), - mask=struct_copy.base_mask, - offset=struct_copy.offset, - null_count=struct_copy.null_count, - children=struct_copy.base_children, # type: ignore[arg-type] - ) + return super().copy(deep=deep)._with_type_metadata(self.dtype) # type: ignore[return-value] @functools.cached_property def is_empty(self) -> ColumnBase: diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index d0a9f0389f6..0815af9c223 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -220,7 +220,15 @@ def _with_type_metadata(self: StructColumn, dtype: Dtype) -> StructColumn: # Check 
IntervalDtype first because it's a subclass of StructDtype if isinstance(dtype, IntervalDtype): - return IntervalColumn.from_struct_column(self, closed=dtype.closed) + return IntervalColumn( + data=None, + size=self.size, + dtype=dtype, + mask=self.base_mask, + offset=self.offset, + null_count=self.null_count, + children=self.base_children, # type: ignore[arg-type] + ) elif isinstance(dtype, StructDtype): return StructColumn( data=None, diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f2adf407d2c..d07a8f76205 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -35,7 +35,6 @@ ) from cudf.core.column import ( ColumnBase, - IntervalColumn, as_column, ) from cudf.core.column.column import concat_columns @@ -3112,8 +3111,9 @@ def value_counts( # Pandas returns an IntervalIndex as the index of res # this condition makes sure we do too if bins is given if bins is not None and len(res) == len(res.index.categories): - interval_col = IntervalColumn.from_struct_column( - res.index._column._get_decategorized_column() + struct_col = res.index._column._get_decategorized_column() + interval_col = struct_col._with_type_metadata( + res.index.dtype.categories.dtype ) res.index = cudf.IntervalIndex._from_column( interval_col, name=res.index.name diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index cf12fae4337..c8b645b6d26 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -3777,12 +3777,8 @@ def pytest_unconfigure(config): "tests/extension/test_categorical.py::TestCategorical::test_series_constructor_scalar_na_with_index", "tests/extension/test_categorical.py::TestCategorical::test_setitem_frame_2d_values", "tests/extension/test_categorical.py::TestCategorical::test_to_numpy", - "tests/extension/test_categorical.py::TestCategorical::test_unstack[frame-index0]", - "tests/extension/test_categorical.py::TestCategorical::test_unstack[frame-index1]", "tests/extension/test_categorical.py::TestCategorical::test_unstack[frame-index2]", "tests/extension/test_categorical.py::TestCategorical::test_unstack[frame-index3]", - "tests/extension/test_categorical.py::TestCategorical::test_unstack[series-index0]", - "tests/extension/test_categorical.py::TestCategorical::test_unstack[series-index1]", "tests/extension/test_categorical.py::TestCategorical::test_unstack[series-index2]", "tests/extension/test_categorical.py::TestCategorical::test_unstack[series-index3]", "tests/extension/test_common.py::test_ellipsis_index", @@ -6021,8 +6017,6 @@ def pytest_unconfigure(config): "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_stack_unstack[False]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_stack_unstack[True]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_long_index", - "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_mixed_extension_types[0]", - "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_mixed_extension_types[1]", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_multi_level_cols", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_multi_level_rows_and_cols", "tests/frame/test_stack_unstack.py::TestDataFrameReshape::test_unstack_nan_index2", @@ -6058,7 +6052,6 @@ def pytest_unconfigure(config): 
"tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_unstack_preserve_names[True]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_unstack_wrong_level_name[False-unstack]", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_stack_unstack_wrong_level_name[True-unstack]", - "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_categorical_columns", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_preserve_types", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_with_level_has_nan", "tests/frame/test_stack_unstack.py::TestStackUnstackMultiLevel::test_unstack_with_missing_int_cast_to_float", From 161f21d88a4e3b57cafa49585d2572603a0d01e7 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 2 Sep 2025 21:06:45 -0500 Subject: [PATCH 245/366] Bump pandas supported version to `2.3.2` (#19856) This PR updates latest supported pandas version in `cudf`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19856 --- dependencies.yaml | 2 +- python/cudf/cudf/core/_compat.py | 2 +- python/pylibcudf/tests/common/utils.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index de0115a9e6d..73d6900d8f1 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -790,7 +790,7 @@ dependencies: - numba-cuda==0.19.1 - matrix: {dependencies: "latest"} packages: - - pandas==2.3.1 + - pandas==2.3.2 - matrix: packages: - output_types: conda diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index d17ea599f71..9b6f3c6ad67 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -3,7 +3,7 @@ import pandas as pd from packaging import version -PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.3.1") +PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.3.2") PANDAS_VERSION = version.parse(pd.__version__) diff --git a/python/pylibcudf/tests/common/utils.py b/python/pylibcudf/tests/common/utils.py index 1ec0afed6cb..6302d59563b 100644 --- a/python/pylibcudf/tests/common/utils.py +++ b/python/pylibcudf/tests/common/utils.py @@ -338,11 +338,11 @@ def sink_to_str(sink): for comparison """ if isinstance(sink, (str, os.PathLike)): - with open(sink, "r") as f: + with open(sink, "r", encoding="utf-8") as f: str_result = f.read() elif isinstance(sink, io.BytesIO): sink.seek(0) - str_result = sink.read().decode() + str_result = sink.read().decode("utf-8") else: sink.seek(0) str_result = sink.read() From 839e03952de15eb52d366f554d166c210ad906d2 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 3 Sep 2025 11:10:49 -0700 Subject: [PATCH 246/366] Migrate mixed join to use multiset (#19660) This PR migrates mixed join to use the new `cuco::static_multiset` data structure and fixes several small issues in the mixed semi join code. This is the last PR to close #12261. 
Authors: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Shruti Shivakumar (https://github.com/shrshi) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/19660 --- cpp/include/cudf/join/mixed_join.hpp | 127 ++--- cpp/src/join/hash_join.cu | 15 +- cpp/src/join/join_common_utils.cuh | 77 ++- cpp/src/join/join_common_utils.hpp | 12 +- cpp/src/join/mixed_join.cu | 459 +++++++++--------- cpp/src/join/mixed_join_common_utils.cuh | 97 ++-- cpp/src/join/mixed_join_kernel.cu | 29 +- cpp/src/join/mixed_join_kernel.cuh | 296 ++++++++--- cpp/src/join/mixed_join_kernel.hpp | 79 ++- cpp/src/join/mixed_join_kernel_nulls.cu | 29 +- cpp/src/join/mixed_join_kernels_semi.cu | 5 +- cpp/src/join/mixed_join_semi.cu | 2 +- cpp/src/join/mixed_join_size_kernel.cu | 21 +- cpp/src/join/mixed_join_size_kernel.cuh | 181 ++++--- cpp/src/join/mixed_join_size_kernel.hpp | 63 ++- cpp/src/join/mixed_join_size_kernel_nulls.cu | 22 +- cpp/tests/join/mixed_join_tests.cu | 65 ++- .../java/ai/rapids/cudf/MixedJoinSize.java | 43 -- java/src/main/java/ai/rapids/cudf/Table.java | 114 ++--- java/src/main/native/src/TableJni.cpp | 157 +++--- .../test/java/ai/rapids/cudf/TableTest.java | 40 +- python/pylibcudf/pylibcudf/join.pyx | 106 +++- python/pylibcudf/pylibcudf/libcudf/join.pxd | 27 +- 23 files changed, 1121 insertions(+), 945 deletions(-) delete mode 100644 java/src/main/java/ai/rapids/cudf/MixedJoinSize.java diff --git a/cpp/include/cudf/join/mixed_join.hpp b/cpp/include/cudf/join/mixed_join.hpp index 1d7261eebb3..8f17b0ebbc7 100644 --- a/cpp/include/cudf/join/mixed_join.hpp +++ b/cpp/include/cudf/join/mixed_join.hpp @@ -53,7 +53,7 @@ namespace CUDF_EXPORT cudf { * responsibility to choose a suitable compare_nulls value AND use appropriate * null-safe operators in the expression. * - * If the provided output size or per-row counts are incorrect, behavior is undefined. + * If the specified output size is less than the actual output size, the behavior is undefined. * * @code{.pseudo} * left_equality: {{0, 1, 2}} @@ -76,8 +76,7 @@ namespace CUDF_EXPORT cudf { * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values join to each other or not - * @param output_size_data An optional pair of values indicating the exact output size and the - * number of matches for each row in the larger of the two input tables, left or right (may be + * @param output_size An optional value indicating the exact output size (may be * precomputed using the corresponding mixed_inner_join_size API). 
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned table and columns' device memory
@@ -87,16 +86,15 @@ namespace CUDF_EXPORT cudf {
 */
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
-mixed_inner_join(
-  table_view const& left_equality,
-  table_view const& right_equality,
-  table_view const& left_conditional,
-  table_view const& right_conditional,
-  ast::expression const& binary_predicate,
-  null_equality compare_nulls = null_equality::EQUAL,
-  std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data = {},
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+mixed_inner_join(table_view const& left_equality,
+                 table_view const& right_equality,
+                 table_view const& left_conditional,
+                 table_view const& right_conditional,
+                 ast::expression const& binary_predicate,
+                 null_equality compare_nulls            = null_equality::EQUAL,
+                 std::optional<std::size_t> output_size = {},
+                 rmm::cuda_stream_view stream           = cudf::get_default_stream(),
+                 rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
 /**
  * @brief Returns a pair of row index vectors corresponding to all pairs of
@@ -115,7 +113,7 @@ mixed_inner_join(
 * responsibility to choose a suitable compare_nulls value AND use appropriate
 * null-safe operators in the expression.
 *
- * If the provided output size or per-row counts are incorrect, behavior is undefined.
+ * If the specified output size is less than the actual output size, the behavior is undefined.
 *
 * @code{.pseudo}
 * left_equality: {{0, 1, 2}}
@@ -138,8 +136,7 @@ mixed_inner_join(
 * @param right_conditional The right table used for the conditional join
 * @param binary_predicate The condition on which to join
 * @param compare_nulls Whether or not null values join to each other or not
- * @param output_size_data An optional pair of values indicating the exact output size and the
- * number of matches for each row in the larger of the two input tables, left or right (may be
+ * @param output_size An optional value indicating the exact output size (may be
 * precomputed using the corresponding mixed_left_join_size API).
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned table and columns' device memory
@@ -149,16 +146,15 @@ mixed_inner_join(
 */
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
-mixed_left_join(
-  table_view const& left_equality,
-  table_view const& right_equality,
-  table_view const& left_conditional,
-  table_view const& right_conditional,
-  ast::expression const& binary_predicate,
-  null_equality compare_nulls = null_equality::EQUAL,
-  std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data = {},
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+mixed_left_join(table_view const& left_equality,
+                table_view const& right_equality,
+                table_view const& left_conditional,
+                table_view const& right_conditional,
+                ast::expression const& binary_predicate,
+                null_equality compare_nulls            = null_equality::EQUAL,
+                std::optional<std::size_t> output_size = {},
+                rmm::cuda_stream_view stream           = cudf::get_default_stream(),
+                rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
 /**
  * @brief Returns a pair of row index vectors corresponding to all pairs of
@@ -177,7 +173,7 @@ mixed_left_join(
 * responsibility to choose a suitable compare_nulls value AND use appropriate
 * null-safe operators in the expression.
 *
- * If the provided output size or per-row counts are incorrect, behavior is undefined.
+ * If the specified output size is less than the actual output size, the behavior is undefined.
 *
 * @code{.pseudo}
 * left_equality: {{0, 1, 2}}
@@ -200,9 +196,7 @@ mixed_left_join(
 * @param right_conditional The right table used for the conditional join
 * @param binary_predicate The condition on which to join
 * @param compare_nulls Whether or not null values join to each other or not
- * @param output_size_data An optional pair of values indicating the exact output size and the
- * number of matches for each row in the larger of the two input tables, left or right (may be
- * precomputed using the corresponding mixed_full_join_size API).
+ * @param output_size An optional value indicating the exact output size
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned table and columns' device memory
 *
@@ -211,16 +205,15 @@ mixed_left_join(
 */
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
-mixed_full_join(
-  table_view const& left_equality,
-  table_view const& right_equality,
-  table_view const& left_conditional,
-  table_view const& right_conditional,
-  ast::expression const& binary_predicate,
-  null_equality compare_nulls = null_equality::EQUAL,
-  std::optional<std::pair<std::size_t, device_span<size_type const>>> output_size_data = {},
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+mixed_full_join(table_view const& left_equality,
+                table_view const& right_equality,
+                table_view const& left_conditional,
+                table_view const& right_conditional,
+                ast::expression const& binary_predicate,
+                null_equality compare_nulls            = null_equality::EQUAL,
+                std::optional<std::size_t> output_size = {},
+                rmm::cuda_stream_view stream           = cudf::get_default_stream(),
+                rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
 /**
  * @brief Returns an index vector corresponding to all rows in the left tables
 *
@@ -232,7 +225,6 @@ mixed_full_join(
 * choose a suitable compare_nulls value AND use appropriate null-safe
 * operators in the expression.
 *
- * If the provided output size or per-row counts are incorrect, behavior is undefined.
 *
 * @code{.pseudo}
 * left_equality: {{0, 1, 2}}
@@ -258,8 +250,7 @@ mixed_full_join(
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned table and columns' device memory
 *
- * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
- * the result of performing a mixed full join between the four input tables.
+ * @return A vector of indices from the left table that have matches in the right table.
 */
 std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_semi_join(
   table_view const& left_equality,
@@ -282,7 +273,6 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_semi_join(
 * choose a suitable compare_nulls value AND use appropriate null-safe
 * operators in the expression.
 *
- * If the provided output size or per-row counts are incorrect, behavior is undefined.
 *
 * @code{.pseudo}
 * left_equality: {{0, 1, 2}}
@@ -308,8 +298,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_semi_join(
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned table and columns' device memory
 *
- * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
- * the result of performing a mixed full join between the four input tables.
+ * @return A vector of indices from the left table that do not have matches in the right table.
 */
 std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_anti_join(
   table_view const& left_equality,
   table_view const& right_equality,
   table_view const& left_conditional,
   table_view const& right_conditional,
@@ -345,23 +334,16 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_anti_join(
 * @param binary_predicate The condition on which to join
 * @param compare_nulls Whether or not null values join to each other or not
 * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned table and columns' device memory
 *
- * @return A pair containing the size that would result from performing the
- * requested join and the number of matches for each row in one of the two
- * tables. Which of the two tables is an implementation detail and should not
- * be relied upon, simply passed to the corresponding `mixed_inner_join` API as
- * is.
+ * @return The size that would result from performing the requested join.
 */
-std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_inner_join_size(
-  table_view const& left_equality,
-  table_view const& right_equality,
-  table_view const& left_conditional,
-  table_view const& right_conditional,
-  ast::expression const& binary_predicate,
-  null_equality compare_nulls = null_equality::EQUAL,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+std::size_t mixed_inner_join_size(table_view const& left_equality,
+                                  table_view const& right_equality,
+                                  table_view const& left_conditional,
+                                  table_view const& right_conditional,
+                                  ast::expression const& binary_predicate,
+                                  null_equality compare_nulls  = null_equality::EQUAL,
+                                  rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @brief Returns the exact number of matches (rows) when performing a
@@ -387,23 +369,16 @@ std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_in
 * @param binary_predicate The condition on which to join
 * @param compare_nulls Whether or not null values join to each other or not
 * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned table and columns' device memory
 *
- * @return A pair containing the size that would result from performing the
- * requested join and the number of matches for each row in one of the two
- * tables. Which of the two tables is an implementation detail and should not
- * be relied upon, simply passed to the corresponding `mixed_left_join` API as
- * is.
+ * @return The size that would result from performing the requested join.
 */
-std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_left_join_size(
-  table_view const& left_equality,
-  table_view const& right_equality,
-  table_view const& left_conditional,
-  table_view const& right_conditional,
-  ast::expression const& binary_predicate,
-  null_equality compare_nulls = null_equality::EQUAL,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+std::size_t mixed_left_join_size(table_view const& left_equality,
+                                 table_view const& right_equality,
+                                 table_view const& left_conditional,
+                                 table_view const& right_conditional,
+                                 ast::expression const& binary_predicate,
+                                 null_equality compare_nulls  = null_equality::EQUAL,
+                                 rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /** @} */  // end of group
 
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 06220e299de..5a211b91466 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -52,20 +52,7 @@ namespace {
 using hash_table_t = cudf::hash_join::impl_type::hash_table_t;
 
 // Multimap type used for mixed joins. TODO: This is a temporary alias used
-// TODO: `pair_equal` and `pair_fn` to be moved to common utils during
-// mixed-join migration
-template <typename Hasher>
-struct pair_fn {
-  pair_fn(Hasher hash) : _hash{std::move(hash)} {}
-
-  __device__ cuco::pair<hash_value_type, size_type> operator()(size_type i) const noexcept
-  {
-    return cuco::pair{_hash(i), i};
-  }
-
- private:
-  Hasher _hash;
-};
+// TODO: `pair_equal` to be moved to common utils during mixed-join migration
 template
 class pair_equal {
 
diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh
index 91dab3e67f4..11210e11362 100644
--- a/cpp/src/join/join_common_utils.cuh
+++ b/cpp/src/join/join_common_utils.cuh
@@ -33,43 +33,17 @@
 #include
 
 namespace cudf::detail {
-/**
- * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value.
- *
- * @param hash The hash value to potentially remap
- * @param sentinel The reserved value
- */
-template <typename H, typename S>
-constexpr auto remap_sentinel_hash(H hash, S sentinel)
-{
-  // Arbitrarily choose hash - 1
-  return (hash == sentinel) ? (hash - 1) : hash;
-}
+template <typename Hasher>
+struct pair_fn {
+  CUDF_HOST_DEVICE pair_fn(Hasher hash) : _hash{std::move(hash)} {}
 
-/**
- * @brief Device functor to create a pair of {hash_value, row_index} for a given row.
- *
- * @tparam T Type of row index, must be convertible to `size_type`.
- * @tparam Hasher The type of internal hasher to compute row hash.
- */
-template <typename T, typename Hasher>
-class make_pair_function {
- public:
-  CUDF_HOST_DEVICE make_pair_function(Hasher const& hash, hash_value_type const empty_key_sentinel)
-    : _hash{hash}, _empty_key_sentinel{empty_key_sentinel}
-  {
-  }
-
-  __device__ __forceinline__ auto operator()(size_type i) const noexcept
+  __device__ cuco::pair<hash_value_type, size_type> operator()(size_type i) const noexcept
   {
-    // Compute the hash value of row `i`
-    auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel);
-    return cuco::make_pair(row_hash_value, T{i});
+    return cuco::pair{_hash(i), i};
   }
 
  private:
  Hasher _hash;
-  hash_value_type const _empty_key_sentinel;
 };
 
 /**
@@ -165,37 +139,39 @@ get_trivial_left_join_indices(table_view const& left,
 * @param bitmask Bitmask to denote whether a row is valid.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 */
-template <typename MultimapType>
+template <typename HashTable>
 void build_join_hash_table(
   cudf::table_view const& build,
   std::shared_ptr<experimental::row::equality::preprocessed_table> const& preprocessed_build,
-  MultimapType& hash_table,
-  bool has_nulls,
+  HashTable& hash_table,
+  bool has_nested_nulls,
   null_equality nulls_equal,
   [[maybe_unused]] bitmask_type const* bitmask,
   rmm::cuda_stream_view stream)
 {
-  CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty");
-  CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows");
+  CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty", std::invalid_argument);
+  CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows", std::invalid_argument);
 
-  auto const row_hash   = experimental::row::hash::row_hasher{preprocessed_build};
-  auto const hash_build = row_hash.device_hasher(nullate::DYNAMIC{has_nulls});
+  auto insert_rows = [&](auto const& build, auto const& d_hasher) {
+    auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher});
 
-  auto const empty_key_sentinel = hash_table.get_empty_key_sentinel();
-  make_pair_function pair_func{hash_build, empty_key_sentinel};
+    if (nulls_equal == cudf::null_equality::EQUAL or not nullable(build)) {
+      hash_table.insert_async(iter, iter + build.num_rows(), stream.value());
+    } else {
+      auto const stencil = thrust::counting_iterator<size_type>{0};
+      auto const pred    = row_is_valid{bitmask};
 
-  auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_func);
+      // insert valid rows
+      hash_table.insert_if_async(iter, iter + build.num_rows(), stencil, pred, stream.value());
+    }
+  };
 
-  size_type const build_table_num_rows{build.num_rows()};
-  if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) {
-    hash_table.insert(iter, iter + build_table_num_rows, stream.value());
-  } else {
-    thrust::counting_iterator<size_type> stencil(0);
-    row_is_valid pred{bitmask};
+  auto const nulls = nullate::DYNAMIC{has_nested_nulls};
 
-    // insert valid rows
-    hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value());
-  }
+  auto const row_hash =
+    experimental::row::hash::row_hasher{preprocessed_build};
+  auto const d_hasher = row_hash.device_hasher(nulls);
+
+  insert_rows(build, d_hasher);
 }
 
 // Convenient alias for a pair of unique pointers to device uvectors.
@@ -262,4 +238,5 @@ struct valid_range {
     return ((index >= start) && (index < stop));
   }
 };
+
 }  // namespace cudf::detail
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index a78b07e6be4..27d3c3afe45 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -20,7 +20,7 @@
 #include
 #include
 
-#include
+#include
 #include
 
 namespace cudf::detail {
@@ -31,15 +31,5 @@
 using pair_type = cuco::pair<hash_value_type, size_type>;
 
 using hash_type = cuco::murmurhash3_32<hash_value_type>;
 
-// Multimap type used for mixed joins. TODO: This is a temporary alias used
-// until the mixed joins are converted to using CGs properly. Right now it's
-// using a cooperative group of size 1.
-using mixed_multimap_type =
-  cuco::static_multimap<hash_value_type,
-                        size_type,
-                        cuda::thread_scope_device,
-                        cudf::detail::cuco_allocator<char>,
-                        cuco::legacy::double_hashing<1, hash_type, hash_type>>;
-
 bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type);
 
 }  // namespace cudf::detail
diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu
index 2109dc8951d..d48c2fe7873 100644
--- a/cpp/src/join/mixed_join.cu
+++ b/cpp/src/join/mixed_join.cu
@@ -16,6 +16,7 @@
 
 #include "join_common_utils.cuh"
 #include "join_common_utils.hpp"
+#include "mixed_join_common_utils.cuh"
 #include "mixed_join_kernel.hpp"
 #include "mixed_join_size_kernel.hpp"
@@ -37,8 +38,9 @@
 #include
 #include
 
-#include
-#include
+#include
+#include
+#include
 
 #include
 #include
@@ -46,19 +48,86 @@
 
 namespace cudf {
 namespace detail {
+namespace {
+
+/**
+ * @brief Precompute input pairs and hash indices for mixed join operations
+ *
+ * Precomputes input pairs and hash indices in a single pass to reduce code duplication
+ * between mixed_join and compute_mixed_join_output_size functions.
+ *
+ * Precomputation reduces register pressure in probing kernels by avoiding expensive
+ * on-the-fly calculations of iterator transforms and hash table indices.
+ *
+ * @tparam HashProbe Type of the device hasher for computing probe keys
+ * @param hash_table Hash table for probing
+ * @param hash_probe Device hasher for probe keys
+ * @param probe_table_num_rows Number of rows in probe table
+ * @param stream CUDA stream
+ * @param mr Memory resource
+ * @return Pair of device vectors: precomputed input pairs and hash indices
+ */
+template <typename HashProbe>
+std::pair<rmm::device_uvector<cuco::pair<hash_value_type, size_type>>,
+          rmm::device_uvector<cuda::std::pair<size_type, size_type>>>
+precompute_mixed_join_data(mixed_join_hash_table_t const& hash_table,
+                           HashProbe const& hash_probe,
+                           size_type probe_table_num_rows,
+                           rmm::cuda_stream_view stream,
+                           rmm::device_async_resource_ref mr)
+{
+  auto input_pairs =
+    rmm::device_uvector<cuco::pair<hash_value_type, size_type>>(probe_table_num_rows, stream, mr);
+  auto hash_indices =
+    rmm::device_uvector<cuda::std::pair<size_type, size_type>>(probe_table_num_rows, stream, mr);
+
+  auto const extent                        = hash_table.capacity();
+  auto const probe_hash_fn                 = hash_table.hash_function();
+  static constexpr std::size_t bucket_size = mixed_join_hash_table_t::bucket_size;
+
+  // Functor to pre-compute both input pairs and initial slots and step sizes for double hashing.
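+  //
+  // With buckets of `bucket_size` slots, the first hash picks the initial
+  // bucket-aligned slot index and the second hash yields a stride that is a
+  // nonzero multiple of the bucket size, so successive probes always move to
+  // a different bucket instead of re-reading the same slots.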
+ auto precompute_fn = [=] __device__(size_type i) { + auto const probe_key = cuco::pair{hash_probe(i), i}; + + // Use the probing scheme's hash functions for proper double hashing + auto const hash1_val = cuda::std::get<0>(probe_hash_fn)(probe_key); + auto const hash2_val = cuda::std::get<1>(probe_hash_fn)(probe_key); + + // Double hashing logic: initial position and step size + auto const init_idx = (hash1_val % (extent / bucket_size)) * bucket_size; + auto const step_val = + ((hash2_val % (extent / bucket_size - std::size_t{1})) + std::size_t{1}) * bucket_size; + + return cuda::std::pair{ + probe_key, + cuda::std::pair{static_cast(init_idx), static_cast(step_val)}}; + }; + + // Single transform to fill both arrays using zip iterator + thrust::transform( + rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(probe_table_num_rows), + thrust::make_zip_iterator(thrust::make_tuple(input_pairs.begin(), hash_indices.begin())), + precompute_fn); + + return std::make_pair(std::move(input_pairs), std::move(hash_indices)); +} + +} // anonymous namespace + std::pair>, std::unique_ptr>> -mixed_join( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - join_kind join_type, - std::optional>> const& output_size_data, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +mixed_join(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + std::optional const& output_size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), "The left conditional and equality tables must have the same number of rows."); @@ -72,10 +141,10 @@ mixed_join( auto const left_num_rows{left_conditional.num_rows()}; auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); - // The "outer" table is the larger of the two tables. The kernels are - // launched with one thread per row of the outer table, which also means that - // it is the probe table for the hash - auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + // The "probe" table is the table we iterate over during the join operation. + // For performance optimization, we choose the larger table as the probe table. + // The kernels are launched with one thread per row of the probe table. + auto const probe_table_num_rows{swap_tables ? right_num_rows : left_num_rows}; // We can immediately filter out cases where the right table is empty. In // some cases, we return all the rows of the left table with a corresponding @@ -129,13 +198,17 @@ mixed_join( auto probe_view = table_device_view::create(probe, stream); auto build_view = table_device_view::create(build, stream); - // Don't use multimap_type because we want a CG size of 1. 
- mixed_multimap_type hash_table{ - compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; + mixed_join_hash_table_t hash_table{ + cuco::extent{static_cast(build.num_rows())}, + cudf::detail::CUCO_DESIRED_LOAD_FACTOR, + cuco::empty_key{ + cuco::pair{std::numeric_limits::max(), cudf::detail::JoinNoneValue}}, + {}, + {}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream.value()}, + stream.value()}; // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we @@ -151,24 +224,21 @@ mixed_join( compare_nulls, static_cast(row_bitmask.data()), stream); - auto hash_table_view = hash_table.get_device_view(); + auto hash_table_storage = cudf::device_span>{ + hash_table.data(), hash_table.capacity()}; auto left_conditional_view = table_device_view::create(left_conditional, stream); auto right_conditional_view = table_device_view::create(right_conditional, stream); // For inner joins we support optimizing the join by launching one thread for // whichever table is larger rather than always using the left table. - detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + detail::grid_1d const config(probe_table_num_rows, DEFAULT_JOIN_BLOCK_SIZE); auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; join_kind const kernel_join_type = join_type == join_kind::FULL_JOIN ? join_kind::LEFT_JOIN : join_type; - // If the join size data was not provided as an input, compute it here. + // If the join size was not provided as an input, compute it here. std::size_t join_size; - // Using an optional because we only need to allocate a new vector if one was - // not passed as input, and rmm::device_uvector is not default constructible - std::optional> matches_per_row{}; - device_span matches_per_row_span{}; auto const preprocessed_probe = experimental::row::equality::preprocessed_table::create(probe, stream); @@ -178,50 +248,38 @@ mixed_join( cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); - if (output_size_data.has_value()) { - join_size = output_size_data->first; - matches_per_row_span = output_size_data->second; + auto [input_pairs, hash_indices] = + precompute_mixed_join_data(hash_table, hash_probe, probe_table_num_rows, stream, mr); + + if (output_size.has_value()) { + join_size = output_size.value(); } else { - matches_per_row = - rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; - // Note that the view goes out of scope after this else statement, but the - // data owned by matches_per_row stays alive so the data pointer is valid. 
- auto mutable_matches_per_row_span = cudf::device_span{ - matches_per_row->begin(), static_cast(outer_num_rows)}; - matches_per_row_span = cudf::device_span{ - matches_per_row->begin(), static_cast(outer_num_rows)}; if (has_nulls) { join_size = launch_compute_mixed_join_output_size(*left_conditional_view, *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, kernel_join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), parser.device_expression_data, swap_tables, - mutable_matches_per_row_span, config, shmem_size_per_block, - stream, - mr); + stream); } else { join_size = launch_compute_mixed_join_output_size(*left_conditional_view, *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, kernel_join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), parser.device_expression_data, swap_tables, - mutable_matches_per_row_span, config, shmem_size_per_block, - stream, - mr); + stream); } } @@ -236,14 +294,6 @@ mixed_join( std::make_unique>(0, stream, mr)); } - // Given the number of matches per row, we need to compute the offsets for insertion. - auto join_result_offsets = - rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; - thrust::exclusive_scan(rmm::exec_policy{stream}, - matches_per_row_span.begin(), - matches_per_row_span.end(), - join_result_offsets.begin()); - auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); @@ -253,16 +303,14 @@ mixed_join( if (has_nulls) { launch_mixed_join(*left_conditional_view, *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, kernel_join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), join_output_l, join_output_r, parser.device_expression_data, - join_result_offsets.data(), swap_tables, config, shmem_size_per_block, @@ -270,16 +318,14 @@ mixed_join( } else { launch_mixed_join(*left_conditional_view, *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, kernel_join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), join_output_l, join_output_r, parser.device_expression_data, - join_result_offsets.data(), swap_tables, config, shmem_size_per_block, @@ -298,16 +344,14 @@ mixed_join( return join_indices; } -std::pair>> -compute_mixed_join_output_size(table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - join_kind join_type, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::size_t compute_mixed_join_output_size(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + rmm::cuda_stream_view stream) { // Until we add logic to handle the number of non-matches in the right table, // full joins are not supported in this function. 
Note that this does not @@ -329,52 +373,37 @@ compute_mixed_join_output_size(table_view const& left_equality, auto const left_num_rows{left_conditional.num_rows()}; auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); - // The "outer" table is the larger of the two tables. The kernels are - // launched with one thread per row of the outer table, which also means that - // it is the probe table for the hash - auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + // The "probe" table is the table we iterate over during the join operation. + // For performance optimization, we choose the larger table as the probe table. + // The kernels are launched with one thread per row of the probe table. + auto const probe_table_num_rows{swap_tables ? right_num_rows : left_num_rows}; - auto matches_per_row = std::make_unique>( - static_cast(outer_num_rows), stream, mr); - auto matches_per_row_span = cudf::device_span{ - matches_per_row->begin(), static_cast(outer_num_rows)}; - - // We can immediately filter out cases where one table is empty. In - // some cases, we return all the rows of the other table with a corresponding - // null index for the empty table; in others, we return an empty output. + // We can immediately filter out cases where one table is empty. if (right_num_rows == 0) { switch (join_type) { - // Left, left anti, and full all return all the row indices from left - // with a corresponding NULL from the right. - case join_kind::LEFT_JOIN: - case join_kind::FULL_JOIN: { - thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); - return {left_num_rows, std::move(matches_per_row)}; + // Left joins return all the row indices from left with a corresponding NULL from the right. + case join_kind::LEFT_JOIN: { + return left_num_rows; } - // Inner and left semi joins return empty output because no matches can exist. + // Inner joins return empty output because no matches can exist. case join_kind::INNER_JOIN: { - thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); - return {0, std::move(matches_per_row)}; + return 0; } default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { switch (join_type) { - // Left, left anti, left semi, and inner joins all return empty sets. + // Left and inner joins all return empty sets. case join_kind::LEFT_JOIN: case join_kind::INNER_JOIN: { - thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); - return {0, std::move(matches_per_row)}; - } - // Full joins need to return the trivial complement. - case join_kind::FULL_JOIN: { - thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); - return {right_num_rows, std::move(matches_per_row)}; + return 0; } default: CUDF_FAIL("Invalid join kind."); break; } } + auto mr = cudf::get_current_device_resource_ref(); + // If evaluating the expression may produce null outputs we create a nullable // output column and follow the null-supporting expression evaluation code // path. @@ -396,19 +425,22 @@ compute_mixed_join_output_size(table_view const& left_equality, auto probe_view = table_device_view::create(probe, stream); auto build_view = table_device_view::create(build, stream); - // Don't use multimap_type because we want a CG size of 1. 
- mixed_multimap_type hash_table{ - compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; + mixed_join_hash_table_t hash_table{ + cuco::extent{static_cast(build.num_rows())}, + cudf::detail::CUCO_DESIRED_LOAD_FACTOR, + cuco::empty_key{ + cuco::pair{std::numeric_limits::max(), cudf::detail::JoinNoneValue}}, + {}, + {}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream.value()}, + stream.value()}; // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. - auto const row_bitmask = - cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()).first; + auto const row_bitmask = cudf::detail::bitmask_and(build, stream, mr).first; auto const preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); build_join_hash_table(build, @@ -418,14 +450,15 @@ compute_mixed_join_output_size(table_view const& left_equality, compare_nulls, static_cast(row_bitmask.data()), stream); - auto hash_table_view = hash_table.get_device_view(); + auto hash_table_storage = cudf::device_span>{ + hash_table.data(), hash_table.capacity()}; auto left_conditional_view = table_device_view::create(left_conditional, stream); auto right_conditional_view = table_device_view::create(right_conditional, stream); // For inner joins we support optimizing the join by launching one thread for // whichever table is larger rather than always using the left table. - detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + detail::grid_1d const config(probe_table_num_rows, DEFAULT_JOIN_BLOCK_SIZE); auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; auto const preprocessed_probe = @@ -436,60 +469,58 @@ compute_mixed_join_output_size(table_view const& left_equality, cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); + // Precompute input pairs and hash indices using common utility function + auto [input_pairs, hash_indices] = + precompute_mixed_join_data(hash_table, hash_probe, probe_table_num_rows, stream, mr); + // Determine number of output rows without actually building the output to simply // find what the size of the output will be. 
- std::size_t size = 0; - if (has_nulls) { - size = launch_compute_mixed_join_output_size(*left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - matches_per_row_span, - config, - shmem_size_per_block, - stream, - mr); - } else { - size = launch_compute_mixed_join_output_size(*left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - matches_per_row_span, - config, - shmem_size_per_block, - stream, - mr); - } + std::size_t const size = [&]() { + if (has_nulls) { + return launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + join_type, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), + parser.device_expression_data, + swap_tables, + config, + shmem_size_per_block, + stream); + } else { + return launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + join_type, + equality_probe, + hash_table_storage, + input_pairs.data(), + hash_indices.data(), + parser.device_expression_data, + swap_tables, + config, + shmem_size_per_block, + stream); + } + }(); - return {size, std::move(matches_per_row)}; + return size; } } // namespace detail std::pair>, std::unique_ptr>> -mixed_inner_join( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - std::optional>> const output_size_data, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +mixed_inner_join(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional const output_size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::mixed_join(left_equality, @@ -499,20 +530,18 @@ mixed_inner_join( binary_predicate, compare_nulls, detail::join_kind::INNER_JOIN, - output_size_data, + output_size, stream, mr); } -std::pair>> mixed_inner_join_size( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::size_t mixed_inner_join_size(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); return detail::compute_mixed_join_output_size(left_equality, @@ -522,22 +551,20 @@ std::pair>> mixed_in binary_predicate, compare_nulls, detail::join_kind::INNER_JOIN, - stream, - mr); + stream); } std::pair>, std::unique_ptr>> -mixed_left_join( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - std::optional>> const output_size_data, - rmm::cuda_stream_view stream, - 
rmm::device_async_resource_ref mr) +mixed_left_join(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional const output_size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::mixed_join(left_equality, @@ -547,20 +574,18 @@ mixed_left_join( binary_predicate, compare_nulls, detail::join_kind::LEFT_JOIN, - output_size_data, + output_size, stream, mr); } -std::pair>> mixed_left_join_size( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::size_t mixed_left_join_size(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); return detail::compute_mixed_join_output_size(left_equality, @@ -570,22 +595,20 @@ std::pair>> mixed_le binary_predicate, compare_nulls, detail::join_kind::LEFT_JOIN, - stream, - mr); + stream); } std::pair>, std::unique_ptr>> -mixed_full_join( - table_view const& left_equality, - table_view const& right_equality, - table_view const& left_conditional, - table_view const& right_conditional, - ast::expression const& binary_predicate, - null_equality compare_nulls, - std::optional>> const output_size_data, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +mixed_full_join(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional const output_size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::mixed_join(left_equality, @@ -595,7 +618,7 @@ mixed_full_join( binary_predicate, compare_nulls, detail::join_kind::FULL_JOIN, - output_size_data, + output_size, stream, mr); } diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 4a52cfe098a..cc3babf7e0d 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include #include namespace cudf { @@ -38,6 +39,56 @@ using row_hash = using row_equality = cudf::experimental::row::equality::strong_index_comparator_adapter< cudf::experimental::row::equality::device_row_comparator>; +// Comparator that always returns false to ensure all values are inserted (like hash_join) +struct mixed_join_always_not_equal { + __device__ constexpr bool operator()(cuco::pair const&, + cuco::pair const&) const noexcept + { + // multiset always insert + return false; + } +}; + +// hasher1 and hasher2 used for double hashing. The first hash is used to determine the initial slot +// and the second hash is used to determine the step size. 
+//
+// For the first hash, we use the row hash value directly so there is no need to hash it again.
+//
+// For the second hash, we hash the row hash value again to determine the step size.
+struct mixed_join_hasher1 {
+  __device__ constexpr hash_value_type operator()(
+    cuco::pair<hash_value_type, size_type> const& key) const noexcept
+  {
+    return key.first;
+  }
+};
+
+struct mixed_join_hasher2 {
+  mixed_join_hasher2(hash_value_type seed) : _hash{seed} {}
+
+  __device__ constexpr hash_value_type operator()(
+    cuco::pair<hash_value_type, size_type> const& key) const noexcept
+  {
+    return _hash(key.first);
+  }
+
+ private:
+  using hash_type = cuco::murmurhash3_32<hash_value_type>;
+  hash_type _hash;
+};
+
+// Hash table type used for mixed joins
+using mixed_join_hash_table_t =
+  cuco::static_multiset<cuco::pair<hash_value_type, size_type>,
+                        cuco::extent<std::size_t>,
+                        cuda::thread_scope_device,
+                        mixed_join_always_not_equal,
+                        cuco::double_hashing<1, mixed_join_hasher1, mixed_join_hasher2>,
+                        cudf::detail::cuco_allocator<char>,
+                        cuco::storage<2>>;
+template <typename... Operators>
+using mixed_join_hash_table_ref_t = mixed_join_hash_table_t::ref_type<Operators...>;
 
 /**
  * @brief Equality comparator for use with cuco map methods that require expression evaluation.
 *
@@ -77,17 +128,8 @@ template <bool has_nulls>
 struct single_expression_equality : expression_equality<has_nulls> {
   using expression_equality<has_nulls>::expression_equality;
 
-  // The parameters are build/probe rather than left/right because the operator
-  // is called by cuco's kernels with parameters in this order (note that this
-  // is an implementation detail that we should eventually stop relying on by
-  // defining operators with suitable heterogeneous typing). Rather than
-  // converting to left/right semantics, we can operate directly on build/probe
-  // until we get to the expression evaluator, which needs to convert back to
-  // left/right semantics because the conditional expression need not be
-  // commutative.
-  // TODO: The input types should really be size_type.
-  __device__ __forceinline__ bool operator()(hash_value_type const build_row_index,
-                                             hash_value_type const probe_row_index) const noexcept
+  __device__ __forceinline__ bool operator()(size_type const left_index,
+                                             size_type const right_index) const noexcept
   {
     using cudf::experimental::row::lhs_index_type;
     using cudf::experimental::row::rhs_index_type;
@@ -97,12 +139,13 @@ struct single_expression_equality : expression_equality<has_nulls> {
     // 1. The contents of the columns involved in the equality condition are equal.
     // 2. The predicate evaluated on the relevant columns (already encoded in the evaluator)
     // evaluates to true.
-    if (this->equality_probe(lhs_index_type{probe_row_index}, rhs_index_type{build_row_index})) {
-      auto const lrow_idx = this->swap_tables ? build_row_index : probe_row_index;
-      auto const rrow_idx = this->swap_tables ? probe_row_index : build_row_index;
+    if (this->equality_probe(lhs_index_type{left_index}, rhs_index_type{right_index})) {
+      // For the AST evaluator, we need to map back to left/right table semantics
+      auto const left_table_idx  = this->swap_tables ? right_index : left_index;
+      auto const right_table_idx = this->swap_tables ?
left_index : right_index; this->evaluator.evaluate(output_dest, - static_cast(lrow_idx), - static_cast(rrow_idx), + static_cast(left_table_idx), + static_cast(right_table_idx), 0, this->thread_intermediate_storage); return (output_dest.is_valid() && output_dest.value()); @@ -129,16 +172,8 @@ template struct pair_expression_equality : public expression_equality { using expression_equality::expression_equality; - // The parameters are build/probe rather than left/right because the operator - // is called by cuco's kernels with parameters in this order (note that this - // is an implementation detail that we should eventually stop relying on by - // defining operators with suitable heterogeneous typing). Rather than - // converting to left/right semantics, we can operate directly on build/probe - // until we get to the expression evaluator, which needs to convert back to - // left/right semantics because the conditional expression need not be - // commutative. - __device__ __forceinline__ bool operator()(pair_type const& build_row, - pair_type const& probe_row) const noexcept + __device__ __forceinline__ bool operator()(pair_type const& left_row, + pair_type const& right_row) const noexcept { using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; @@ -149,10 +184,10 @@ struct pair_expression_equality : public expression_equality { // 2. The contents of the columns involved in the equality condition are equal. // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) // evaluates to true. - if ((probe_row.first == build_row.first) && - this->equality_probe(lhs_index_type{probe_row.second}, rhs_index_type{build_row.second})) { - auto const lrow_idx = this->swap_tables ? build_row.second : probe_row.second; - auto const rrow_idx = this->swap_tables ? probe_row.second : build_row.second; + if ((left_row.first == right_row.first) && + this->equality_probe(lhs_index_type{left_row.second}, rhs_index_type{right_row.second})) { + auto const lrow_idx = this->swap_tables ? right_row.second : left_row.second; + auto const rrow_idx = this->swap_tables ? left_row.second : right_row.second; this->evaluator.evaluate( output_dest, lrow_idx, rrow_idx, 0, this->thread_intermediate_storage); return (output_dest.is_valid() && output_dest.value()); diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu index cd4016837cc..307a7cb5035 100644 --- a/cpp/src/join/mixed_join_kernel.cu +++ b/cpp/src/join/mixed_join_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,27 +17,22 @@ #include "mixed_join_kernel.cuh" #include "mixed_join_kernel.hpp" -namespace cudf { -namespace detail { +namespace cudf::detail { template void launch_mixed_join( table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, + cudf::size_type* join_output_l, + cudf::size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, rmm::cuda_stream_view stream); -} // namespace detail - -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index 830c783f2e1..52659b316c5 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -26,40 +26,185 @@ #include #include #include +#include #include #include -#include -#include -#include +#include -namespace cudf { -namespace detail { +namespace cudf::detail { -namespace cg = cooperative_groups; +/** + * @brief Optimized standalone retrieve implementation for hash table probing + * + * This implementation uses precomputed hash indices and storage references + * for efficient mixed join operations with minimal overhead. + * + * @tparam is_outer Boolean flag indicating whether outer join semantics should be used + */ +template +__device__ __forceinline__ void retrieve( + cooperative_groups::thread_block const& block, + cudf::device_span> hash_table_storage, + pair_expression_equality const& key_equal, + cuco::pair const* input_probe_begin, + cuco::pair const* input_probe_end, + cuda::std::pair const* input_hash_begin, + cudf::size_type* output_probe, + cudf::size_type* output_match, + cuda::atomic& atomic_counter) noexcept +{ + static constexpr auto bucket_size = 2; + static constexpr auto block_size = DEFAULT_JOIN_BLOCK_SIZE; + namespace cg = cooperative_groups; + + auto const n = cuda::std::distance(input_probe_begin, input_probe_end); + + // Use warps to efficiently flush shared memory buffers when they get full + // Each warp manages its own buffer segment to avoid conflicts + auto constexpr num_warps = block_size / warp_size; + auto constexpr max_matches_per_step = warp_size * bucket_size; + auto constexpr buffer_size = max_matches_per_step + warp_size; + + auto const warp = cg::tiled_partition(block); + auto const warp_id = warp.meta_group_rank(); + auto const stride = block_size; + auto idx = threadIdx.x; + + __shared__ cudf::size_type probe_output_buffer[num_warps][buffer_size]; + __shared__ cudf::size_type match_output_buffer[num_warps][buffer_size]; + __shared__ cudf::size_type warp_counter[num_warps]; + + if (warp.thread_rank() == 0) { + cuda::atomic_ref init_counter_ref{ + warp_counter[warp_id]}; + init_counter_ref.store(0, cuda::memory_order_relaxed); + } + warp.sync(); + + auto flush_output_buffer = [&](auto const& warp_group) { + size_type offset = 0; + auto const count = warp_counter[warp_id]; + auto const rank = warp_group.thread_rank(); + if (rank == 0) { offset = 
atomic_counter.fetch_add(count, cuda::memory_order_relaxed); } + offset = warp_group.shfl(offset, 0); + + for (auto i = rank; i < count; i += warp_group.size()) { + *(output_probe + offset + i) = probe_output_buffer[warp_id][i]; + *(output_match + offset + i) = match_output_buffer[warp_id][i]; + } + }; + + while (warp.any(idx < n)) { + bool active_flag = idx < n; + auto const active_warp = cg::binary_partition(warp, active_flag); + + if (active_flag) { + auto const& probe_key = *(input_probe_begin + idx); + auto const& hash_idx = *(input_hash_begin + idx); + + auto const extent = hash_table_storage.size(); + auto current_slot_idx = static_cast(hash_idx.first); + auto const step = static_cast(hash_idx.second); + + bool running = true; + [[maybe_unused]] bool found_match = false; + + while (active_warp.any(running)) { + if (running) { + auto const bucket_slots = *reinterpret_cast< + cuda::std::array, 2> const*>( + hash_table_storage.data() + current_slot_idx); + + auto const first_slot_is_empty = bucket_slots[0].second == cudf::detail::JoinNoneValue; + auto const second_slot_is_empty = bucket_slots[1].second == cudf::detail::JoinNoneValue; + auto const first_equals = + (not first_slot_is_empty and key_equal(probe_key, bucket_slots[0])); + auto const second_equals = + (not second_slot_is_empty and key_equal(probe_key, bucket_slots[1])); -template -CUDF_KERNEL void __launch_bounds__(block_size) + if (first_equals or second_equals) { + if constexpr (is_outer) { found_match = true; } + + cudf::size_type num_matches = (first_equals ? 1 : 0) + (second_equals ? 1 : 0); + cuda::atomic_ref counter_ref{ + warp_counter[warp_id]}; + cudf::size_type output_idx = + counter_ref.fetch_add(num_matches, cuda::memory_order_relaxed); + + auto const probe_row_index = probe_key.second; + + if (first_equals) { + probe_output_buffer[warp_id][output_idx] = probe_row_index; + match_output_buffer[warp_id][output_idx] = bucket_slots[0].second; + if (second_equals) { + probe_output_buffer[warp_id][output_idx + 1] = probe_row_index; + match_output_buffer[warp_id][output_idx + 1] = bucket_slots[1].second; + } + } else if (second_equals) { + probe_output_buffer[warp_id][output_idx] = probe_row_index; + match_output_buffer[warp_id][output_idx] = bucket_slots[1].second; + } + } + + if (first_slot_is_empty or second_slot_is_empty) { + running = false; + + if constexpr (is_outer) { + if (not found_match) { + cuda::atomic_ref counter_ref{ + warp_counter[warp_id]}; + auto const output_idx = counter_ref.fetch_add(1, cuda::memory_order_relaxed); + auto const probe_row_index = probe_key.second; + probe_output_buffer[warp_id][output_idx] = probe_row_index; + match_output_buffer[warp_id][output_idx] = cudf::detail::JoinNoneValue; + } + } + } + } + + active_warp.sync(); + // Check if warp's shared memory buffer is getting full and needs flushing + if (warp_counter[warp_id] > (buffer_size - max_matches_per_step)) { + flush_output_buffer(active_warp); + active_warp.sync(); + + if (active_warp.thread_rank() == 0) { warp_counter[warp_id] = 0; } + active_warp.sync(); + } + + current_slot_idx = (current_slot_idx + step) % extent; + if (current_slot_idx == static_cast(hash_idx.first)) { running = false; } + } + } + + warp.sync(); + idx += stride; + } + + warp.sync(); + // Final flush: ensure any remaining buffered matches are written to global memory + cuda::atomic_ref final_counter_ref{ + warp_counter[warp_id]}; + if (final_counter_ref.load(cuda::memory_order_relaxed) > 0) { flush_output_buffer(warp); } +} + +template +CUDF_KERNEL void 
__launch_bounds__(DEFAULT_JOIN_BLOCK_SIZE) mixed_join(table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, size_type* join_output_l, size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables) + bool swap_tables) { - // Normally the casting of a shared memory array is used to create multiple - // arrays of different types from the shared memory buffer, but here it is - // used to circumvent conflicts between arrays of different types between - // different template instantiations due to the extern specifier. extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = + auto intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; @@ -68,84 +213,75 @@ CUDF_KERNEL void __launch_bounds__(block_size) cudf::size_type const right_num_rows = right_table.num_rows(); auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + auto const evaluator = cudf::ast::detail::expression_evaluator{ + left_table, right_table, device_expression_data}; + auto const equality = pair_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); + namespace cg = cooperative_groups; - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; + auto const block = cg::this_thread_block(); + cuda::atomic counter_ref{0}; - if (outer_row_index < outer_num_rows) { - // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); - // Figure out the number of elements for this key. - auto query_pair = pair_func(outer_row_index); - auto equality = pair_expression_equality{ - evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - - auto probe_key_begin = thrust::make_discard_iterator(); - auto probe_value_begin = swap_tables ? join_output_r + join_result_offsets[outer_row_index] - : join_output_l + join_result_offsets[outer_row_index]; - auto contained_key_begin = thrust::make_discard_iterator(); - auto contained_value_begin = swap_tables ? 
join_output_l + join_result_offsets[outer_row_index] - : join_output_r + join_result_offsets[outer_row_index]; + auto const block_begin_offset = block.group_index().x * DEFAULT_JOIN_BLOCK_SIZE; + auto const block_end_offset = cuda::std::min( + outer_num_rows, static_cast(block_begin_offset + DEFAULT_JOIN_BLOCK_SIZE)); + if (block_begin_offset < block_end_offset) { if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { - hash_table_view.pair_retrieve_outer(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); + retrieve(block, + hash_table_storage, + equality, + input_pairs + block_begin_offset, + input_pairs + block_end_offset, + hash_indices + block_begin_offset, + swap_tables ? join_output_r : join_output_l, + swap_tables ? join_output_l : join_output_r, + counter_ref); } else { - hash_table_view.pair_retrieve(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); + retrieve(block, + hash_table_storage, + equality, + input_pairs + block_begin_offset, + input_pairs + block_end_offset, + hash_indices + block_begin_offset, + swap_tables ? join_output_r : join_output_l, + swap_tables ? join_output_l : join_output_r, + counter_ref); } } } template -void launch_mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables, - detail::grid_1d const config, - int64_t shmem_size_per_block, - rmm::cuda_stream_view stream) +void launch_mixed_join( + table_device_view left_table, + table_device_view right_table, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + bool swap_tables, + detail::grid_1d const& config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream) { - mixed_join + mixed_join <<>>( left_table, right_table, - probe, - build, - hash_probe, - equality_probe, join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs, + hash_indices, join_output_l, join_output_r, device_expression_data, - join_result_offsets, swap_tables); } -} // namespace detail - -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/src/join/mixed_join_kernel.hpp b/cpp/src/join/mixed_join_kernel.hpp index 8187ad03aca..e72d9370db9 100644 --- a/cpp/src/join/mixed_join_kernel.hpp +++ b/cpp/src/join/mixed_join_kernel.hpp @@ -16,66 +16,65 @@ #pragma once -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include #include #include +#include + +#include + namespace CUDF_EXPORT cudf { namespace detail { /** - * @brief Performs a join using the combination of a hash lookup to identify - * equal rows between one pair of tables and the evaluation of an expression - * containing an arbitrary expression. 
+ * @brief Performs a mixed join using hash lookup and expression evaluation. * * This method probes the hash table with each row in the probe table using a * custom equality comparator that also checks that the conditional expression * evaluates to true between the left/right tables when a match is found * between probe and build rows. * - * @tparam block_size The number of threads per block for this kernel * @tparam has_nulls Whether or not the inputs may contain nulls. * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[out] join_output_l The left result of the join operation - * @param[out] join_output_r The right result of the join operation - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] join_result_offsets The starting indices in join_output[l|r] - * where the matches for each row begin. Equivalent to a prefix sum of - * matches_per_row. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + * @param left_table The left table + * @param right_table The right table + * @param join_type The type of join to be performed + * @param equality_probe The equality comparator used when probing the hash table + * @param hash_table_storage The hash table storage for probing operations + * @param input_pairs Array of hash-value/row-index pairs for probing + * @param hash_indices Array of hash index pairs for efficient lookup + * @param join_output_l The left result of the join operation + * @param join_output_r The right result of the join operation + * @param device_expression_data Container of device data required to evaluate the desired + * expression + * @param swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. 
Otherwise, loop over right rows + * @param config Grid configuration for kernel launch + * @param shmem_size_per_block Shared memory size per block in bytes + * @param stream CUDA stream used for device memory operations and kernel launches */ template -void launch_mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables, - detail::grid_1d const config, - int64_t shmem_size_per_block, - rmm::cuda_stream_view stream); +void launch_mixed_join( + cudf::table_device_view left_table, + cudf::table_device_view right_table, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, + cudf::size_type* join_output_l, + cudf::size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + bool swap_tables, + detail::grid_1d const& config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail - } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu index 185aa133f2d..a45781f2588 100644 --- a/cpp/src/join/mixed_join_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
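[Editor's note] To make the `input_pairs`/`hash_indices` contract documented above concrete, here is a minimal host-side sketch of the probe walk the new kernels perform: start at the precomputed initial slot index, scan a two-slot bucket, advance by the precomputed step modulo the table extent, and stop at the first empty slot or after a full cycle. All names here (`slot`, `count_matches`, the toy `main`) are illustrative stand-ins, not the patch's actual device code, which additionally evaluates the AST predicate inside its key-equality functor.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Illustrative stand-ins; assumptions for this sketch, not cudf's real types.
using size_type = int32_t;
constexpr size_type JoinNoneValue = -1;       // empty-slot sentinel (row-index side)
using slot = std::pair<uint32_t, size_type>;  // (hash value, build-table row index)
constexpr int bucket_size = 2;                // mirrors the kernel's two-slot buckets

// Count matches for one probe key from its precomputed (initial index, step) pair.
// A plain hash comparison stands in for the kernel's expression-aware key_equal.
size_type count_matches(std::vector<slot> const& storage,
                        slot const& probe_key,
                        std::pair<size_type, size_type> const& hash_idx)
{
  auto const extent = static_cast<size_type>(storage.size());
  auto idx          = hash_idx.first;
  size_type count   = 0;
  while (true) {
    bool saw_empty = false;
    for (int i = 0; i < bucket_size; ++i) {
      auto const& s = storage[(idx + i) % extent];
      if (s.second == JoinNoneValue) {
        saw_empty = true;
      } else if (s.first == probe_key.first) {
        ++count;
      }
    }
    if (saw_empty) { return count; }              // an empty slot ends the probe chain
    idx = (idx + hash_idx.second) % extent;       // advance by the precomputed step
    if (idx == hash_idx.first) { return count; }  // wrapped a full cycle: table exhausted
  }
}

int main()
{
  std::vector<slot> table(8, {0u, JoinNoneValue});  // 8 slots, all empty
  table[4] = {42u, 3};                              // one entry: hash 42 -> build row 3
  std::printf("matches: %d\n", count_matches(table, {42u, 7}, {4, 2}));  // prints 1
  return 0;
}

The outer-join variants layer one rule on top of this walk, as `standalone_count` below shows: a probe row that ends the walk with zero matches still contributes one output row.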
@@ -17,27 +17,22 @@ #include "mixed_join_kernel.cuh" #include "mixed_join_kernel.hpp" -namespace cudf { -namespace detail { +namespace cudf::detail { template void launch_mixed_join( table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, + cudf::size_type* join_output_l, + cudf::size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, rmm::cuda_stream_view stream); -} // namespace detail - -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 3cf081e5ded..69bf8fa1870 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -51,8 +51,9 @@ CUDF_KERNEL void __launch_bounds__(block_size) auto const evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); - // Make sure to swap_tables here as hash_set will use probe table as the left one - auto constexpr swap_tables = true; + // The cuco API passes parameters in the same (left, right) order we use here, + // so no swapping needed + auto constexpr swap_tables = false; auto const equality = single_expression_equality{ evaluator, thread_intermediate_storage, swap_tables, equality_probe}; diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 4926c6b2792..3a19f534f3b 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -123,7 +123,7 @@ std::unique_ptr> mixed_join_semi( auto const preprocessed_probe = cudf::experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); // Create hash table containing all keys found in right table diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu index 4882c8769e6..bce24655bae 100644 --- a/cpp/src/join/mixed_join_size_kernel.cu +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
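[Editor's note] The one-line `two_table_comparator` fix above is easy to misread: the comparator is asymmetric, binding the left index of `equal_to` to the first table it was constructed with and the right index to the second, so constructing it as `{build, probe}` silently compared the wrong rows. A minimal self-contained sketch of that failure mode follows; `two_table_equal` is a toy comparator invented for illustration, not cudf's actual row-equality machinery.

#include <cassert>
#include <vector>

// Toy asymmetric two-table comparator: lhs_row indexes the first table it was
// constructed with, rhs_row the second (assumed to mirror cudf's convention).
struct two_table_equal {
  std::vector<int> const& lhs_table;
  std::vector<int> const& rhs_table;
  bool operator()(int lhs_row, int rhs_row) const
  {
    return lhs_table[lhs_row] == rhs_table[rhs_row];
  }
};

int main()
{
  std::vector<int> probe = {1, 2};
  std::vector<int> build = {2, 3};

  two_table_equal probe_first{probe, build};  // the ordering the fix installs
  two_table_equal build_first{build, probe};  // the previous, incorrect ordering

  // The same (lhs_row, rhs_row) index pair selects different physical rows
  // depending on construction order, so the results diverge:
  assert(!probe_first(0, 1));  // probe[0]=1 vs build[1]=3 -> no match
  assert(build_first(0, 1));   // build[0]=2 vs probe[1]=2 -> spurious match
  return 0;
}

The same reasoning explains the `swap_tables = false` change in `mixed_join_kernels_semi.cu`: once the comparator and the cuco probe both take arguments in (left, right) order, an extra swap layer only reintroduces the asymmetry.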
@@ -23,19 +23,16 @@ namespace detail { template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - cudf::device_span matches_per_row, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + rmm::cuda_stream_view stream); } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 07d9bee4aff..51426baed2e 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -29,79 +29,127 @@ #include #include -#include -#include -#include - -namespace cudf { -namespace detail { -namespace cg = cooperative_groups; - -template -CUDF_KERNEL void __launch_bounds__(block_size) - compute_mixed_join_output_size(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row) +#include + +namespace cudf::detail { + +/** + * @brief Standalone count implementation using precomputed hash indices + * + * This implementation provides essential count functionality for mixed joins + * using precomputed probe indices and step sizes. 
+ */ +template +__device__ __forceinline__ auto standalone_count( + pair_expression_equality const& key_equal, + cudf::device_span> hash_table_storage, + cuco::pair const& probe_key, + cuda::std::pair const& hash_idx, + join_kind join_type) noexcept +{ + cudf::size_type count = 0; + auto const extent = hash_table_storage.size(); + auto const* data = hash_table_storage.data(); + auto probe_idx = static_cast(hash_idx.first); // initial probe index + auto const step = static_cast(hash_idx.second); // step size + + while (true) { + auto const bucket_slots = + *reinterpret_cast, 2> const*>( + data + probe_idx); + + // Check for empty slots and key equality + auto const first_slot_is_empty = bucket_slots[0].second == cudf::detail::JoinNoneValue; + auto const second_slot_is_empty = bucket_slots[1].second == cudf::detail::JoinNoneValue; + auto const first_slot_equals = + (not first_slot_is_empty and key_equal(probe_key, bucket_slots[0])); + auto const second_slot_equals = + (not second_slot_is_empty and key_equal(probe_key, bucket_slots[1])); + + count += (first_slot_equals + second_slot_equals); + + // Exit if we find an empty slot + if (first_slot_is_empty or second_slot_is_empty) { + // Handle outer join logic: non-matching rows are counted as 1 match + if ((join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) && count == 0) { + return 1; + } + return count; + } + + // Move to next bucket using precomputed step + probe_idx = (probe_idx + step) % extent; + + // Detect full cycle completion + if (probe_idx == static_cast(hash_idx.first)) { + // Handle outer join logic: non-matching rows are counted as 1 match + if ((join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) && count == 0) { + return 1; + } + return count; + } + } +} + +template +CUDF_KERNEL void __launch_bounds__(DEFAULT_JOIN_BLOCK_SIZE) compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, + ast::detail::expression_device_view device_expression_data, + bool swap_tables, + size_t* d_total_count) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. The easiest // workaround is to declare an arbitrary (here char) array type then cast it // after the fact to the appropriate type. extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = + auto intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); auto thread_intermediate_storage = intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); - std::size_t thread_counter{0}; + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + auto const start_idx = cudf::detail::grid_1d::global_thread_id(); auto const stride = cudf::detail::grid_1d::grid_stride(); cudf::size_type const left_num_rows = left_table.num_rows(); cudf::size_type const right_num_rows = right_table.num_rows(); auto const outer_num_rows = (swap_tables ? 
right_num_rows : left_num_rows); - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); - - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; + auto const evaluator = cudf::ast::detail::expression_evaluator{ + left_table, right_table, device_expression_data}; // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); // TODO: Address asymmetry in operator. auto count_equality = pair_expression_equality{ evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + // Thread-local count + size_t per_thread_count = 0; + for (auto outer_row_index = start_idx; outer_row_index < outer_num_rows; outer_row_index += stride) { - auto query_pair = pair_func(outer_row_index); - if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { - matches_per_row[outer_row_index] = - hash_table_view.pair_count_outer(this_thread, query_pair, count_equality); - } else { - matches_per_row[outer_row_index] = - hash_table_view.pair_count(this_thread, query_pair, count_equality); - } - thread_counter += matches_per_row[outer_row_index]; + auto const& probe_key = input_pairs[outer_row_index]; + auto const& hash_idx = hash_indices[outer_row_index]; + + auto match_count = + standalone_count(count_equality, hash_table_storage, probe_key, hash_idx, join_type); + + per_thread_count += match_count; } - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); + size_t per_block_count = BlockReduce(temp_storage).Sum(per_thread_count); - // Add block counter to global counter if (threadIdx.x == 0) { - cuda::atomic_ref ref{*output_size}; - ref.fetch_add(block_counter, cuda::std::memory_order_relaxed); + cuda::atomic_ref counter_ref(*d_total_count); + counter_ref.fetch_add(per_block_count, cuda::memory_order_relaxed); } } @@ -109,39 +157,34 @@ template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - cudf::device_span matches_per_row, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { - // Allocate storage for the counter used to get the size of the join output - cudf::detail::device_scalar size(0, stream, mr); + cudf::detail::device_scalar d_total_count{ + 0, stream, cudf::get_current_device_resource_ref()}; - compute_mixed_join_output_size + compute_mixed_join_output_size <<>>( left_table, right_table, - probe, - build, - hash_probe, - equality_probe, join_type, - hash_table_view, + equality_probe, + hash_table_storage, + input_pairs, + hash_indices, device_expression_data, swap_tables, - size.data(), - matches_per_row); - return size.value(stream); + d_total_count.data()); + + return 
d_total_count.value(stream); } -} // namespace detail -} // namespace cudf +} // namespace cudf::detail diff --git a/cpp/src/join/mixed_join_size_kernel.hpp b/cpp/src/join/mixed_join_size_kernel.hpp index 776229f11c1..d48239a3e13 100644 --- a/cpp/src/join/mixed_join_size_kernel.hpp +++ b/cpp/src/join/mixed_join_size_kernel.hpp @@ -46,46 +46,41 @@ namespace detail { * evaluates to true between the left/right tables when a match is found * between probe and build rows. * - * @tparam block_size The number of threads per block for this kernel + * Uses the current device memory resource for internal allocations. + * * @tparam has_nulls Whether or not the inputs may contain nulls. * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. - * @param[out] output_size The resulting output size - * @param[out] matches_per_row The number of matches in one pair of - * equality/conditional tables for each row in the other pair of tables. If - * swap_tables is true, matches_per_row corresponds to the right_table, - * otherwise it corresponds to the left_table. Note that corresponding swap of - * left/right tables to determine which is the build table and which is the - * probe table has already happened on the host. + * @param left_table The left table + * @param right_table The right table + * @param join_type The type of join to be performed + * @param equality_probe The equality comparator used when probing the hash table + * @param hash_table_storage The hash table storage for probing operations + * @param input_pairs Array of hash-value/row-index pairs for probing + * @param hash_indices Array of hash index pairs for efficient lookup + * @param device_expression_data Container of device data required to evaluate the desired + * expression + * @param swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. 
Otherwise, loop over right rows + * @param config Grid configuration for kernel launch + * @param shmem_size_per_block Shared memory size per block in bytes + * @param stream CUDA stream used for device memory operations and kernel launches + * + * @return The resulting output size */ - template std::size_t launch_compute_mixed_join_output_size( - cudf::table_device_view left_table, - cudf::table_device_view right_table, - cudf::table_device_view probe, - cudf::table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, + table_device_view left_table, + table_device_view right_table, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - cudf::device_span matches_per_row, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + rmm::cuda_stream_view stream); + } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu index 11f9103da4d..66b0439cd42 100644 --- a/cpp/src/join/mixed_join_size_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,18 +22,16 @@ namespace detail { template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, + join_kind join_type, + row_equality equality_probe, + cudf::device_span> hash_table_storage, + cuco::pair const* input_pairs, + cuda::std::pair const* hash_indices, ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - cudf::device_span matches_per_row, - detail::grid_1d const config, + bool swap_tables, + detail::grid_1d const& config, int64_t shmem_size_per_block, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + rmm::cuda_stream_view stream); + } // namespace detail } // namespace cudf diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index ef32d365425..f334aff89e9 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -210,19 +210,12 @@ struct MixedJoinPairReturnTest : public MixedJoinTest { std::vector> expected_outputs, cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) { - auto [result_size, actual_counts] = this->join_size( + auto result_size = this->join_size( left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); EXPECT_TRUE(result_size == expected_outputs.size()); - cudf::test::fixed_width_column_wrapper expected_counts_cw( - expected_counts.begin(), expected_counts.end()); - auto const actual_counts_view = - cudf::column_view(cudf::data_type{cudf::type_to_id()}, - actual_counts->size(), 
- actual_counts->data(), - nullptr, - 0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_counts_cw, actual_counts_view); + // Since we no longer get per-row counts, we can't verify them. + // Instead just verify that the total count matches auto result = this->join( left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); @@ -325,13 +318,12 @@ struct MixedJoinPairReturnTest : public MixedJoinTest { * It should be a simply forwarding of arguments to the appropriate cudf * mixed join size computation API. */ - virtual std::pair>> join_size( - cudf::table_view left_equality, - cudf::table_view right_equality, - cudf::table_view left_conditional, - cudf::table_view right_conditional, - cudf::ast::operation predicate, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; + virtual std::size_t join_size(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; }; /** @@ -350,13 +342,12 @@ struct MixedInnerJoinTest : public MixedJoinPairReturnTest { left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); } - std::pair>> join_size( - cudf::table_view left_equality, - cudf::table_view right_equality, - cudf::table_view left_conditional, - cudf::table_view right_conditional, - cudf::ast::operation predicate, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + std::size_t join_size(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override { return cudf::mixed_inner_join_size( left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); @@ -541,13 +532,12 @@ struct MixedLeftJoinTest : public MixedJoinPairReturnTest { left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); } - std::pair>> join_size( - cudf::table_view left_equality, - cudf::table_view right_equality, - cudf::table_view left_conditional, - cudf::table_view right_conditional, - cudf::ast::operation predicate, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + std::size_t join_size(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override { return cudf::mixed_left_join_size( left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); @@ -608,13 +598,12 @@ struct MixedFullJoinTest : public MixedJoinPairReturnTest { left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); } - std::pair>> join_size( - cudf::table_view left_equality, - cudf::table_view right_equality, - cudf::table_view left_conditional, - cudf::table_view right_conditional, - cudf::ast::operation predicate, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + std::size_t join_size(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = 
cudf::null_equality::EQUAL) override { // Full joins don't actually support size calculations, and there's no easy way to spoof it. CUDF_FAIL("Size calculation not supported for full joins."); diff --git a/java/src/main/java/ai/rapids/cudf/MixedJoinSize.java b/java/src/main/java/ai/rapids/cudf/MixedJoinSize.java deleted file mode 100644 index 811f0b9a0b0..00000000000 --- a/java/src/main/java/ai/rapids/cudf/MixedJoinSize.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ai.rapids.cudf; - -/** This class tracks size information associated with a mixed table join. */ -public final class MixedJoinSize implements AutoCloseable { - private final long outputRowCount; - // This is in flux, avoid exposing publicly until the dust settles. - private ColumnVector matches; - - MixedJoinSize(long outputRowCount, ColumnVector matches) { - this.outputRowCount = outputRowCount; - this.matches = matches; - } - - /** Return the number of output rows that would be generated from the mixed join */ - public long getOutputRowCount() { - return outputRowCount; - } - - ColumnVector getMatches() { - return matches; - } - - @Override - public synchronized void close() { - matches.close(); - } -} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 422989143c7..229e04754c5 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -736,31 +736,31 @@ private static native long[] conditionalLeftAntiJoinGatherMapWithCount(long left long condition, long rowCount) throws CudfException; - private static native long[] mixedLeftJoinSize(long leftKeysTable, long rightKeysTable, - long leftConditionTable, long rightConditionTable, - long condition, boolean compareNullsEqual); + private static native long mixedLeftJoinRowCount(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); private static native long[] mixedLeftJoinGatherMaps(long leftKeysTable, long rightKeysTable, long leftConditionTable, long rightConditionTable, long condition, boolean compareNullsEqual); - private static native long[] mixedLeftJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable, - long leftConditionTable, long rightConditionTable, - long condition, boolean compareNullsEqual, - long outputRowCount, long matchesColumnView); + private static native long[] mixedLeftJoinGatherMapsWithCount(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual, + long outputRowCount); - private static native long[] mixedInnerJoinSize(long leftKeysTable, long rightKeysTable, - long leftConditionTable, long rightConditionTable, - long condition, boolean compareNullsEqual); + private static native long mixedInnerJoinRowCount(long leftKeysTable, long rightKeysTable, + long 
leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual); private static native long[] mixedInnerJoinGatherMaps(long leftKeysTable, long rightKeysTable, long leftConditionTable, long rightConditionTable, long condition, boolean compareNullsEqual); - private static native long[] mixedInnerJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable, - long leftConditionTable, long rightConditionTable, - long condition, boolean compareNullsEqual, - long outputRowCount, long matchesColumnView); + private static native long[] mixedInnerJoinGatherMapsWithCount(long leftKeysTable, long rightKeysTable, + long leftConditionTable, long rightConditionTable, + long condition, boolean compareNullsEqual, + long outputRowCount); private static native long[] mixedFullJoinGatherMaps(long leftKeysTable, long rightKeysTable, long leftConditionTable, long rightConditionTable, @@ -3094,31 +3094,26 @@ public GatherMap[] conditionalLeftJoinGatherMaps(Table rightTable, } /** - * Computes output size information for a left join between two tables using a mix of equality - * and inequality conditions. The entire join condition is assumed to be a logical AND of the - * equality condition and inequality condition. - * NOTE: It is the responsibility of the caller to close the resulting size information object - * or native resources can be leaked! + * Computes the number of rows resulting from a left join between two tables using a mix of + * equality and inequality conditions. The entire join condition is assumed to be a logical AND + * of the equality condition and inequality condition. + * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition * @param rightConditional the right table's columns needed to evaluate the inequality condition * @param condition the inequality condition of the join * @param nullEquality whether nulls should compare as equal - * @return size information for the join + * @return row count of the join result */ - public static MixedJoinSize mixedLeftJoinSize(Table leftKeys, Table rightKeys, - Table leftConditional, Table rightConditional, - CompiledExpression condition, - NullEquality nullEquality) { - long[] mixedSizeInfo = mixedLeftJoinSize( - leftKeys.getNativeView(), rightKeys.getNativeView(), - leftConditional.getNativeView(), rightConditional.getNativeView(), - condition.getNativeHandle(), nullEquality == NullEquality.EQUAL); - assert mixedSizeInfo.length == 2; - long outputRowCount = mixedSizeInfo[0]; - long matchesColumnHandle = mixedSizeInfo[1]; - return new MixedJoinSize(outputRowCount, new ColumnVector(matchesColumnHandle)); + public static long mixedLeftJoinRowCount(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + return mixedLeftJoinRowCount( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), nullEquality == NullEquality.EQUAL); } /** @@ -3159,8 +3154,8 @@ public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKey * * It is the responsibility of the caller to close the resulting gather map instances. 
* - * This interface allows passing the size result from - * {@link #mixedLeftJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} + * This interface allows passing the row count from + * {@link #mixedLeftJoinRowCount(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. * * @param leftKeys the left table's key columns for the equality condition @@ -3169,20 +3164,20 @@ public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKey * @param rightConditional the right table's columns needed to evaluate the inequality condition * @param condition the inequality condition of the join * @param nullEquality whether nulls should compare as equal - * @param joinSize mixed join size result + * @param outputRowCount number of output rows in the join result * @return left and right table gather maps */ public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKeys, Table leftConditional, Table rightConditional, CompiledExpression condition, NullEquality nullEquality, - MixedJoinSize joinSize) { - long[] gatherMapData = mixedLeftJoinGatherMapsWithSize( - leftKeys.getNativeView(), rightKeys.getNativeView(), - leftConditional.getNativeView(), rightConditional.getNativeView(), - condition.getNativeHandle(), - nullEquality == NullEquality.EQUAL, - joinSize.getOutputRowCount(), joinSize.getMatches().getNativeView()); + long outputRowCount) { + long[] gatherMapData = mixedLeftJoinGatherMapsWithCount( + leftKeys.getNativeView(), rightKeys.getNativeView(), + leftConditional.getNativeView(), rightConditional.getNativeView(), + condition.getNativeHandle(), + nullEquality == NullEquality.EQUAL, + outputRowCount); return buildJoinGatherMaps(gatherMapData); } @@ -3361,31 +3356,26 @@ public GatherMap[] conditionalInnerJoinGatherMaps(Table rightTable, } /** - * Computes output size information for an inner join between two tables using a mix of equality - * and inequality conditions. The entire join condition is assumed to be a logical AND of the - * equality condition and inequality condition. - * NOTE: It is the responsibility of the caller to close the resulting size information object - * or native resources can be leaked! + * Computes the number of rows resulting from an inner join between two tables using a mix of + * equality and inequality conditions. The entire join condition is assumed to be a logical AND + * of the equality condition and inequality condition. 
+ * * @param leftKeys the left table's key columns for the equality condition * @param rightKeys the right table's key columns for the equality condition * @param leftConditional the left table's columns needed to evaluate the inequality condition * @param rightConditional the right table's columns needed to evaluate the inequality condition * @param condition the inequality condition of the join * @param nullEquality whether nulls should compare as equal - * @return size information for the join + * @return row count of the join result */ - public static MixedJoinSize mixedInnerJoinSize(Table leftKeys, Table rightKeys, - Table leftConditional, Table rightConditional, - CompiledExpression condition, - NullEquality nullEquality) { - long[] mixedSizeInfo = mixedInnerJoinSize( + public static long mixedInnerJoinRowCount(Table leftKeys, Table rightKeys, + Table leftConditional, Table rightConditional, + CompiledExpression condition, + NullEquality nullEquality) { + return mixedInnerJoinRowCount( leftKeys.getNativeView(), rightKeys.getNativeView(), leftConditional.getNativeView(), rightConditional.getNativeView(), condition.getNativeHandle(), nullEquality == NullEquality.EQUAL); - assert mixedSizeInfo.length == 2; - long outputRowCount = mixedSizeInfo[0]; - long matchesColumnHandle = mixedSizeInfo[1]; - return new MixedJoinSize(outputRowCount, new ColumnVector(matchesColumnHandle)); } /** @@ -3427,7 +3417,7 @@ public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKe * It is the responsibility of the caller to close the resulting gather map instances. * * This interface allows passing the size result from - * {@link #mixedInnerJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)} + * {@link #mixedInnerJoinRowCount(Table, Table, Table, Table, CompiledExpression, NullEquality)} * when the output size was computed previously. 
* * @param leftKeys the left table's key columns for the equality condition @@ -3436,20 +3426,20 @@ public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKe * @param rightConditional the right table's columns needed to evaluate the inequality condition * @param condition the inequality condition of the join * @param nullEquality whether nulls should compare as equal - * @param joinSize mixed join size result + * @param outputRowCount number of output rows in the join result * @return left and right table gather maps */ public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKeys, Table leftConditional, Table rightConditional, CompiledExpression condition, NullEquality nullEquality, - MixedJoinSize joinSize) { - long[] gatherMapData = mixedInnerJoinGatherMapsWithSize( + long outputRowCount) { + long[] gatherMapData = mixedInnerJoinGatherMapsWithCount( leftKeys.getNativeView(), rightKeys.getNativeView(), leftConditional.getNativeView(), rightConditional.getNativeView(), condition.getNativeHandle(), nullEquality == NullEquality.EQUAL, - joinSize.getOutputRowCount(), joinSize.getMatches().getNativeView()); + outputRowCount); return buildJoinGatherMaps(gatherMapData); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 0efc76fc6e9..1dc29a0af2c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -851,14 +851,14 @@ jlongArray cond_join_gather_single_map( } template -jlongArray mixed_join_size(JNIEnv* env, - jlong j_left_keys, - jlong j_right_keys, - jlong j_left_condition, - jlong j_right_condition, - jlong j_condition, - jboolean j_nulls_equal, - T join_size_func) +jlong mixed_join_size(JNIEnv* env, + jlong j_left_keys, + jlong j_right_keys, + jlong j_left_condition, + jlong j_right_condition, + jlong j_condition, + jboolean j_nulls_equal, + T join_size_func) { JNI_NULL_CHECK(env, j_left_keys, "left keys table is null", 0); JNI_NULL_CHECK(env, j_right_keys, "right keys table is null", 0); @@ -874,27 +874,14 @@ jlongArray mixed_join_size(JNIEnv* env, auto const condition = reinterpret_cast(j_condition); auto const nulls_equal = j_nulls_equal ? 
cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; - auto [join_size, matches_per_row] = join_size_func(*left_keys, - *right_keys, - *left_condition, - *right_condition, - condition->get_top_expression(), - nulls_equal); - if (matches_per_row->size() > std::numeric_limits::max()) { - throw std::runtime_error("Too many values in device buffer to convert into a column"); - } - auto col_size = static_cast(matches_per_row->size()); - auto col_data = matches_per_row->release(); - cudf::jni::native_jlongArray result(env, 2); - result[0] = static_cast(join_size); - result[1] = ptr_as_jlong(new cudf::column{cudf::data_type{cudf::type_id::INT32}, - col_size, - std::move(col_data), - rmm::device_buffer{}, - 0}); - return result.get_jArray(); + return join_size_func(*left_keys, + *right_keys, + *left_condition, + *right_condition, + condition->get_top_expression(), + nulls_equal); } - CATCH_STD(env, NULL); + CATCH_STD(env, 0); } template @@ -967,16 +954,6 @@ jlongArray mixed_join_gather_single_map(JNIEnv* env, CATCH_STD(env, NULL); } -std::pair> get_mixed_size_info( - JNIEnv* env, jlong j_output_row_count, jlong j_matches_view) -{ - auto const row_count = static_cast(j_output_row_count); - auto const matches = reinterpret_cast(j_matches_view); - return std::make_pair(row_count, - cudf::device_span( - matches->template data(), matches->size())); -} - cudf::column_view remove_validity_from_col(cudf::column_view column_view) { if (!cudf::is_compound(column_view.type())) { @@ -3050,14 +3027,14 @@ Java_ai_rapids_cudf_Table_conditionalLeftJoinGatherMapsWithCount(JNIEnv* env, }); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedLeftJoinSize(JNIEnv* env, - jclass, - jlong j_left_keys, - jlong j_right_keys, - jlong j_left_condition, - jlong j_right_condition, - jlong j_condition, - jboolean j_nulls_equal) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_mixedLeftJoinRowCount(JNIEnv* env, + jclass, + jlong j_left_keys, + jlong j_right_keys, + jlong j_left_condition, + jlong j_right_condition, + jlong j_condition, + jboolean j_nulls_equal) { return cudf::jni::mixed_join_size( env, @@ -3108,18 +3085,16 @@ Java_ai_rapids_cudf_Table_mixedLeftJoinGatherMaps(JNIEnv* env, } JNIEXPORT jlongArray JNICALL -Java_ai_rapids_cudf_Table_mixedLeftJoinGatherMapsWithSize(JNIEnv* env, - jclass, - jlong j_left_keys, - jlong j_right_keys, - jlong j_left_condition, - jlong j_right_condition, - jlong j_condition, - jboolean j_nulls_equal, - jlong j_output_row_count, - jlong j_matches_view) -{ - auto size_info = cudf::jni::get_mixed_size_info(env, j_output_row_count, j_matches_view); +Java_ai_rapids_cudf_Table_mixedLeftJoinGatherMapsWithCount(JNIEnv* env, + jclass, + jlong j_left_keys, + jlong j_right_keys, + jlong j_left_condition, + jlong j_right_condition, + jlong j_condition, + jboolean j_nulls_equal, + jlong j_output_row_count) +{ return cudf::jni::mixed_join_gather_maps( env, j_left_keys, @@ -3128,14 +3103,15 @@ Java_ai_rapids_cudf_Table_mixedLeftJoinGatherMapsWithSize(JNIEnv* env, j_right_condition, j_condition, j_nulls_equal, - [&size_info](cudf::table_view const& left_keys, - cudf::table_view const& right_keys, - cudf::table_view const& left_condition, - cudf::table_view const& right_condition, - cudf::ast::expression const& condition, - cudf::null_equality nulls_equal) { + [row_count = static_cast(j_output_row_count)]( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + cudf::table_view const& left_condition, + cudf::table_view const& right_condition, + cudf::ast::expression 
const& condition, + cudf::null_equality nulls_equal) { return cudf::mixed_left_join( - left_keys, right_keys, left_condition, right_condition, condition, nulls_equal, size_info); + left_keys, right_keys, left_condition, right_condition, condition, nulls_equal, row_count); }); } @@ -3264,14 +3240,14 @@ Java_ai_rapids_cudf_Table_conditionalInnerJoinGatherMapsWithCount(JNIEnv* env, }); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_mixedInnerJoinSize(JNIEnv* env, - jclass, - jlong j_left_keys, - jlong j_right_keys, - jlong j_left_condition, - jlong j_right_condition, - jlong j_condition, - jboolean j_nulls_equal) +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_mixedInnerJoinRowCount(JNIEnv* env, + jclass, + jlong j_left_keys, + jlong j_right_keys, + jlong j_left_condition, + jlong j_right_condition, + jlong j_condition, + jboolean j_nulls_equal) { return cudf::jni::mixed_join_size( env, @@ -3322,18 +3298,16 @@ Java_ai_rapids_cudf_Table_mixedInnerJoinGatherMaps(JNIEnv* env, } JNIEXPORT jlongArray JNICALL -Java_ai_rapids_cudf_Table_mixedInnerJoinGatherMapsWithSize(JNIEnv* env, - jclass, - jlong j_left_keys, - jlong j_right_keys, - jlong j_left_condition, - jlong j_right_condition, - jlong j_condition, - jboolean j_nulls_equal, - jlong j_output_row_count, - jlong j_matches_view) +Java_ai_rapids_cudf_Table_mixedInnerJoinGatherMapsWithCount(JNIEnv* env, + jclass, + jlong j_left_keys, + jlong j_right_keys, + jlong j_left_condition, + jlong j_right_condition, + jlong j_condition, + jboolean j_nulls_equal, + jlong j_output_row_count) { - auto size_info = cudf::jni::get_mixed_size_info(env, j_output_row_count, j_matches_view); return cudf::jni::mixed_join_gather_maps( env, j_left_keys, @@ -3342,14 +3316,15 @@ Java_ai_rapids_cudf_Table_mixedInnerJoinGatherMapsWithSize(JNIEnv* env, j_right_condition, j_condition, j_nulls_equal, - [&size_info](cudf::table_view const& left_keys, - cudf::table_view const& right_keys, - cudf::table_view const& left_condition, - cudf::table_view const& right_condition, - cudf::ast::expression const& condition, - cudf::null_equality nulls_equal) { + [row_count = static_cast(j_output_row_count)]( + cudf::table_view const& left_keys, + cudf::table_view const& right_keys, + cudf::table_view const& left_condition, + cudf::table_view const& right_condition, + cudf::ast::expression const& condition, + cudf::null_equality nulls_equal) { return cudf::mixed_inner_join( - left_keys, right_keys, left_condition, right_condition, condition, nulls_equal, size_info); + left_keys, right_keys, left_condition, right_condition, condition, nulls_equal, row_count); }); } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 1289f468002..e539ded8089 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -2464,12 +2464,12 @@ void testMixedLeftJoinGatherMapsWithSize() { Table expected = new Table.TestBuilder() .column( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) .column(inv, inv, 2, inv, inv, inv, inv, 0, 1, inv) - .build(); - MixedJoinSize sizeInfo = Table.mixedLeftJoinSize(leftKeys, rightKeys, left, right, - condition, NullEquality.UNEQUAL)) { - assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + .build()) { + long rowCount = Table.mixedLeftJoinRowCount(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL); + assertEquals(expected.getRowCount(), rowCount); GatherMap[] maps = Table.mixedLeftJoinGatherMaps(leftKeys, rightKeys, left, right, 
condition, - NullEquality.UNEQUAL, sizeInfo); + NullEquality.UNEQUAL, rowCount); try { verifyJoinGatherMaps(maps, expected); } finally { @@ -2500,12 +2500,12 @@ void testMixedLeftJoinGatherMapsNullsWithSize() { Table expected = new Table.TestBuilder() .column(0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9) .column(0, inv, inv, inv, inv, inv, inv, 0, 2, 1, inv) - .build(); - MixedJoinSize sizeInfo = Table.mixedLeftJoinSize(leftKeys, rightKeys, left, right, - condition, NullEquality.EQUAL)) { - assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + .build()) { + long rowCount = Table.mixedLeftJoinRowCount(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL); + assertEquals(expected.getRowCount(), rowCount); GatherMap[] maps = Table.mixedLeftJoinGatherMaps(leftKeys, rightKeys, left, right, condition, - NullEquality.EQUAL, sizeInfo); + NullEquality.EQUAL, rowCount); try { verifyJoinGatherMaps(maps, expected); } finally { @@ -2979,12 +2979,12 @@ void testMixedInnerJoinGatherMapsWithSize() { Table expected = new Table.TestBuilder() .column(2, 7, 8) .column(2, 0, 1) - .build(); - MixedJoinSize sizeInfo = Table.mixedInnerJoinSize(leftKeys, rightKeys, left, right, - condition, NullEquality.UNEQUAL)) { - assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + .build()) { + long rowCount = Table.mixedInnerJoinRowCount(leftKeys, rightKeys, left, right, + condition, NullEquality.UNEQUAL); + assertEquals(expected.getRowCount(), rowCount); GatherMap[] maps = Table.mixedInnerJoinGatherMaps(leftKeys, rightKeys, left, right, condition, - NullEquality.UNEQUAL, sizeInfo); + NullEquality.UNEQUAL, rowCount); try { verifyJoinGatherMaps(maps, expected); } finally { @@ -3014,12 +3014,12 @@ void testMixedInnerJoinGatherMapsNullsWithSize() { Table expected = new Table.TestBuilder() .column(0, 7, 7, 8) .column(0, 0, 2, 1) - .build(); - MixedJoinSize sizeInfo = Table.mixedInnerJoinSize(leftKeys, rightKeys, left, right, - condition, NullEquality.EQUAL)) { - assertEquals(expected.getRowCount(), sizeInfo.getOutputRowCount()); + .build()) { + long rowCount = Table.mixedInnerJoinRowCount(leftKeys, rightKeys, left, right, + condition, NullEquality.EQUAL); + assertEquals(expected.getRowCount(), rowCount); GatherMap[] maps = Table.mixedInnerJoinGatherMaps(leftKeys, rightKeys, left, right, condition, - NullEquality.EQUAL, sizeInfo); + NullEquality.EQUAL, rowCount); try { verifyJoinGatherMaps(maps, expected); } finally { diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index a9261345db5..f5b8d1ba6d1 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -33,8 +33,10 @@ __all__ = [ "left_semi_join", "mixed_full_join", "mixed_inner_join", + "mixed_inner_join_size", "mixed_left_anti_join", "mixed_left_join", + "mixed_left_join_size", "mixed_left_semi_join", ] @@ -516,7 +518,7 @@ cpdef tuple mixed_inner_join( join. """ cdef cpp_join.gather_map_pair_type c_result - cdef cpp_join.output_size_data_type empty_optional + cdef optional[size_t] empty_optional stream = _get_stream(stream) @@ -536,6 +538,55 @@ cpdef tuple mixed_inner_join( _column_from_gather_map(move(c_result.second), stream), ) +cpdef size_t mixed_inner_join_size( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal, + Stream stream=None +): + """Get the size of a mixed inner join between two tables. + + For details, see :cpp:func:`mixed_inner_join_size`. 
+ + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + int + The number of rows that would be produced by the join. + """ + cdef size_t result + + stream = _get_stream(stream) + + with nogil: + result = cpp_join.mixed_inner_join_size( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + stream.view() + ) + return result + cpdef tuple mixed_left_join( Table left_keys, @@ -572,7 +623,7 @@ cpdef tuple mixed_left_join( join. """ cdef cpp_join.gather_map_pair_type c_result - cdef cpp_join.output_size_data_type empty_optional + cdef optional[size_t] empty_optional stream = _get_stream(stream) @@ -592,6 +643,55 @@ cpdef tuple mixed_left_join( _column_from_gather_map(move(c_result.second), stream), ) +cpdef size_t mixed_left_join_size( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal, + Stream stream=None +): + """Get the size of a mixed left join between two tables. + + For details, see :cpp:func:`mixed_left_join_size`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + int + The number of rows that would be produced by the join. + """ + cdef size_t result + + stream = _get_stream(stream) + + with nogil: + result = cpp_join.mixed_left_join_size( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + stream.view() + ) + return result + cpdef tuple mixed_full_join( Table left_keys, @@ -628,7 +728,7 @@ cpdef tuple mixed_full_join( join. 
""" cdef cpp_join.gather_map_pair_type c_result - cdef cpp_join.output_size_data_type empty_optional + cdef optional[size_t] empty_optional stream = _get_stream(stream) diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 111576ea1d9..0f702d6ea20 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -19,7 +19,6 @@ from pylibcudf.libcudf.utilities.span cimport device_span ctypedef unique_ptr[device_uvector[size_type]] gather_map_type ctypedef pair[gather_map_type, gather_map_type] gather_map_pair_type -ctypedef optional[pair[size_t, device_span[const size_type]]] output_size_data_type cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: cdef gather_map_pair_type inner_join( @@ -169,7 +168,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - output_size_data_type output_size_data, + optional[size_t] output_size, cuda_stream_view stream ) except +libcudf_exception_handler @@ -180,7 +179,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - output_size_data_type output_size_data, + optional[size_t] output_size, cuda_stream_view stream ) except +libcudf_exception_handler @@ -191,7 +190,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - output_size_data_type output_size_data, + optional[size_t] output_size, cuda_stream_view stream ) except +libcudf_exception_handler @@ -214,3 +213,23 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: null_equality compare_nulls, cuda_stream_view stream ) except +libcudf_exception_handler + + cdef size_t mixed_inner_join_size( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls, + cuda_stream_view stream + ) except +libcudf_exception_handler + + cdef size_t mixed_left_join_size( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls, + cuda_stream_view stream + ) except +libcudf_exception_handler From 07243acf7b60ed47a218cfa5bd4485524682740e Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 3 Sep 2025 14:32:35 -0500 Subject: [PATCH 247/366] Fix various pandas test failures in `cudf.pandas` (#19372) Closes #19550 This PR: - Fixes a net of 400 pytest failures - Enables `data_array_view` for pandas nullable extension types. - Returns `values_host` for null columns with pandas nullable extension types with `np.nan` for missing values. - Fixes `distinct_count` and thus `nunique` to properly handle `NA`, `np.nan` for all types. - Changes `astype`'s `copy` parameter default to `True` from `False`. - Raises error in `searchsorted` for nullable extension types. - Disallows conversion of `np.array([nat, nat])` column to `pa.array([null, null])` in `as_column` constructor. - Fixes return types for many combinations of binary op with pandas nullable extension types. - Fixes all edge cases not handled in `POW`. 
- Disable conversion of float to int column if there are `nan`'s - Disable allowing `np.nan` for `StringColumn.fillna` - Update cupy nan methods dict with newly supported nan APIs. - Update `to_array` and thus `to_numpy` to work with all pandas nullable extension types. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Matthew Roeschke (https://github.com/mroeschke) - David Wendt (https://github.com/davidwendt) - Vukasin Milovanovic (https://github.com/vuule) - Muhammad Haseeb (https://github.com/mhaseeb123) - https://github.com/brandon-b-miller - Richard (Rick) Zamora (https://github.com/rjzamora) - Matthew Murray (https://github.com/Matt711) - Robert Maynard (https://github.com/robertmaynard) - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Tianyu Liu (https://github.com/kingcrimsontianyu) - James Lamb (https://github.com/jameslamb) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19372 --- python/cudf/cudf/core/column/column.py | 70 +- python/cudf/cudf/core/column/numerical.py | 121 ++- python/cudf/cudf/core/column/string.py | 13 + python/cudf/cudf/core/dataframe.py | 67 +- python/cudf/cudf/core/frame.py | 88 ++- python/cudf/cudf/core/indexed_frame.py | 61 +- python/cudf/cudf/core/series.py | 26 +- python/cudf/cudf/pandas/_wrappers/pandas.py | 3 + python/cudf/cudf/pandas/fast_slow_proxy.py | 6 +- .../cudf/pandas/scripts/conftest-patch.py | 708 ++++-------------- .../cudf/tests/private_objects/test_column.py | 4 +- .../cudf/tests/series/methods/test_unique.py | 18 +- python/cudf/cudf/utils/dtypes.py | 11 +- 13 files changed, 563 insertions(+), 633 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 091e6a81c32..e43f15f1895 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2,6 +2,7 @@ from __future__ import annotations +import decimal import pickle import warnings from collections.abc import Iterable, Iterator, MutableSequence, Sequence @@ -689,7 +690,12 @@ def data_array_view( raise ValueError(f"Unsupported mode: {mode}") else: obj = None - return cuda.as_cuda_array(obj).view(self.dtype) + if cudf.get_option("mode.pandas_compatible"): + return cuda.as_cuda_array(obj).view( + getattr(self.dtype, "numpy_dtype", self.dtype) + ) + else: + return cuda.as_cuda_array(obj).view(self.dtype) def mask_array_view( self, *, mode: Literal["write", "read"] = "write" @@ -799,6 +805,23 @@ def values_host(self) -> np.ndarray: if len(self) == 0: return np.array([], dtype=self.dtype) + if ( + cudf.get_option("mode.pandas_compatible") + and is_pandas_nullable_extension_dtype(self.dtype) + and self.dtype.kind in "iuf" + and self.has_nulls() + ): + col = self.astype( + np.dtype("float32") + if getattr(self.dtype, "numpy_dtype", self.dtype) + == np.dtype("float32") + else np.dtype("float64") + ) + col = col.fillna(np.nan) + with acquire_spill_lock(): + res = col.data_array_view(mode="read").copy_to_host() + return res + if self.has_nulls(): raise ValueError("Column must have no nulls.") @@ -1786,7 +1809,9 @@ def distinct_count(self, dropna: bool = True) -> int: plc.types.NullPolicy.EXCLUDE if dropna else plc.types.NullPolicy.INCLUDE, - plc.types.NanPolicy.NAN_IS_VALID, + plc.types.NanPolicy.NAN_IS_NULL + if dropna + else plc.types.NanPolicy.NAN_IS_VALID, ) self._distinct_count[dropna] = result return self._distinct_count[dropna] @@ -1810,7 +1835,7 @@ def cast(self, dtype: Dtype) -> ColumnBase: result._dtype = dtype return result - 
def astype(self, dtype: DtypeObj, copy: bool = False) -> ColumnBase: + def astype(self, dtype: DtypeObj, copy: bool | None = False) -> ColumnBase: if self.dtype == dtype: result = self elif len(self) == 0: @@ -1844,7 +1869,7 @@ def astype(self, dtype: DtypeObj, copy: bool = False) -> ColumnBase: result = self.as_numerical_column(dtype) if copy and result is self: - return result.copy() + return result.copy(deep=copy) return result def as_categorical_column( @@ -1963,6 +1988,13 @@ def searchsorted( raise ValueError( "Column searchsorted expects values to be column of same dtype" ) + if is_pandas_nullable_extension_dtype(self.dtype) and self.has_nulls( + include_nan=True + ): + raise ValueError( + "searchsorted requires array to be sorted, which is impossible " + "with NAs present." + ) return ColumnBase.from_pylibcudf( sorting.search_sorted( # type: ignore[return-value] [self], @@ -2233,14 +2265,18 @@ def _return_sentinel_column(): def copy_if_else( self, other: Self | plc.Scalar, boolean_mask: NumericalColumn ) -> Self: - return type(self).from_pylibcudf( # type: ignore[return-value] - plc.copying.copy_if_else( - self.to_pylibcudf(mode="read"), - other - if isinstance(other, plc.Scalar) - else other.to_pylibcudf(mode="read"), - boolean_mask.to_pylibcudf(mode="read"), + return ( + type(self) + .from_pylibcudf( # type: ignore[return-value] + plc.copying.copy_if_else( + self.to_pylibcudf(mode="read"), + other + if isinstance(other, plc.Scalar) + else other.to_pylibcudf(mode="read"), + boolean_mask.to_pylibcudf(mode="read"), + ) ) + ._with_type_metadata(self.dtype) ) def split_by_offsets( @@ -2490,11 +2526,17 @@ def where( def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: """Check if an object dtype Series or array contains NaN.""" return any( - isinstance(x, (float, np.floating)) and np.isnan(x) + (isinstance(x, (float, np.floating)) and np.isnan(x)) + or (isinstance(x, decimal.Decimal) and x.is_nan()) for x in np.asarray(arbitrary) ) +def _has_any_nat(arbitrary: pd.Series | np.ndarray) -> bool: + """Check if an object dtype Series or array contains NaT.""" + return any(x is pd.NaT for x in np.asarray(arbitrary)) + + def column_empty( row_count: int, dtype: DtypeObj = CUDF_STRING_DTYPE, @@ -2993,7 +3035,7 @@ def as_column( elif ( nan_as_null is False and inferred_dtype not in ("decimal", "empty") - and _has_any_nan(arbitrary) + and (_has_any_nan(arbitrary) or _has_any_nat(arbitrary)) ): # Decimal can hold float("nan") # All np.nan is not restricted by type @@ -3433,4 +3475,4 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: plc.concatenate.concatenate( [col.to_pylibcudf(mode="read") for col in objs_with_len] ) - ) + )._with_type_metadata(objs_with_len[0].dtype) # type: ignore[return-value] diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 2292b92e68e..1faa75b8881 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -7,6 +7,7 @@ import cupy as cp import numpy as np +import pandas as pd import pyarrow as pa from typing_extensions import Self @@ -211,12 +212,30 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: np.uint64: np.float64, np.bool_: np.float32, } + if cudf.get_option("mode.pandas_compatible"): + int_float_dtype_mapping = { + np.int8: np.float64, + np.int16: np.float64, + np.int32: np.float64, + np.int64: np.float64, + np.uint8: np.float64, + np.uint16: np.float64, + np.uint32: np.float64, + np.uint64: np.float64, + 
np.bool_: np.float64, + } out_dtype = None if op in {"__truediv__", "__rtruediv__"}: # Division with integer types results in a suitable float. - if truediv_type := int_float_dtype_mapping.get(self.dtype.type): - return self.astype(np.dtype(truediv_type))._binaryop(other, op) + if truediv_type := int_float_dtype_mapping.get( + self.dtype.numpy_dtype.type + if is_pandas_nullable_extension_dtype(self.dtype) + else self.dtype.type + ): + return self.astype( + get_dtype_of_same_kind(self.dtype, np.dtype(truediv_type)) + )._binaryop(other, op) elif op in { "__lt__", "__gt__", @@ -279,7 +298,11 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: f"{self.dtype.type.__name__} and " f"{other_cudf_dtype.type.__name__}" ) - if self.dtype.kind == "b" or other_cudf_dtype.kind == "b": + if self.dtype.kind == "b" and other_cudf_dtype.kind == "b": + out_dtype = get_dtype_of_same_kind( + self.dtype, np.dtype(np.bool_) + ) + elif self.dtype.kind == "b" or other_cudf_dtype.kind == "b": out_dtype = get_dtype_of_same_kind( out_dtype, np.dtype(np.bool_) ) @@ -291,13 +314,53 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: ): op = "INT_POW" + lhs_dtype, rhs_dtype = ( + (other_cudf_dtype, self.dtype) + if reflect + else (self.dtype, other_cudf_dtype) + ) lhs, rhs = (other, self) if reflect else (self, other) - + if out_dtype.kind == "f" and is_pandas_nullable_extension_dtype( + out_dtype + ): + if ( + not is_pandas_nullable_extension_dtype(lhs_dtype) + and lhs_dtype.kind == "f" + and isinstance(lhs, NumericalColumn) + ): + lhs = lhs.nans_to_nulls() + if ( + not is_pandas_nullable_extension_dtype(rhs_dtype) + and rhs_dtype.kind == "f" + and isinstance(rhs, NumericalColumn) + ): + rhs = rhs.nans_to_nulls() if isinstance(lhs, pa.Scalar): lhs = pa_scalar_to_plc_scalar(lhs) elif isinstance(rhs, pa.Scalar): rhs = pa_scalar_to_plc_scalar(rhs) - return binaryop.binaryop(lhs, rhs, op, out_dtype) + + res = binaryop.binaryop(lhs, rhs, op, out_dtype) + if ( + is_pandas_nullable_extension_dtype(out_dtype) + and out_dtype.kind == "f" + ): + # If the output dtype is a pandas nullable extension type, + # we need to ensure that the result is a NumericalColumn. + res = res.nans_to_nulls() + if op in {"__mod__", "__floordiv__"} and tmp_dtype.kind == "b": + res = res.astype( + get_dtype_of_same_kind(out_dtype, np.dtype(np.int8)) + ) + elif op == "INT_POW" and res.null_count: + if ( + isinstance(lhs, plc.Scalar) + and lhs.to_py() == 1 + and isinstance(rhs, ColumnBase) + and rhs.null_count > 0 + ): + res = res.fillna(lhs.to_py()) + return res def nans_to_nulls(self: Self) -> Self: # Only floats can contain nan. 
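The promotion and INT_POW rules in the hunk above are easiest to see end to end. A minimal sketch of the intended pandas-compatible arithmetic, assuming a build with this change applied (the nullable dtype names come from pandas):

import cudf

cudf.set_option("mode.pandas_compatible", True)
s = cudf.Series([1, 2, None], dtype="Int32")

# True division of any nullable integer now promotes to the float64
# kind, so an Int32 input yields Float64 rather than Float32.
assert (s / 2).dtype == "Float64"

# INT_POW edge case: pandas defines 1 ** NA == 1, so with a scalar base
# of 1 the nulls in the result are filled rather than propagated.
assert (1 ** s).isnull().sum() == 0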
@@ -339,8 +402,20 @@ def _normalize_binop_operand(self, other: Any) -> pa.Scalar | ColumnBase: # => np.int64 # np.promote_types(np.asarray([0], dtype=np.int64).dtype, np.uint8) # => np.int64 - common_dtype = np.result_type(self.dtype, other) # noqa: TID251 - if common_dtype.kind in {"b", "i", "u", "f"}: + if is_pandas_nullable_extension_dtype(self.dtype): + if isinstance(self.dtype, pd.ArrowDtype): + common_dtype = cudf.utils.dtypes.find_common_type( + [self.dtype, other] + ) + else: + common_dtype = get_dtype_of_same_kind( + self.dtype, + np.result_type(self.dtype.numpy_dtype, other), # noqa: TID251 + ) + + else: + common_dtype = np.result_type(self.dtype, other) # noqa: TID251 + if common_dtype.kind in {"b", "i", "u", "f"}: # type: ignore[union-attr] if self.dtype.kind == "b" and not isinstance(other, bool): common_dtype = min_signed_type(other) return pa.scalar( @@ -427,7 +502,26 @@ def as_decimal_column(self, dtype: DecimalDtype) -> DecimalBaseColumn: def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: if dtype == self.dtype: return self + if cudf.get_option("mode.pandas_compatible"): + if ( + is_pandas_nullable_extension_dtype(self.dtype) + and isinstance(dtype, np.dtype) + and self.null_count > 0 + ): + if dtype.kind in "iu": + raise ValueError("cannot convert NA to integer") + elif dtype.kind == "b": + raise ValueError("cannot convert float NaN to bool") + + if ( + not is_pandas_nullable_extension_dtype(self.dtype) + and is_pandas_nullable_extension_dtype(dtype) + and dtype.kind == "f" # type: ignore[union-attr] + ): + res = self.nans_to_nulls().cast(dtype=dtype) # type: ignore[return-value] + res._dtype = dtype + return res # type: ignore[return-value] if dtype_to_pylibcudf_type(dtype) == dtype_to_pylibcudf_type( self.dtype ): @@ -446,6 +540,19 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: else: self._dtype = dtype return self + if self.dtype.kind == "f" and dtype.kind in "iu": # type: ignore[union-attr] + if ( + not is_pandas_nullable_extension_dtype(dtype) + and self.nan_count > 0 + ): + raise TypeError( + "Cannot convert non-finite values (NA or inf) to integer" + ) + # If casting from float to int, we need to convert nans to nulls + res = self.nans_to_nulls().cast(dtype=dtype) # type: ignore[return-value] + res._dtype = dtype + return res # type: ignore[return-value] + return self.cast(dtype=dtype) # type: ignore[return-value] def all(self, skipna: bool = True) -> bool: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a947ae7a275..e7a7105e6a9 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -18,6 +18,7 @@ from cudf.core._internals import binaryop from cudf.core.buffer import Buffer, acquire_spill_lock from cudf.core.column.column import ColumnBase, as_column, column_empty +from cudf.errors import MixedTypeError from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( CUDF_STRING_DTYPE, @@ -252,6 +253,18 @@ def __cuda_array_interface__(self): "`__cuda_array_interface__`" ) + def _validate_fillna_value( + self, fill_value: ScalarLike | ColumnLike + ) -> plc.Scalar | ColumnBase: + """Align fill_value for .fillna based on column type.""" + if ( + cudf.get_option("mode.pandas_compatible") + and is_scalar(fill_value) + and fill_value is np.nan + ): + raise MixedTypeError("Cannot fill `np.nan` in string column") + return super()._validate_fillna_value(fill_value) + def element_indexing(self, index: int): result = 
super().element_indexing(index) if isinstance(result, pa.Scalar): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ab91e1a9a9b..2dc4fed005e 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -105,6 +105,8 @@ get_dtype_of_same_kind, is_column_like, is_dtype_obj_numeric, + is_mixed_with_object_dtype, + is_pandas_nullable_extension_dtype, min_signed_type, ) from cudf.utils.ioutils import ( @@ -130,6 +132,7 @@ "mean": "nanmean", "std": "nanstd", "var": "nanvar", + "median": "nanmedian", } @@ -2047,9 +2050,11 @@ def _concat( def astype( self, dtype: Dtype | dict[Hashable, Dtype], - copy: bool = False, + copy: bool | None = None, errors: Literal["raise", "ignore"] = "raise", ) -> Self: + if copy is None: + copy = True if is_dict_like(dtype): if len(set(dtype.keys()) - set(self._column_names)) > 0: # type: ignore[union-attr] raise KeyError( @@ -6994,6 +6999,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): prepared, mask, common_dtype = self._prepare_for_rowwise_op( method, skipna, numeric_only ) + for col in prepared._column_names: if prepared._data[col].nullable: prepared._data[col] = ( @@ -7012,15 +7018,25 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): if skipna is not False and method in _cupy_nan_methods_map: method = _cupy_nan_methods_map[method] + if len(arr) == 0 and method == "nanmedian": + # Workaround for a cupy limitation, cupy + # errors for zero dim array in nanmedian + # https://github.com/cupy/cupy/issues/9332 + method = "median" result = getattr(cupy, method)(arr, axis=1, **kwargs) if result.ndim == 1: type_coerced_methods = { "count", "min", + "nanmin", "max", + "nanmax", "sum", + "nansum", "prod", + "nanprod", + "product", "cummin", "cummax", "cumsum", @@ -7032,6 +7048,45 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): or (common_dtype is not None and common_dtype.kind == "M") else None ) + + if ( + cudf.get_option("mode.pandas_compatible") + and result_dtype is None + and is_pandas_nullable_extension_dtype(common_dtype) + ): + if ( + method + in { + "kurt", + "kurtosis", + "mean", + "nanmean", + "median", + "nanmedian", + "sem", + "skew", + "std", + "nanstd", + "var", + "nanvar", + } + and common_dtype.kind != "f" + ): + result_dtype = get_dtype_of_same_kind( + common_dtype, np.dtype(np.float64) + ) + else: + result_dtype = get_dtype_of_same_kind( + common_dtype, result.dtype + ) + if ( + result_dtype is not None + and result_dtype.kind == "b" + and result.dtype.kind != "b" + ): + result_dtype = get_dtype_of_same_kind( + common_dtype, result.dtype + ) result = as_column(result, dtype=result_dtype) if mask is not None: result = result.set_mask(mask._column.as_mask()) @@ -7633,6 +7688,16 @@ def unnamed_group_generator(): col.astype(common_type) if col is not None else all_nulls() for col in columns ) + if ( + cudf.get_option("mode.pandas_compatible") + and common_type == "object" + ): + for col, hcol in zip(columns, homogenized, strict=True): + if is_mixed_with_object_dtype(col, hcol): + raise TypeError( + "Stacking a DataFrame with mixed object and " + "non-object dtypes is not supported. 
" + ) with acquire_spill_lock(): interleaved_col = ColumnBase.from_pylibcudf( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 0c627f3ea84..7caa5826f2a 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -17,10 +17,11 @@ import pylibcudf as plc import cudf +from cudf.api.extensions import no_default # TODO: The `numpy` import is needed for typing purposes during doc builds # only, need to figure out why the `np` alias is insufficient then remove. -from cudf.api.types import is_dtype_equal, is_scalar +from cudf.api.types import is_dtype_equal, is_scalar, is_string_dtype from cudf.core._compat import PANDAS_LT_300 from cudf.core._internals import copying, sorting from cudf.core.abc import Serializable @@ -403,7 +404,7 @@ def __len__(self) -> int: @_performance_tracking def astype( - self, dtype: dict[Hashable, DtypeObj], copy: bool = False + self, dtype: dict[Hashable, DtypeObj], copy: bool | None = None ) -> Self: casted = ( col.astype(dtype.get(col_name, col.dtype), copy=copy) @@ -552,20 +553,56 @@ def _to_array( module: ModuleType, copy: bool, dtype: Dtype | None = None, - na_value=None, + na_value=no_default, ) -> cupy.ndarray | numpy.ndarray: # Internal function to implement to_cupy and to_numpy, which are nearly # identical except for the attribute they access to generate values. def to_array( - col: ColumnBase, dtype: np.dtype + col: ColumnBase, to_dtype: np.dtype ) -> cupy.ndarray | numpy.ndarray: - if na_value is not None: + if ( + col.has_nulls() + and dtype is not None + and not is_string_dtype(dtype) + and na_value is no_default + ): + raise ValueError( + f"cannot convert to '{dtype}'-dtype NumPy array " + "with missing values. Specify an appropriate 'na_value' " + "for this dtype." 
+ ) + if na_value is not no_default: col = col.fillna(na_value) + if isinstance(col.dtype, cudf.CategoricalDtype): col = col._get_decategorized_column() # type: ignore[attr-defined] + array = get_array(col) - casted_array = module.asarray(array, dtype=dtype) + + if ( + cudf.get_option("mode.pandas_compatible") + and is_pandas_nullable_extension_dtype(col.dtype) + and col.dtype.kind in "iuf" + and to_dtype is None + ): + to_dtype = array.dtype + if ( + to_dtype != array.dtype + and dtype is None + and array.dtype.kind in "f" + and col.has_nulls() + ): + to_dtype = None + casted_array = module.asarray(array, dtype=to_dtype) + if ( + col.has_nulls() + and dtype is not None + and is_string_dtype(dtype) + ): + casted_array[col.isnull().values_host] = ( + cudf.NA if na_value is no_default else na_value + ) if copy and casted_array is array: # Don't double copy after asarray casted_array = casted_array.copy() @@ -581,29 +618,43 @@ def to_array( if dtype is None: if ncol == 1: - dtype = next(self._dtypes)[1] + to_dtype = next(self._dtypes)[1] else: - dtype = find_common_type([dtype for _, dtype in self._dtypes]) + to_dtype = find_common_type( + [dtype for _, dtype in self._dtypes] + ) - if isinstance(dtype, cudf.CategoricalDtype): - dtype = dtype.categories.dtype + if cudf.get_option( + "mode.pandas_compatible" + ) and is_pandas_nullable_extension_dtype(to_dtype): + to_dtype = getattr(to_dtype, "numpy_dtype", to_dtype) + if getattr(to_dtype, "kind", None) == "U": + to_dtype = np.dtype(object) + if isinstance(to_dtype, cudf.CategoricalDtype): + to_dtype = to_dtype.categories.dtype - if not isinstance(dtype, numpy.dtype): + if not isinstance(to_dtype, numpy.dtype): raise NotImplementedError( - f"{dtype} cannot be exposed as an array" + f"{to_dtype} cannot be exposed as an array" ) if self.ndim == 1: - return to_array(self._columns[0], dtype) + return to_array( + self._columns[0], to_dtype if dtype is None else dtype + ) else: matrix = module.empty( - shape=(len(self), ncol), dtype=dtype, order="F" + shape=(len(self), ncol), + dtype=to_dtype if dtype is None else dtype, + order="F", ) for i, col in enumerate(self._columns): # TODO: col.values may fail if there is nullable data or an # unsupported dtype. We may want to catch and provide a more # suitable error. - matrix[:, i] = to_array(col, dtype) + matrix[:, i] = to_array( + col, to_dtype if dtype is None else dtype + ) return matrix @_performance_tracking @@ -735,7 +786,7 @@ def to_numpy( self, dtype: Dtype | None = None, copy: bool = True, - na_value=None, + na_value=no_default, ) -> numpy.ndarray: """Convert the Frame to a NumPy array. @@ -1466,10 +1517,11 @@ def searchsorted( values = [*values._columns] if len(values) != self._num_columns: raise ValueError("Mismatch number of columns to search for.") + if cudf.get_option("mode.pandas_compatible"): if any( - col.has_nulls() - and is_pandas_nullable_extension_dtype(col.dtype) + is_pandas_nullable_extension_dtype(col.dtype) + and col.has_nulls(include_nan=True) for col in self._columns ): raise ValueError( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e9c8b71fbe3..f09d15e091b 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4935,7 +4935,7 @@ def repeat(self, repeats, axis=None): def astype( self, dtype: Dtype | dict[Hashable, Dtype], - copy: bool = False, + copy: bool | None = None, errors: Literal["raise", "ignore"] = "raise", ) -> Self: """Cast the object to the given dtype. 
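The signature change above makes `copy=None` the default for `astype`; the Series and DataFrame bodies resolve None to True. A minimal sketch of the user-visible effect, assuming a build with this change:

import cudf

s = cudf.Series([1, 2, 3])
t = s.astype("int64")  # same dtype: copy now defaults to True, so `t`
                       # gets its own data instead of aliasing `s`
t.iloc[0] = 99
assert s.iloc[0] == 1  # the original Series is left untouched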
@@ -6821,30 +6821,43 @@ def _append_new_row_inplace(col: ColumnBase, value: ScalarLike) -> None: """Append a scalar `value` to the end of `col` inplace. Cast to common type if possible """ - val_col = as_column(value, dtype=col.dtype if value is None else None) - if ( - cudf.get_option("mode.pandas_compatible") - and is_pandas_nullable_extension_dtype(col.dtype) - and val_col.dtype.kind == "f" - ): - # If the column is a pandas nullable extension type, we need to - # convert the nans to a nullable type as well. - val_col = val_col.nans_to_nulls() - if len(val_col) == val_col.null_count: - # If the column is all nulls, we can use the column dtype - # to avoid unnecessary casting. - val_col = val_col.astype(col.dtype) - to_type = find_common_type([val_col.dtype, col.dtype]) - if ( - cudf.get_option("mode.pandas_compatible") - and is_string_dtype(to_type) - and is_mixed_with_object_dtype(val_col, col) - ): - raise MixedTypeError("Cannot append mixed types") - if cudf.get_option("mode.pandas_compatible") and val_col.can_cast_safely( - col.dtype - ): + val_col = as_column( + value, + dtype=col.dtype + if ( + cudf.utils.utils._is_null_host_scalar(value) + or value in {None, np.nan} + ) + else None, + ) + if val_col.dtype.kind != "f" and val_col.can_cast_safely(col.dtype): + # If the value can be cast to the column dtype, do so + val_col = val_col.astype(col.dtype) to_type = col.dtype + else: + if ( + cudf.get_option("mode.pandas_compatible") + and is_pandas_nullable_extension_dtype(col.dtype) + and val_col.dtype.kind == "f" + ): + # If the column is a pandas nullable extension type, we need to + # convert the nans to a nullable type as well. + val_col = val_col.nans_to_nulls() + if len(val_col) == val_col.null_count: + # If the column is all nulls, we can use the column dtype + # to avoid unnecessary casting. + val_col = val_col.astype(col.dtype) + to_type = find_common_type([val_col.dtype, col.dtype]) + if ( + cudf.get_option("mode.pandas_compatible") + and is_string_dtype(to_type) + and is_mixed_with_object_dtype(val_col, col) + ): + raise MixedTypeError("Cannot append mixed types") + if cudf.get_option( + "mode.pandas_compatible" + ) and val_col.can_cast_safely(col.dtype): + to_type = col.dtype val_col = val_col.astype(to_type) old_col = col.astype(to_type) res_col = concat_columns([old_col, val_col])._with_type_metadata(to_type) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d07a8f76205..5c7d34da65a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -220,9 +220,21 @@ def __setitem__(self, key, value): and tmp_value.dtype.kind == "b" ) ): - to_dtype = find_common_type( - (tmp_value.dtype, self._frame.dtype) - ) + if not tmp_value.can_cast_safely( + self._frame.dtype + ) and is_pandas_nullable_extension_dtype( + self._frame.dtype + ): + raise TypeError( + f"Invalid value '{value!s}' for dtype " + f"'{self._frame.dtype}'" + ) + if tmp_value.can_cast_safely(self._frame.dtype): + to_dtype = self._frame.dtype + else: + to_dtype = find_common_type( + (tmp_value.dtype, self._frame.dtype) + ) tmp_value = tmp_value.astype(to_dtype) if to_dtype != self._frame.dtype: # Do not remove until pandas-3.0 support is added. 
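The stricter `__setitem__` above rejects values that cannot be safely cast into a nullable-dtype Series. A minimal sketch, assuming a build with this change (the error text mirrors pandas' own message):

import cudf

cudf.set_option("mode.pandas_compatible", True)
s = cudf.Series([1, 2, 3], dtype="Int64")
s[0] = 10  # casts safely, so the Int64 dtype is preserved
try:
    s[0] = 1.5  # a lossy cast into Int64 is no longer silently accepted
except TypeError as err:
    print(err)  # Invalid value '1.5' for dtype 'Int64'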
@@ -1967,9 +1979,11 @@ def data(self): def astype( self, dtype: Dtype | dict[Hashable, Dtype], - copy: bool = False, + copy: bool | None = None, errors: Literal["raise", "ignore"] = "raise", ) -> Self: + if copy is None: + copy = True if cudf.get_option("mode.pandas_compatible"): if inspect.isclass(dtype) and issubclass( dtype, pd.api.extensions.ExtensionDtype @@ -2960,6 +2974,10 @@ def unique(self): """ res = self._column.unique() if cudf.get_option("mode.pandas_compatible"): + if is_pandas_nullable_extension_dtype(self.dtype): + raise NotImplementedError( + "cudf does not support ExtensionArrays" + ) return res.values return Series._from_column(res, name=self.name) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index b4cb727721c..d4f52670cd1 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -680,6 +680,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), additional_attributes={ "_pa_array": _FastSlowAttribute("_pa_array", private=True), + "__array__": _FastSlowAttribute("__array__", private=True), }, ) @@ -702,6 +703,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), additional_attributes={ "_pa_array": _FastSlowAttribute("_pa_array", private=True), + "__array__": _FastSlowAttribute("__array__", private=True), }, ) @@ -1899,6 +1901,7 @@ def holiday_calendar_factory_wrapper(*args, **kwargs): slow_to_fast=_Unusable(), additional_attributes={ "_pa_array": _FastSlowAttribute("_pa_array", private=True), + "__array__": _FastSlowAttribute("__array__", private=True), }, ) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index fb62e100e40..73e2d371836 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -835,7 +835,10 @@ def __get__(self, instance, owner) -> Any: raise e if _is_function_or_method(slow_attr): - self._attr = _MethodProxy(fast_attr, slow_attr) + self._attr = _MethodProxy( + fast_attr, + slow_attr, + ) else: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( @@ -867,6 +870,7 @@ def __get__(self, instance, owner) -> Any: instance, self._name, )[0] + return self._attr diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index c8b645b6d26..1d57d38d787 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -88,6 +88,8 @@ def pytest_unconfigure(config): # TODO: Pass these tests with cudf.pandas enabled. 
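# Each entry below is a pytest node ID from the upstream pandas test
# suite that currently fails when cudf.pandas is active; the patched
# conftest consults this set so the remainder of the suite runs clean.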
NODEIDS_THAT_FAIL_WITH_CUDF_PANDAS = { + "tests/series/methods/test_reindex.py::test_reindexing_with_float64_NA_log", + "tests/copy_view/test_array.py::test_series_values[values]", "tests/api/test_api.py::test_pandas_array_alias", "tests/apply/test_frame_apply.py::test_agg_transform[axis='columns']", "tests/apply/test_frame_apply.py::test_agg_transform[axis='index']", @@ -795,59 +797,11 @@ def pytest_unconfigure(config): "tests/arrays/floating/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Float32Dtype]", "tests/arrays/floating/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Float64Dtype]", "tests/arrays/floating/test_construction.py::test_floating_array_constructor_copy", - "tests/arrays/floating/test_construction.py::test_series_from_float[Float32Dtype]", - "tests/arrays/floating/test_function.py::test_stat_method[kurtosis-kwargs4]", - "tests/arrays/floating/test_function.py::test_stat_method[skew-kwargs5]", "tests/arrays/floating/test_function.py::test_ufuncs_single[absolute]", "tests/arrays/floating/test_function.py::test_ufuncs_single[sign]", "tests/arrays/floating/test_function.py::test_value_counts_empty", - "tests/arrays/floating/test_function.py::test_value_counts_with_normalize", "tests/arrays/floating/test_repr.py::test_frame_repr[Float32Dtype]", "tests/arrays/floating/test_to_numpy.py::test_to_numpy_copy", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int16Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int16Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int16Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int32Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int32Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int32Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int64Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int64Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int64Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int8Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int8Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[Int8Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt16Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt16Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt16Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt32Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt32Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt32Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt64Dtype-__floordiv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt64Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt64Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt8Dtype-__floordiv__]", - 
"tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt8Dtype-__rtruediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arith_coerce_scalar[UInt8Dtype-__truediv__]", - "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__rtruediv__-1.0]", - "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__rtruediv__-other1]", - "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__truediv__-1.0]", - "tests/arrays/integer/test_arithmetic.py::test_arithmetic_conversion[__truediv__-other1]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int16Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int16Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int32Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int32Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int64Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int64Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int8Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[Int8Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt16Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt16Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt32Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt32Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt64Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt64Dtype-__rmul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt8Dtype-__mul__]", - "tests/arrays/integer/test_arithmetic.py::test_error_invalid_values[UInt8Dtype-__rmul__]", "tests/arrays/integer/test_arithmetic.py::test_values_multiplying_large_series_by_NA", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Int16Dtype]", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[Int32Dtype]", @@ -857,79 +811,24 @@ def pytest_unconfigure(config): "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[UInt32Dtype]", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[UInt64Dtype]", "tests/arrays/integer/test_comparison.py::TestComparisonOps::test_ufunc_with_out[UInt8Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[Int16Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[Int32Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[Int64Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[Int8Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[UInt16Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[UInt32Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[UInt64Dtype]", - "tests/arrays/integer/test_construction.py::test_from_dtype_from_float[UInt8Dtype]", "tests/arrays/integer/test_construction.py::test_integer_array_constructor_copy", - "tests/arrays/integer/test_dtypes.py::test_astype[data-Int16Dtype]", - 
"tests/arrays/integer/test_dtypes.py::test_astype[data-Int32Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-Int64Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-Int8Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-UInt16Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-UInt32Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-UInt64Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data-UInt8Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-Int16Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-Int32Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-Int64Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-Int8Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-UInt16Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-UInt32Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-UInt64Dtype]", - "tests/arrays/integer/test_dtypes.py::test_astype[data_missing-UInt8Dtype]", "tests/arrays/integer/test_dtypes.py::test_astype_copy", "tests/arrays/integer/test_function.py::test_ufuncs_single_int[absolute]", "tests/arrays/integer/test_function.py::test_ufuncs_single_int[sign]", "tests/arrays/integer/test_function.py::test_value_counts_empty", - "tests/arrays/integer/test_function.py::test_value_counts_with_normalize", "tests/arrays/interval/test_interval.py::TestSetitem::test_set_na[float64]", "tests/arrays/interval/test_interval_pyarrow.py::test_arrow_array", "tests/arrays/interval/test_interval_pyarrow.py::test_arrow_array_missing", "tests/arrays/interval/test_interval_pyarrow.py::test_from_arrow_from_raw_struct_array", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int16-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int16-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int32-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int32-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int64-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int64-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int8-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[Int8-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt16-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt16-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt32-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt32-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt64-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt64-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt8-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_frame[UInt8-__truediv__]", + "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__floordiv__]", + "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__pow__]", + "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__rfloordiv__]", + "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__rpow__]", "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__rtruediv__]", "tests/arrays/masked/test_arithmetic.py::test_frame[boolean-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int16-__rtruediv__]", - 
"tests/arrays/masked/test_arithmetic.py::test_series[Int16-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int32-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int32-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int64-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int64-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int8-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[Int8-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt16-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt16-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt32-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt32-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt64-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt64-__truediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt8-__rtruediv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[UInt8-__truediv__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__floordiv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__mod__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__pow__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__rfloordiv__]", - "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__rmod__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__rpow__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__rtruediv__]", "tests/arrays/masked/test_arithmetic.py::test_series[boolean-__truediv__]", @@ -1119,15 +1018,15 @@ def pytest_unconfigure(config): "tests/arrays/string_/test_string.py::test_repr[string=str[pyarrow]]", "tests/arrays/string_/test_string.py::test_repr[string=str[python]]", "tests/arrays/string_/test_string.py::test_to_numpy_returns_pdna_default[pyarrow_numpy]", - "tests/arrays/string_/test_string.py::test_to_numpy_returns_pdna_default[string=str[pyarrow]]", "tests/arrays/string_/test_string.py::test_to_numpy_returns_pdna_default[string=str[python]]", "tests/arrays/string_/test_string.py::test_value_counts_sort_false[string=str[pyarrow]]", "tests/arrays/string_/test_string.py::test_value_counts_sort_false[string=str[python]]", "tests/arrays/string_/test_string.py::test_value_counts_sort_false[string=string[pyarrow]]", "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[pyarrow]", "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[python]", + "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[string=str[pyarrow]]", + "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[string=str[python]]", "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[string=string[pyarrow]]", - "tests/arrays/string_/test_string.py::test_value_counts_with_normalize[string=string[python]]", "tests/arrays/test_array.py::test_array_inference[data7-expected7]", "tests/arrays/test_datetimelike.py::TestDatetimeArray::test_array_i8_dtype['+01:15'-B]", "tests/arrays/test_datetimelike.py::TestDatetimeArray::test_array_i8_dtype['+01:15'-D]", @@ -1356,7 +1255,6 @@ def pytest_unconfigure(config): "tests/arrays/test_datetimelike.py::TestTimedeltaArray::test_searchsorted_castable_strings[pyarrow_numpy-series]", 
"tests/arrays/test_datetimelike.py::test_searchsorted_datetimelike_with_listlike_invalid_dtype[arg0-values1]", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_array_interface", - "tests/arrays/test_datetimes.py::TestDatetimeArray::test_astype_copies[datetime64[ns]-datetime64[ns]]", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_astype_to_same", "tests/arrays/test_datetimes.py::TestDatetimeArray::test_shift_fill_value", "tests/arrays/test_datetimes.py::TestDatetimeArrayComparisons::test_cmp_dt64_arraylike_tznaive[eq]", @@ -1630,78 +1528,6 @@ def pytest_unconfigure(config): "tests/base/test_misc.py::test_memory_usage_components_narrow_series[float16]", "tests/base/test_misc.py::test_ndarray_compat_properties[multi]", "tests/base/test_misc.py::test_ndarray_compat_properties[tuples]", - "tests/base/test_unique.py::test_nunique_null[float32-None]", - "tests/base/test_unique.py::test_nunique_null[float32-nan]", - "tests/base/test_unique.py::test_nunique_null[float32-series-None]", - "tests/base/test_unique.py::test_nunique_null[float32-series-nan]", - "tests/base/test_unique.py::test_nunique_null[float64-None]", - "tests/base/test_unique.py::test_nunique_null[float64-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-bool-dtype-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-bool-dtype-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-bool-object-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-bool-object-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-categorical-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-categorical-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-complex128-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-complex128-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-complex64-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-complex64-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-datetime-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-datetime-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-datetime-tz-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-datetime-tz-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-float32-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-float32-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-float64-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-float64-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-int16-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-int16-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-int32-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-int32-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-int64-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-int64-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-int8-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-int8-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-interval-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-interval-index-nan]", - 
"tests/base/test_unique.py::test_nunique_null[series-with-mi-with-dt64tz-level-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-mi-with-dt64tz-level-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-multi-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-multi-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_bool-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_bool-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_float-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_float-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_int-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_int-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_uint-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-nullable_uint-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-object-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-object-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-period-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-period-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-range-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-range-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-repeats-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-repeats-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-pyarrow-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-pyarrow-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-python-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-string-python-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-timedelta-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-timedelta-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-tuples-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-tuples-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint16-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint16-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint32-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint32-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint64-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint64-index-nan]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint8-index-None]", - "tests/base/test_unique.py::test_nunique_null[series-with-uint8-index-nan]", "tests/base/test_unique.py::test_unique[multi]", "tests/base/test_unique.py::test_unique[tuples]", "tests/base/test_value_counts.py::test_value_counts[multi]", @@ -1843,7 +1669,6 @@ def pytest_unconfigure(config): "tests/copy_view/test_array.py::test_dataframe_values[values]", "tests/copy_view/test_array.py::test_series_array_ea_dtypes", 
"tests/copy_view/test_array.py::test_series_to_numpy", - "tests/copy_view/test_array.py::test_series_values[values]", "tests/copy_view/test_astype.py::test_astype_avoids_copy[Int64-Int64]", "tests/copy_view/test_astype.py::test_astype_avoids_copy[Int64-int64]", "tests/copy_view/test_astype.py::test_astype_avoids_copy[int64-Int64]", @@ -2507,8 +2332,19 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_argsort_missing[uint32]", "tests/extension/test_arrow.py::TestArrowArray::test_argsort_missing[uint64]", "tests/extension/test_arrow.py::TestArrowArray::test_argsort_missing[uint8]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__add__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__mul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__pow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rmul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__rtruediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[bool-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[date32[day]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[date32[day]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[date64[ms]-__rsub__]", @@ -2524,7 +2360,9 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[decimal128(7, 3)-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[decimal128(7, 3)-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[decimal128(7, 3)-__truediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[double-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[double-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[double-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[double-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[duration[ms]-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[duration[ms]-__radd__]", @@ -2542,19 +2380,29 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[duration[us]-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[duration[us]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[duration[us]-__sub__]", + 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[float-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[float-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[float-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[float-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int16-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int16-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int16-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int16-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int16-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int32-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int32-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int32-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int32-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int32-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int64-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int64-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int64-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int64-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int64-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int8-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int8-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int8-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int8-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[int8-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[timestamp[ms, tz=US/Eastern]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[timestamp[ms, tz=US/Eastern]-__sub__]", @@ -2589,16 +2437,31 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[timestamp[us]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[timestamp[us]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__mod__]", 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__rtruediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__mul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__pow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__rmul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__rtruediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint64-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__mod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__floordiv__]", @@ -2611,9 +2474,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__rmul__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__rpow__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__rsub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__sub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[bool-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[date32[day]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[date32[day]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[date64[ms]-__rsub__]", @@ -2686,9 +2547,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__rmul__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__rsub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__rtruediv__]", 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__sub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int16-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__mod__]", @@ -2698,9 +2557,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__rmul__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__rsub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__sub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int32-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__mod__]", @@ -2710,9 +2567,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__rmul__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__rsub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__sub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int64-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__mod__]", @@ -2722,9 +2577,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__rmul__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__rsub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__rtruediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__sub__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[int8-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[timestamp[ms, tz=US/Eastern]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[timestamp[ms, tz=US/Eastern]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[timestamp[ms, tz=US/Pacific]-__rsub__]", @@ -2765,8 +2618,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint16-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint16-__rmod__]", 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint16-__rmul__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint16-__rtruediv__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint16-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__mod__]", @@ -2775,8 +2626,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__rmul__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__rtruediv__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint32-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__mod__]", @@ -2785,8 +2634,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__rmul__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__rtruediv__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint64-__truediv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__floordiv__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__mod__]", @@ -2795,8 +2642,18 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__rmul__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__rtruediv__]", - "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_array[uint8-__truediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__add__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__mul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__pow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__radd__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__rfloordiv__]", + 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__rmul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__rsub__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[bool-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[date32[day]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[date32[day]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[date64[ms]-__rsub__]", @@ -2810,6 +2667,8 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[decimal128(7, 3)-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[decimal128(7, 3)-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[decimal128(7, 3)-__truediv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[double-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[double-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[ms]-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[ms]-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[ms]-__rsub__]", @@ -2826,10 +2685,24 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[us]-__radd__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[us]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[duration[us]-__sub__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[float-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[float-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int16-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int16-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int16-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int16-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int32-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int32-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int32-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int32-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int64-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int64-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int64-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int64-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int8-__floordiv__]", + 
"tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int8-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int8-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[int8-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[timestamp[ms, tz=US/Eastern]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[timestamp[ms, tz=US/Eastern]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[timestamp[ms, tz=US/Pacific]-__rsub__]", @@ -2863,49 +2736,30 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[timestamp[us]-__rsub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[timestamp[us]-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint16-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint16-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint16-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint16-__rmod__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint32-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint32-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint32-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint32-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__add__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__floordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__mul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__pow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__radd__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__rmul__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__rsub__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint64-__sub__]", "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint8-__floordiv__]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[binary]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[bool]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[date32[day]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[date64[ms]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[decimal128(7, 3)]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[double]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[duration[ms]]", - 
"tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[duration[ns]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[duration[s]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[duration[us]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[float]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[int16]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[int32]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[int64]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[int8]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[string]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[time32[ms]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[time32[s]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[time64[ns]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[time64[us]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ms, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ms, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ms, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ms]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ns, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ns, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ns, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[ns]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[s, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[s, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[s, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[s]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[us, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[us, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[us, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[timestamp[us]]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[uint16]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[uint32]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[uint64]", - "tests/extension/test_arrow.py::TestArrowArray::test_array_interface_copy[uint8]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint8-__mod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint8-__rfloordiv__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint8-__rmod__]", + "tests/extension/test_arrow.py::TestArrowArray::test_array_interface[duration[ns]]", + 
"tests/extension/test_arrow.py::TestArrowArray::test_array_interface[timestamp[ns]]", "tests/extension/test_arrow.py::TestArrowArray::test_array_type[binary]", "tests/extension/test_arrow.py::TestArrowArray::test_array_type[bool]", "tests/extension/test_arrow.py::TestArrowArray::test_array_type[date32[day]]", @@ -3298,10 +3152,29 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[float-std-True]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[float-sum-True]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[float-var-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[uint8-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[uint16-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[uint32-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[uint64-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[int8-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[int16-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[int32-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[int64-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[float-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[double-skew-True]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_boolean[timestamp[ms]-any-False]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_boolean[timestamp[ns]-any-False]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_boolean[timestamp[s]-any-False]", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_boolean[timestamp[us]-any-False]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[float-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int16-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int32-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int64-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int8-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint16-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint32-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint64-kurt-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint8-kurt-True]", "tests/extension/test_arrow.py::TestArrowArray::test_reindex_non_na_fill_value[bool]", "tests/extension/test_arrow.py::TestArrowArray::test_reindex_non_na_fill_value[double]", "tests/extension/test_arrow.py::TestArrowArray::test_reindex_non_na_fill_value[float]", @@ -3402,6 +3275,7 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_set_frame_expand_extension_with_regular[uint32]", "tests/extension/test_arrow.py::TestArrowArray::test_set_frame_expand_extension_with_regular[uint64]", "tests/extension/test_arrow.py::TestArrowArray::test_set_frame_expand_extension_with_regular[uint8]", + "tests/extension/test_arrow.py::TestArrowArray::test_setitem_2d_values[decimal128(7, 3)]", 
"tests/extension/test_arrow.py::TestArrowArray::test_setitem_expand_columns[bool]", "tests/extension/test_arrow.py::TestArrowArray::test_setitem_expand_columns[decimal128(7, 3)]", "tests/extension/test_arrow.py::TestArrowArray::test_setitem_expand_columns[double]", @@ -3444,37 +3318,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_setitem_frame_2d_values[uint32]", "tests/extension/test_arrow.py::TestArrowArray::test_setitem_frame_2d_values[uint64]", "tests/extension/test_arrow.py::TestArrowArray::test_setitem_frame_2d_values[uint8]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[decimal128(7, 3)]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[double]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[duration[ms]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[duration[ns]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[duration[s]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[duration[us]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[float]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[int16]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[int32]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[int64]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[int8]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ms, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ms, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ms, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ms]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ns, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ns, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ns, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[ns]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[s, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[s, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[s, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[s]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[us, tz=US/Eastern]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[us, tz=US/Pacific]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[us, tz=UTC]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[timestamp[us]]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[uint16]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[uint32]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[uint64]", - "tests/extension/test_arrow.py::TestArrowArray::test_to_numpy[uint8]", "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[binary-positive]", "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[bool-absolute]", "tests/extension/test_arrow.py::TestArrowArray::test_unary_ufunc_dunder_equivalence[bool-negative]", @@ -3632,16 +3475,22 @@ def pytest_unconfigure(config): 
"tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-series-index2]", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[uint8-series-index3]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and", + "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and_scalar[False-expected3]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and_scalar[None-expected0]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and_scalar[other1-expected1]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_or", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_or_scalar[None-expected0]", + "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_or_scalar[True-expected2]", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_or_scalar[other1-expected1]", - "tests/extension/test_arrow.py::TestLogicalOps::test_logical_masked_numpy[__and__-True]", - "tests/extension/test_arrow.py::TestLogicalOps::test_logical_masked_numpy[__or__-True]", - "tests/extension/test_arrow.py::TestLogicalOps::test_logical_masked_numpy[__xor__-False]", - "tests/extension/test_arrow.py::test_arrow_true_division_large_divisor[int64[pyarrow]]", - "tests/extension/test_arrow.py::test_arrow_true_division_large_divisor[uint64[pyarrow]]", + "tests/extension/test_arrow.py::test_arrow_floordiv_floating_0_divisor[float[pyarrow]]", + "tests/extension/test_arrow.py::test_arrow_floordiv_integral_invalid[pa_type0]", + "tests/extension/test_arrow.py::test_arrow_floordiv_integral_invalid[pa_type1]", + "tests/extension/test_arrow.py::test_arrow_floordiv_integral_invalid[pa_type2]", + "tests/extension/test_arrow.py::test_arrow_floordiv_integral_invalid[pa_type3]", + "tests/extension/test_arrow.py::test_arrow_floordiv_large_integral_result[uint64[pyarrow]]", + "tests/extension/test_arrow.py::test_arrow_floordiv_larger_divisor[pa_type0]", + "tests/extension/test_arrow.py::test_arrow_floordiv_larger_divisor[pa_type1]", + "tests/extension/test_arrow.py::test_arrow_floordiv_larger_divisor[pa_type2]", "tests/extension/test_arrow.py::test_astype_errors_ignore", "tests/extension/test_arrow.py::test_bitwise[pa_type0]", "tests/extension/test_arrow.py::test_bitwise[pa_type1]", @@ -3666,7 +3515,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::test_describe_datetime_data[pa_type6]", "tests/extension/test_arrow.py::test_describe_datetime_data[pa_type7]", "tests/extension/test_arrow.py::test_describe_datetime_data[pa_type9]", - "tests/extension/test_arrow.py::test_dt_components", "tests/extension/test_arrow.py::test_dt_day_month_name[day_name-Sunday]", "tests/extension/test_arrow.py::test_dt_day_month_name[month_name-January]", "tests/extension/test_arrow.py::test_dt_days_in_month[days_in_month]", @@ -3873,69 +3721,26 @@ def pytest_unconfigure(config): "tests/extension/test_masked.py::TestMaskedArrays::test_argsort_missing[UInt32Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_argsort_missing[UInt64Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_argsort_missing[UInt8Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int16Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int32Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int32Dtype-__truediv__]", - 
"tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int64Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int64Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Int8Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt16Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt32Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt32Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt64Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt64Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[UInt8Dtype-__truediv__]", + "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Float32Dtype-__floordiv__]", + "tests/extension/test_masked.py::TestMaskedArrays::test_arith_frame_with_scalar[Float64Dtype-__floordiv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__floordiv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__mod__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__pow__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__rfloordiv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__rmod__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__rpow__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__rsub__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__sub__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[BooleanDtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Float32Dtype-__floordiv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Float64Dtype-__floordiv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int16Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int16Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int32Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int32Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int64Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int64Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int8Dtype-__rpow__]", - 
"tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[Int8Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt16Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt16Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt32Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt32Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt64Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt64Dtype-__truediv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt8Dtype-__rpow__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_array[UInt8Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int16Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int32Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int32Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int64Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int64Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Int8Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt16Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt16Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt32Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt32Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt64Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt64Dtype-__truediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt8Dtype-__rtruediv__]", - "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[UInt8Dtype-__truediv__]", + "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Float32Dtype-__floordiv__]", + "tests/extension/test_masked.py::TestMaskedArrays::test_arith_series_with_scalar[Float64Dtype-__floordiv__]", "tests/extension/test_masked.py::TestMaskedArrays::test_array_interface_copy[BooleanDtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_array_interface_copy[Float32Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_array_interface_copy[Float64Dtype]", @@ -4071,7 
+3876,7 @@ def pytest_unconfigure(config): "tests/extension/test_masked.py::TestMaskedArrays::test_memory_usage[UInt32Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_memory_usage[UInt64Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_memory_usage[UInt8Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_reduce_series_numeric[Float32Dtype-skew-True]", + "tests/extension/test_masked.py::TestMaskedArrays::test_reduce_frame[Float32Dtype-skew-True]", "tests/extension/test_masked.py::TestMaskedArrays::test_reindex_non_na_fill_value[BooleanDtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_reindex_non_na_fill_value[Float32Dtype]", "tests/extension/test_masked.py::TestMaskedArrays::test_reindex_non_na_fill_value[Float64Dtype]", @@ -4181,17 +3986,6 @@ def pytest_unconfigure(config): "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-frame-index3]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-series-index2]", "tests/extension/test_masked.py::TestMaskedArrays::test_unstack[UInt8Dtype-series-index3]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[BooleanDtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Float32Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Float64Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Int16Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Int32Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Int64Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[Int8Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[UInt16Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[UInt32Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[UInt64Dtype]", - "tests/extension/test_masked.py::TestMaskedArrays::test_value_counts_with_normalize[UInt8Dtype]", "tests/extension/test_numpy.py::Test2DCompat::test_copy_order[float]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_accumulate_series[float-cummax-False]", "tests/extension/test_numpy.py::TestNumpyExtensionArray::test_accumulate_series[float-cummax-True]", @@ -4446,12 +4240,8 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_arith_series_with_array[string=string[pyarrow]-True-__radd__]", "tests/extension/test_string.py::TestStringArray::test_arith_series_with_array[string=string[python]-False-__radd__]", "tests/extension/test_string.py::TestStringArray::test_arith_series_with_array[string=string[python]-True-__radd__]", - "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=str[pyarrow]-False]", - "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=str[pyarrow]-True]", "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=str[python]-False]", "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=str[python]-True]", - "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=string[pyarrow]-False]", - "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=string[pyarrow]-True]", 
"tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=string[python]-False]", "tests/extension/test_string.py::TestStringArray::test_array_interface_copy[string=string[python]-True]", "tests/extension/test_string.py::TestStringArray::test_astype_own_type[pyarrow-False-False]", @@ -4814,12 +4604,8 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_to_numpy[pyarrow_numpy-True]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[python-False]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[python-True]", - "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=str[pyarrow]-False]", - "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=str[pyarrow]-True]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=str[python]-False]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=str[python]-True]", - "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=string[pyarrow]-False]", - "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=string[pyarrow]-True]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=string[python]-False]", "tests/extension/test_string.py::TestStringArray::test_to_numpy[string=string[python]-True]", "tests/extension/test_string.py::TestStringArray::test_unary_ufunc_dunder_equivalence[pyarrow-False-positive]", @@ -4910,10 +4696,12 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[pyarrow-True]", "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[python-False]", "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[python-True]", + "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=str[pyarrow]-False]", + "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=str[pyarrow]-True]", + "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=str[python]-False]", + "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=str[python]-True]", "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=string[pyarrow]-False]", "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=string[pyarrow]-True]", - "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=string[python]-False]", - "tests/extension/test_string.py::TestStringArray::test_value_counts_with_normalize[string=string[python]-True]", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_misc_brokenness3", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_series_categorical_index", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_series_list_dict", @@ -5083,13 +4871,10 @@ def pytest_unconfigure(config): "tests/frame/methods/test_astype.py::TestAstype::test_astype_arg_for_errors_dictlist", "tests/frame/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[inf-int32]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[inf-int64]", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[nan-int32]", - 
"tests/frame/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[nan-int64]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64_to_string[DataFrame-None]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64_to_string[Series-None]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64tz", "tests/frame/methods/test_astype.py::TestAstype::test_astype_dt64tz_to_str", - "tests/frame/methods/test_astype.py::TestAstype::test_astype_extension_dtypes_duplicate_col[Int64]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_str_float", "tests/frame/methods/test_astype.py::TestAstype::test_astype_td64_to_string[DataFrame]", "tests/frame/methods/test_astype.py::TestAstype::test_astype_td64_to_string[Series]", @@ -5125,14 +4910,13 @@ def pytest_unconfigure(config): "tests/frame/methods/test_count.py::TestDataFrameCount::test_count", "tests/frame/methods/test_cov_corr.py::TestDataFrameCorr::test_corr_item_cache", "tests/frame/methods/test_cov_corr.py::TestDataFrameCorr::test_corr_nooverlap[pearson]", + "tests/frame/methods/test_cov_corr.py::TestDataFrameCorr::test_corr_nullable_integer[pearson-other_column2-nullable_column0]", "tests/frame/methods/test_cov_corr.py::TestDataFrameCorr::test_corr_numeric_only[False-spearman]", "tests/frame/methods/test_cov_corr.py::TestDataFrameCorr::test_corr_scipy_method[pearson]", "tests/frame/methods/test_cov_corr.py::TestDataFrameCov::test_cov", "tests/frame/methods/test_describe.py::TestDataFrameDescribe::test_describe_datetime_columns", "tests/frame/methods/test_diff.py::TestDataFrameDiff::test_diff_all_int_dtype[int16]", "tests/frame/methods/test_diff.py::TestDataFrameDiff::test_diff_all_int_dtype[int8]", - "tests/frame/methods/test_diff.py::TestDataFrameDiff::test_diff_integer_na[0-expected0]", - "tests/frame/methods/test_diff.py::TestDataFrameDiff::test_diff_integer_na[1-expected1]", "tests/frame/methods/test_dot.py::TestDataFrameDot::test_dot_1d_ndarray", "tests/frame/methods/test_dot.py::TestDataFrameDot::test_dot_2d_ndarray", "tests/frame/methods/test_dot.py::TestDataFrameDot::test_dot_aligns", @@ -5290,7 +5074,6 @@ def pytest_unconfigure(config): "tests/frame/methods/test_rename.py::TestRename::test_rename_mapper_and_positional_arguments_raises", "tests/frame/methods/test_rename.py::TestRename::test_rename_no_mappings_raises", "tests/frame/methods/test_rename.py::TestRename::test_rename_nocopy", - "tests/frame/methods/test_replace.py::TestDataFrameReplace::test_replace_NA_with_None", "tests/frame/methods/test_replace.py::TestDataFrameReplace::test_replace_after_convert_dtypes", "tests/frame/methods/test_replace.py::TestDataFrameReplace::test_replace_dict_tuple_list_ordering_remains_the_same", "tests/frame/methods/test_replace.py::TestDataFrameReplace::test_replace_list_with_mixed_type[DataFrame-array-data0-to_replace0-value0-expected0]", @@ -5398,11 +5181,7 @@ def pytest_unconfigure(config): "tests/frame/methods/test_tz_localize.py::TestTZLocalize::test_tz_localize_copy_inplace_mutate[Series-True]", "tests/frame/methods/test_update.py::TestDataFrameUpdate::test_update_modify_view", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NoneType]", - "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[Decimal]", - "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NaTType]", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NAType]", - 
"tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[Decimal-columns1]", - "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[NaTType-columns1]", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_subset[NAType-columns1]", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_empty", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_empty_normalize", @@ -5466,22 +5245,6 @@ def pytest_unconfigure(config): "tests/frame/test_arithmetic.py::test_arithmetic_multiindex_align[python]", "tests/frame/test_arithmetic.py::test_bool_frame_mult_float[numexpr]", "tests/frame/test_arithmetic.py::test_bool_frame_mult_float[python]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-Int16]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-Int32]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-Int64]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-Int8]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-UInt16]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-UInt32]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-UInt64]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[numexpr-UInt8]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-Int16]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-Int32]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-Int64]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-Int8]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-UInt16]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-UInt32]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-UInt64]", - "tests/frame/test_arithmetic.py::test_frame_sub_nullable_int[python-UInt8]", "tests/frame/test_arithmetic.py::test_frame_with_zero_len_series_corner_cases[numexpr]", "tests/frame/test_arithmetic.py::test_frame_with_zero_len_series_corner_cases[python]", "tests/frame/test_arithmetic.py::test_inplace_arithmetic_series_update[numexpr]", @@ -5520,7 +5283,6 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_1d_object_array_does_not_copy", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_2d_object_array_does_not_copy", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_from_list_of_datetimes", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_ndarray_with_nas_and_int_dtype", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_construct_with_two_categoricalindex_series", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_with_nulls[arr0]", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_datetimes_with_nulls[arr1]", @@ -5534,6 +5296,7 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_dict_with_index", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_dict_with_index_and_columns", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_dtype_nocast_view_2d_array", + "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_dtype_nocast_view_dataframe", 
"tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_error_msgs", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_for_list_with_dtypes", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_list_of_2d_raises", @@ -5542,7 +5305,6 @@ def pytest_unconfigure(config): "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_maskedarray_hardened", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_maskedarray_nonfloat", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_maskedrecarray_dtype", - "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_miscast_na_int_dtype", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_mixed_dict_and_Series", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_ndarray_copy", "tests/frame/test_constructors.py::TestDataFrameConstructors::test_constructor_rec", @@ -5828,9 +5590,7 @@ def pytest_unconfigure(config): "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_idxmax_idxmin_convert_dtypes[idxmax-expected_value0]", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_idxmax_idxmin_convert_dtypes[idxmin-expected_value1]", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_mode_dropna[False-expected3]", - "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_nunique", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_operators_timedelta64", - "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_stat_op_calc", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_std_datetime64_with_nat[False-ms-values0]", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_std_datetime64_with_nat[False-ms-values1]", "tests/frame/test_reductions.py::TestDataFrameAnalytics::test_std_datetime64_with_nat[False-ns-values0]", @@ -5887,102 +5647,14 @@ def pytest_unconfigure(config): "tests/frame/test_reductions.py::TestEmptyDataFrameReductions::test_df_empty_nullable_min_count_0[sum-UInt8-0-UInt64]", "tests/frame/test_reductions.py::TestNuisanceColumns::test_any_all_categorical_dtype_nuisance_column[all]", "tests/frame/test_reductions.py::TestNuisanceColumns::test_any_all_categorical_dtype_nuisance_column[any]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-0-kurt]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-0-kurtosis]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-0-sem]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-0-skew]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-kurt]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-kurtosis]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-sem]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-skew]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Float32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int16-2-prod]", - 
"tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int16-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int16-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int64-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int64-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int64-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int8-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int8-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-Int8-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt16-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt16-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt16-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt64-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt64-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt64-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt8-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt8-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[False-UInt8-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-0-kurt]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-0-kurtosis]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-0-sem]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-0-skew]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-kurt]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-kurtosis]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-sem]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-skew]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float64-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Float64-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int16-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int16-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int16-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int16-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int16-2-sum]", - 
"tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int32-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int32-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int64-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int64-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int64-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int64-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int64-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int8-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int8-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int8-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int8-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-Int8-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt16-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt16-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt16-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt16-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt16-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt32-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt32-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt32-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt32-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt32-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt64-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt64-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt64-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt64-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt64-2-sum]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt8-0-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt8-2-median]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt8-2-prod]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt8-2-product]", - "tests/frame/test_reductions.py::test_numeric_ea_axis_1[True-UInt8-2-sum]", + "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-False-kurt]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-False-mean]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-False-median]", + "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-False-skew]", + "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-True-kurt]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-True-mean]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-True-median]", + 
"tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[Float64-True-skew]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[float64-False-kurt]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[float64-False-mean]", "tests/frame/test_reductions.py::test_reduction_axis_none_returns_scalar[float64-False-median]", @@ -6979,8 +6651,6 @@ def pytest_unconfigure(config): "tests/groupby/methods/test_nth.py::test_nth_after_selection[any-selection2]", "tests/groupby/methods/test_nth.py::test_nth_column_order", "tests/groupby/methods/test_nth.py::test_nth_indexed", - "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[Decimal-nth]", - "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NaTType-nth]", "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NAType-nth]", "tests/groupby/methods/test_nth.py::test_groupby_last_first_nth_with_none[NoneType-nth]", "tests/groupby/methods/test_nth.py::test_nth_multi_grouper", @@ -7153,10 +6823,6 @@ def pytest_unconfigure(config): "tests/groupby/methods/test_value_counts.py::test_compound[string=string[python]-True-True-expected_rows2-expected_count2-expected_group_size2-True]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NoneType-False-count-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NoneType-True-proportion-False-expected_data1-expected_index1]", - "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[Decimal-False-count-False-expected_data1-expected_index1]", - "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[Decimal-True-proportion-False-expected_data1-expected_index1]", - "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NaTType-False-count-False-expected_data1-expected_index1]", - "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NaTType-True-proportion-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NAType-False-count-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_data_frame_value_counts_dropna[NAType-True-proportion-False-expected_data1-expected_index1]", "tests/groupby/methods/test_value_counts.py::test_dropna_combinations[True-False-expected_rows2-expected_values2]", @@ -7270,12 +6936,6 @@ def pytest_unconfigure(config): "tests/groupby/test_counting.py::test_count_arrow_string_array[string[pyarrow]]", "tests/groupby/test_counting.py::test_count_arrow_string_array[string[pyarrow_numpy]]", "tests/groupby/test_counting.py::test_count_arrow_string_array[string[python]]", - "tests/groupby/test_cumulative.py::test_cummin[Int64]", - "tests/groupby/test_cumulative.py::test_cummin[np.int64]", - "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[Int64-cummax]", - "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[Int64-cummin]", - "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[UInt64-cummax]", - "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[UInt64-cummin]", "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[boolean-cummax]", "tests/groupby/test_cumulative.py::test_cummin_max_all_nan_column[boolean-cummin]", "tests/groupby/test_cumulative.py::test_cython_api2", @@ -7457,23 +7117,9 @@ def 
pytest_unconfigure(config): "tests/groupby/test_groupby_dropna.py::test_groupby_apply_with_dropna_for_multi_index[dropna_true_no_nan]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_agg[False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NoneType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[Decimal-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NaTType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_one_group[NAType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-Decimal-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NaTType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NAType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[Decimal-NoneType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-Decimal-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NaTType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NAType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NaTType-NoneType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-Decimal-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NaTType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NAType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NAType-NoneType-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-Decimal-False-tuples1-outputs1]", - "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NaTType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NAType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_groupby_dropna_multi_index_dataframe_nan_in_two_groups[NoneType-NoneType-False-tuples1-outputs1]", "tests/groupby/test_groupby_dropna.py::test_grouper_dropna_propagation[False]", @@ -8214,10 +7860,6 @@ def pytest_unconfigure(config): "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[inf-int32]", "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[inf-int64]", "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[inf-int]", - 
"tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[nan-int16]", - "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[nan-int32]", - "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[nan-int64]", - "tests/indexes/numeric/test_astype.py::TestAstype::test_cannot_cast_inf_to_int[nan-int]", "tests/indexes/numeric/test_indexing.py::TestContains::test_contains_float64_nans", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_invalid", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_nearest_decreasing[backfill-expected1]", @@ -8246,11 +7888,7 @@ def pytest_unconfigure(config): "tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int16]", "tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int32]", "tests/indexes/numeric/test_numeric.py::TestIntNumericIndex::test_constructor[int64]", - "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[Decimal]", - "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[NaTType]", "tests/indexes/object/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[NAType]", - "tests/indexes/object/test_indexing.py::TestGetIndexer::test_get_indexer_with_NA_values[None-unique_nulls_fixture22]", - "tests/indexes/object/test_indexing.py::TestGetIndexer::test_get_indexer_with_NA_values[unique_nulls_fixture2-None]", "tests/indexes/object/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[in_slice13--object]", "tests/indexes/object/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[in_slice13--string[pyarrow_numpy]]", "tests/indexes/period/test_constructors.py::TestPeriodIndex::test_constructor_fromarraylike", @@ -8324,8 +7962,6 @@ def pytest_unconfigure(config): "tests/indexes/string/test_indexing.py::TestGetIndexer::test_get_indexer_strings_raises[string=string[pyarrow]]", "tests/indexes/string/test_indexing.py::TestGetIndexer::test_get_indexer_strings_raises[string=string[python]]", "tests/indexes/string/test_indexing.py::TestGetIndexerNonUnique::test_get_indexer_non_unique_nas[string=object-null3]", - "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-Decimal]", - "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-NaTType]", "tests/indexes/string/test_indexing.py::TestGetLoc::test_get_loc_missing[string=object-NAType]", "tests/indexes/string/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[string=object-in_slice13-]", "tests/indexes/string/test_indexing.py::TestSliceLocs::test_slice_locs_negative_step[string=str[pyarrow]-in_slice13-]", @@ -8378,8 +8014,6 @@ def pytest_unconfigure(config): "tests/indexes/test_base.py::TestIndex::test_empty_fancy_raises[uint32]", "tests/indexes/test_base.py::TestIndex::test_empty_fancy_raises[uint64]", "tests/indexes/test_base.py::TestIndex::test_equals_op_mismatched_multiindex_raises[index0]", - "tests/indexes/test_base.py::TestIndex::test_format_missing[Decimal-vals1]", - "tests/indexes/test_base.py::TestIndex::test_format_missing[NaTType-vals1]", "tests/indexes/test_base.py::TestIndex::test_format_missing[NAType-vals1]", "tests/indexes/test_base.py::TestIndex::test_is_", "tests/indexes/test_base.py::TestIndex::test_isin_level_kwarg_bad_label_raises[bool-dtype-nan]", @@ -8437,10 +8071,6 @@ def pytest_unconfigure(config): 
"tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float32]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float64]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_float64[NoneType-float]", - "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[Decimal-Decimal]", - "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[Decimal-NoneType]", - "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NaTType-NaTType]", - "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NaTType-NoneType]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NAType-NAType]", "tests/indexes/test_base.py::TestIndex::test_isin_nan_common_object[NAType-NoneType]", "tests/indexes/test_base.py::TestIndex::test_map_defaultdict", @@ -9013,7 +8643,6 @@ def pytest_unconfigure(config): "tests/indexing/test_iat.py::test_iat_setitem_item_cache_cleared[iat]", "tests/indexing/test_iat.py::test_iat_setitem_item_cache_cleared[iloc]", "tests/indexing/test_iloc.py::TestILocErrors::test_iloc_getitem_setitem_fancy_exceptions", - "tests/indexing/test_iloc.py::TestILocSeries::test_iloc_nullable_int64_size_1_nan", "tests/indexing/test_iloc.py::TestiLocBaseIndependent::test_identity_slice_returns_new_object", "tests/indexing/test_iloc.py::TestiLocBaseIndependent::test_iloc_getitem_doc_issue", "tests/indexing/test_iloc.py::TestiLocBaseIndependent::test_iloc_getitem_int_single_ea_block_view", @@ -10102,6 +9731,7 @@ def pytest_unconfigure(config): "tests/indexing/test_loc.py::TestLocWithMultiIndex::test_loc_getitem_multilevel_index_order[index-keys5-expected5]", "tests/indexing/test_loc.py::TestLocWithMultiIndex::test_loc_getitem_multilevel_index_order[index-keys6-expected6]", "tests/indexing/test_loc.py::TestLocWithMultiIndex::test_loc_multiindex_null_slice_na_level", + "tests/indexing/test_loc.py::TestLocWithMultiIndex::test_loc_set_nan_in_categorical_series[Float64]", "tests/indexing/test_loc.py::test_loc_datetimelike_mismatched_dtypes", "tests/indexing/test_loc.py::test_loc_getitem_multiindex_tuple_level", "tests/indexing/test_loc.py::test_loc_setitem_uint8_upcast[300]", @@ -10242,7 +9872,6 @@ def pytest_unconfigure(config): "tests/io/formats/style/test_html.py::test_sticky_levels[one-False-True]", "tests/io/formats/style/test_html.py::test_sticky_levels[one-True-False]", "tests/io/formats/style/test_html.py::test_sticky_levels[one-True-True]", - "tests/io/formats/style/test_matplotlib.py::test_background_gradient_nullable_dtypes", "tests/io/formats/style/test_style.py::TestStyler::test_apply_axis", "tests/io/formats/style/test_style.py::TestStyler::test_caption", "tests/io/formats/style/test_style.py::TestStyler::test_export", @@ -10298,7 +9927,6 @@ def pytest_unconfigure(config): "tests/io/formats/test_to_csv.py::TestToCSV::test_to_csv_different_datetime_formats", "tests/io/formats/test_to_csv.py::TestToCSV::test_to_csv_multi_index", "tests/io/formats/test_to_csv.py::TestToCSV::test_to_csv_na_rep", - "tests/io/formats/test_to_csv.py::TestToCSV::test_to_csv_na_rep_long_string[Int64]", "tests/io/formats/test_to_csv.py::TestToCSV::test_to_csv_with_single_column", "tests/io/formats/test_to_html.py::test_to_html_truncate", "tests/io/formats/test_to_html.py::test_to_html_truncation_index_false_max_cols[True-gh22783_named_columns_index-0]", @@ -10524,8 +10152,6 @@ def pytest_unconfigure(config): "tests/io/test_fsspec.py::test_json_options[zip]", 
"tests/io/test_fsspec.py::test_json_options[zstd]", "tests/io/test_fsspec.py::test_non_fsspec_options", - "tests/io/test_html.py::TestReadHtml::test_extract_links[bs4-header]", - "tests/io/test_html.py::TestReadHtml::test_extract_links[lxml-header]", "tests/io/test_orc.py::test_orc_reader_basic", "tests/io/test_orc.py::test_orc_reader_date_high", "tests/io/test_orc.py::test_orc_reader_date_low", @@ -10687,10 +10313,6 @@ def pytest_unconfigure(config): "tests/io/xml/test_xml.py::test_empty_stylesheet[1]", "tests/io/xml/test_xml.py::test_wrong_file_path[etree]", "tests/io/xml/test_xml.py::test_wrong_file_path[lxml]", - "tests/io/xml/test_xml_dtypes.py::test_dtype_nullable_int[etree]", - "tests/io/xml/test_xml_dtypes.py::test_dtype_nullable_int[lxml]", - "tests/io/xml/test_xml_dtypes.py::test_dtypes_with_names[etree]", - "tests/io/xml/test_xml_dtypes.py::test_dtypes_with_names[lxml]", "tests/libs/test_libalgos.py::test_ensure_platform_int", "tests/plotting/frame/test_frame.py::TestDataFramePlots::test_unordered_ts", "tests/plotting/test_boxplot_method.py::TestDataFramePlots::test_boxplot_legacy1_series", @@ -11458,7 +11080,6 @@ def pytest_unconfigure(config): "tests/reshape/test_get_dummies.py::TestGetDummies::test_get_dummies_ea_dtype[string=string[python]-string]", "tests/reshape/test_get_dummies.py::TestGetDummies::test_get_dummies_int_int", "tests/reshape/test_melt.py::TestMelt::test_melt_ea_columns", - "tests/reshape/test_melt.py::TestMelt::test_melt_ea_dtype[Int8]", "tests/reshape/test_melt.py::TestMelt::test_multiindex", "tests/reshape/test_melt.py::TestMelt::test_vars_work_with_multiindex", "tests/reshape/test_melt.py::TestWideToLong::test_invalid_separator", @@ -11935,8 +11556,6 @@ def pytest_unconfigure(config): "tests/series/indexing/test_setitem.py::TestSeriesNoneCoercion::test_series_where[obj1-expected1-None]", "tests/series/indexing/test_setitem.py::TestSeriesNoneCoercion::test_series_where[obj2-expected2-None]", "tests/series/indexing/test_setitem.py::TestSeriesNoneCoercion::test_series_where[obj3-expected3-None]", - "tests/series/indexing/test_setitem.py::TestSetitemBooleanMask::test_setitem_boolean_nullable_int_types[Float32]", - "tests/series/indexing/test_setitem.py::TestSetitemBooleanMask::test_setitem_boolean_nullable_int_types[Float64]", "tests/series/indexing/test_setitem.py::TestSetitemBooleanMask::test_setitem_boolean_nullable_int_types[Int16]", "tests/series/indexing/test_setitem.py::TestSetitemBooleanMask::test_setitem_boolean_nullable_int_types[Int32]", "tests/series/indexing/test_setitem.py::TestSetitemBooleanMask::test_setitem_boolean_nullable_int_types[Int64]", @@ -12490,8 +12109,7 @@ def pytest_unconfigure(config): "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_mask_key[int-loc]", "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_mask_key[int-setitem]", "tests/series/indexing/test_setitem.py::TestSetitemTimedelta64IntoNumeric::test_series_where[int]", - "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[Decimal]", - "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[NaTType]", + "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlarge_with_na[na5-target_na5-int64-object-2-None]", "tests/series/indexing/test_setitem.py::TestSetitemWithExpansion::test_setitem_enlargement_object_none[NAType]", 
"tests/series/indexing/test_setitem.py::TestSmallIntegerSetitemUpcast::test_int_key[iloc-4611686018427387904]", "tests/series/indexing/test_setitem.py::TestSmallIntegerSetitemUpcast::test_int_key[iloc-8589934593.0]", @@ -12532,9 +12150,7 @@ def pytest_unconfigure(config): "tests/series/methods/test_argsort.py::TestSeriesArgsort::test_argsort_dt64[us]", "tests/series/methods/test_argsort.py::TestSeriesArgsort::test_argsort_stable", "tests/series/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[int32-inf]", - "tests/series/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[int32-nan]", "tests/series/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[int64-inf]", - "tests/series/methods/test_astype.py::TestAstype::test_astype_cast_nan_inf_int[int64-nan]", "tests/series/methods/test_astype.py::TestAstype::test_astype_float_to_uint_negatives_raise[float-uint16]", "tests/series/methods/test_astype.py::TestAstype::test_astype_float_to_uint_negatives_raise[float-uint32]", "tests/series/methods/test_astype.py::TestAstype::test_astype_float_to_uint_negatives_raise[float-uint64]", @@ -12849,7 +12465,6 @@ def pytest_unconfigure(config): "tests/series/methods/test_nlargest.py::TestSeriesNLargestNSmallest::test_nlargest_boundary_datetimelike[nsmallest-timedelta64[ns]]", "tests/series/methods/test_nlargest.py::TestSeriesNLargestNSmallest::test_nlargest_error[r4]", "tests/series/methods/test_nlargest.py::TestSeriesNLargestNSmallest::test_nlargest_misc", - "tests/series/methods/test_nunique.py::test_nunique", "tests/series/methods/test_quantile.py::TestSeriesQuantile::test_quantile_all_na[Int16]", "tests/series/methods/test_quantile.py::TestSeriesQuantile::test_quantile_all_na[Int32]", "tests/series/methods/test_quantile.py::TestSeriesQuantile::test_quantile_all_na[Int64]", @@ -12892,23 +12507,18 @@ def pytest_unconfigure(config): "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_dense_method[string[pyarrow]-ser5-exp5]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_dense_method[string[pyarrow]-ser6-exp6]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_dense_method[string[pyarrow]-ser7-exp7]", - "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[average-Int64]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[average-float64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[average-int64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[average-string[pyarrow]]", - "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[dense-Int64]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[dense-float64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[dense-int64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[dense-string[pyarrow]]", - "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[first-Int64]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[first-float64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[first-int64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[first-string[pyarrow]]", - "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[max-Int64]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[max-float64[pyarrow]]", 
"tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[max-int64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[max-string[pyarrow]]", - "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[min-Int64]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[min-float64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[min-int64[pyarrow]]", "tests/series/methods/test_rank.py::TestSeriesRank::test_rank_descending[min-string[pyarrow]]", @@ -13101,10 +12711,8 @@ def pytest_unconfigure(config): "tests/series/methods/test_reindex.py::test_reindex_fill_value_datetimelike_upcast[0-datetime64[ns]]", "tests/series/methods/test_reindex.py::test_reindex_fill_value_datetimelike_upcast[0-timedelta64[ns]]", "tests/series/methods/test_reindex.py::test_reindex_pad2", - "tests/series/methods/test_reindex.py::test_reindexing_with_float64_NA_log", "tests/series/methods/test_rename.py::TestRename::test_rename_copy_false", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_categorical_single", - "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_change_dtype_series", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_datetime64", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Float64-input_data4-to_replace4-expected_data4]", "tests/series/methods/test_replace.py::TestSeriesReplace::test_replace_dtype[Int64-input_data2-to_replace2-expected_data2]", @@ -13157,8 +12765,6 @@ def pytest_unconfigure(config): "tests/series/test_arithmetic.py::TestSeriesArithmetic::test_alignment_doesnt_change_tz[python]", "tests/series/test_arithmetic.py::TestSeriesArithmetic::test_arithmetic_with_duplicate_index[numexpr]", "tests/series/test_arithmetic.py::TestSeriesArithmetic::test_arithmetic_with_duplicate_index[python]", - "tests/series/test_arithmetic.py::TestSeriesArithmetic::test_mask_div_propagate_na_for_non_na_dtype[numexpr]", - "tests/series/test_arithmetic.py::TestSeriesArithmetic::test_mask_div_propagate_na_for_non_na_dtype[python]", "tests/series/test_arithmetic.py::TestSeriesComparison::test_comparison_tuples[numexpr]", "tests/series/test_arithmetic.py::TestSeriesComparison::test_comparison_tuples[python]", "tests/series/test_arithmetic.py::TestSeriesFlexArithmetic::test_flex_add_scalar_fill_value[numexpr]", @@ -13321,22 +12927,12 @@ def pytest_unconfigure(config): "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_dtype_timedelta64", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_floating_data_int_dtype[DataFrame]", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_floating_data_int_dtype[Series]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[int16]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[int32]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[int64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[int8]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[int]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[uint16]", - 
"tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[uint32]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[uint64]", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_invalid_coerce_ints_with_float_nan[uint8]", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_list_of_tuples", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_maskedarray", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_no_data_string_type", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_no_partial_datetime_casting", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_pass_none", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_range_overflows", - "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_sanitize", "tests/series/test_constructors.py::TestSeriesConstructors::test_constructor_tuple_of_tuples", "tests/series/test_constructors.py::TestSeriesConstructors::test_convert_non_ns", "tests/series/test_constructors.py::TestSeriesConstructors::test_invalid_dtype", @@ -13640,7 +13236,6 @@ def pytest_unconfigure(config): "tests/test_algos.py::TestUnique::test_timedelta64_dtype_array_returned", "tests/test_algos.py::TestValueCounts::test_categorical_nans", "tests/test_algos.py::TestValueCounts::test_value_counts_dropna", - "tests/test_algos.py::TestValueCounts::test_value_counts_normalized[M8[ns]]", "tests/test_algos.py::TestValueCounts::test_value_counts_normalized[float64]", "tests/test_common.py::test_serializable[obj0]", "tests/test_common.py::test_standardize_mapping", @@ -13845,7 +13440,6 @@ def pytest_unconfigure(config): "tests/tools/test_to_datetime.py::TestToDatetimeUnit::test_to_datetime_unit_with_nulls[-9223372036854775808]", "tests/tools/test_to_datetime.py::TestToDatetimeUnit::test_unit_rounding[False]", "tests/tools/test_to_datetime.py::TestToDatetimeUnit::test_unit_rounding[True]", - "tests/tools/test_to_datetime.py::test_nullable_integer_to_datetime", "tests/tools/test_to_numeric.py::test_downcast_basic[kwargs0-int64-data2]", "tests/tools/test_to_numeric.py::test_downcast_basic[kwargs1-int64-data2]", "tests/tools/test_to_numeric.py::test_downcast_basic[kwargs2-f-data2]", @@ -14468,6 +14062,7 @@ def pytest_unconfigure(config): "tests/util/test_assert_produces_warning.py::test_right_category_wrong_match_raises[tuple7]", "tests/util/test_assert_series_equal.py::test_allows_duplicate_labels", "tests/util/test_assert_series_equal.py::test_assert_series_equal_extension_dtype_mismatch", + "tests/util/test_assert_series_equal.py::test_large_unequal_ints[Int64]", "tests/util/test_assert_series_equal.py::test_large_unequal_ints[int64]", "tests/util/test_assert_series_equal.py::test_series_equal_series_type", "tests/util/test_hashing.py::test_hash_with_tuple[data0-result_data0]", @@ -15148,6 +14743,15 @@ def pytest_unconfigure(config): r"tests/util/test_assert_series_equal.py::test_series_equal_index_dtype[True-s11-s21-MultiIndex level \\[0\\] are different]", } NODEIDS_THAT_XPASS_WITH_CUDF_PANDAS = { + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint8-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint16-skew-True]", + 
"tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint32-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint64-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int8-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int16-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int32-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[int64-skew-True]", + "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[float-skew-True]", "tests/io/json/test_readlines.py::test_readjson_unicode[pyarrow]", "tests/series/methods/test_info.py::test_info_memory_usage_deep_pypy", "tests/arrays/string_/test_string.py::test_add_2d[pyarrow]", @@ -15325,6 +14929,12 @@ def pytest_unconfigure(config): # TODO: Investigate why sometimes these fail NODEIDS_THAT_FLAKY_XFAIL_WITH_CUDF_PANDAS = { + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint8-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint16-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_series_with_scalar[uint32-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint8-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint16-__rpow__]", + "tests/extension/test_arrow.py::TestArrowArray::test_arith_frame_with_scalar[uint32-__rpow__]", r"tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_to_datetime_iso8601_fails[True-2012-01-01 10:00-%Y-%m-%d %H:%M:%S]", "tests/indexing/test_chaining_and_caching.py::TestChaining::test_detect_chained_assignment_warnings_errors", "tests/indexes/multi/test_indexing.py::test_pyint_engine", diff --git a/python/cudf/cudf/tests/private_objects/test_column.py b/python/cudf/cudf/tests/private_objects/test_column.py index f7c7b0808d4..47cd9c6d706 100644 --- a/python/cudf/cudf/tests/private_objects/test_column.py +++ b/python/cudf/cudf/tests/private_objects/test_column.py @@ -313,7 +313,9 @@ def test_column_view_valid_numeric_to_numeric(data, from_dtype, to_dtype): gpu_data_view = gpu_data.view(to_dtype) expect = pd.Series(cpu_data_view, dtype=cpu_data_view.dtype) - got = cudf.Series._from_column(gpu_data_view).astype(gpu_data_view.dtype) + got = cudf.Series._from_column(gpu_data_view).astype( + gpu_data_view.dtype, copy=False + ) gpu_ptr = gpu_data.data.get_ptr(mode="read") assert gpu_ptr == got._column.data.get_ptr(mode="read") diff --git a/python/cudf/cudf/tests/series/methods/test_unique.py b/python/cudf/cudf/tests/series/methods/test_unique.py index 257c57aa1e7..b87f1872157 100644 --- a/python/cudf/cudf/tests/series/methods/test_unique.py +++ b/python/cudf/cudf/tests/series/methods/test_unique.py @@ -50,14 +50,6 @@ def test_series_unique(): def test_series_nunique(request, nan_as_null, dropna): - # We remove nulls as opposed to NaNs using the dropna parameter, - # so to test against pandas we replace NaN with another discrete value - request.applymarker( - pytest.mark.xfail( - nan_as_null is None, - reason=f"{nan_as_null=} returns wrong result", - ) - ) cudf_series = cudf.Series([1, 2, 2, 3, 3], nan_as_null=nan_as_null) pd_series = pd.Series([1, 2, 2, 3, 3]) expect = pd_series.nunique(dropna=dropna) @@ -67,20 +59,20 @@ def test_series_nunique(request, nan_as_null, dropna): cudf_series = cudf.Series( [1.0, 2.0, 
3.0, np.nan, None], nan_as_null=nan_as_null ) - if nan_as_null is True: - pd_series = pd.Series([1.0, 2.0, 3.0, np.nan, None]) + if nan_as_null in {True, None}: + pd_series = pd.Series([1.0, 2.0, 3.0, None, None]) else: - pd_series = pd.Series([1.0, 2.0, 3.0, -1.0, None]) + pd_series = pd.Series([1.0, 2.0, 3.0, np.nan, None], dtype=object) expect = pd_series.nunique(dropna=dropna) got = cudf_series.nunique(dropna=dropna) assert expect == got cudf_series = cudf.Series([1.0, np.nan, np.nan], nan_as_null=nan_as_null) - if nan_as_null is True: + if nan_as_null in {True, None}: pd_series = pd.Series([1.0, np.nan, np.nan]) else: - pd_series = pd.Series([1.0, -1.0, -1.0]) + pd_series = pd.Series([1.0, None, None]) expect = pd_series.nunique(dropna=dropna) got = cudf_series.nunique(dropna=dropna) assert expect == got diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index b8acd46e895..b10b5f1247c 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -228,9 +228,18 @@ def is_mixed_with_object_dtype(lhs, rhs): elif isinstance(rhs.dtype, cudf.CategoricalDtype): return is_mixed_with_object_dtype(lhs, rhs.dtype.categories) - return (lhs.dtype == "object" and rhs.dtype != "object") or ( + res = (lhs.dtype == "object" and rhs.dtype != "object") or ( rhs.dtype == "object" and lhs.dtype != "object" ) + if res: + return res + return ( + cudf.api.types.is_string_dtype(lhs.dtype) + and not cudf.api.types.is_string_dtype(rhs.dtype) + ) or ( + cudf.api.types.is_string_dtype(rhs.dtype) + and not cudf.api.types.is_string_dtype(lhs.dtype) + ) def _get_nan_for_dtype(dtype: DtypeObj) -> DtypeObj: From 414b91691094b087fce9c687c8efaa4ca22308cf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 3 Sep 2025 12:42:08 -0700 Subject: [PATCH 248/366] Remove pyarrow upper bound (#19870) The latest PR to update our pyarrow pinnings #19592 made us compatible with the latest version of Arrow. The update was a little bumpy, but the main reasons had to do with 1) our improper use of Arrow APIs in our C++ tests and 2) a bug in our reading of v2 parquet files. Actual usage of our library was fine, so users would have been OK using a newer version, and we might have caught the bugs in our parquet support sooner. This PR proposes dropping the upper bound entirely to allow us to automatically support future versions as they are released. There is no real need for us to upgrade the version of Arrow that our C++ builds against; if it's already working, then we can stick with it since we're primarily using it for testing. If the Spark team finds a reason to request an upgrade we can always bump the CMake pin, but [they also plan to move to nanoarrow eventually](https://github.com/NVIDIA/spark-rapids-jni/issues/3268) so I doubt it'll be a priority. 
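For context (illustrative only, not part of the changes below), the effect of the two specifiers can be checked with the `packaging` library that pip uses for version matching:

```python
# A minimal sketch comparing the old and new pyarrow requirement
# specifiers; the version strings are examples, not tested releases.
from packaging.specifiers import SpecifierSet

old = SpecifierSet(">=15.0.0,<22.0.0a0")
new = SpecifierSet(">=15.0.0")

print("22.0.0" in old)  # False: the old upper bound blocks Arrow 22.x
print("22.0.0" in new)  # True: future majors resolve without a pin bump
print("14.0.2" in new)  # False: the lower bound still applies
```

In other words, once the bound is removed, each new Arrow release becomes installable immediately rather than waiting for us to relax the pin.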
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/19870 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-129_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-130_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-130_arch-x86_64.yaml | 2 +- dependencies.yaml | 6 +++--- python/cudf/pyproject.toml | 4 ++-- python/pylibcudf/pyproject.toml | 8 ++++---- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 4a296164cec..a4a81ebaa66 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -68,7 +68,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=15.0.0,<22.0.0a0 +- pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 30ac023ca78..0fa1e5f3ddc 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -69,7 +69,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=15.0.0,<22.0.0a0 +- pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 5ab54367559..c64e9000a08 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -68,7 +68,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=15.0.0,<22.0.0a0 +- pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 906a143b428..7ae24161e60 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -69,7 +69,7 @@ dependencies: - pandoc - polars>=1.28,<1.33 - pre-commit -- pyarrow>=15.0.0,<22.0.0a0 +- pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 - pynvml>=12.0.0,<13.0.0a0 - pytest diff --git a/dependencies.yaml b/dependencies.yaml index 73d6900d8f1..67e627aa718 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -511,7 +511,7 @@ dependencies: common: - output_types: [conda] packages: - - pyarrow>=15.0.0,<22.0.0a0 + - pyarrow>=15.0.0 - output_types: [requirements, pyproject] packages: # pyarrow 17.0.0 wheels have a subtle issue around threading that # can cause segmentation faults around imports on arm. It appears to # be highly dependent on the exact build configuration, so we'll just # avoid 17.0.0 for now unless we observe similar issues in future # releases as well. 
- - pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64' - - pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64' + - pyarrow>=15.0.0; platform_machine=='x86_64' + - pyarrow>=15.0.0,!=17.0.0; platform_machine=='aarch64' cuda_version: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index f97bd6a7eb3..af324761ee0 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -29,8 +29,8 @@ dependencies = [ "nvtx>=0.2.1", "packaging", "pandas>=2.0,<2.4.0dev0", - "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0; platform_machine=='x86_64'", "pylibcudf==25.10.*,>=0.0.0a0", "rich", "rmm==25.10.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 811af529493..f5167490384 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -47,16 +47,16 @@ test = [ "numba-cuda[cu13]>=0.19.1,<0.20.0a0", "numba>=0.60.0,<0.62.0a0", "pandas", - "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0; platform_machine=='x86_64'", "pytest", "pytest-cov", "pytest-xdist", "xxhash", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. pyarrow = [ - "pyarrow>=15.0.0,<22.0.0a0,!=17.0.0; platform_machine=='aarch64'", - "pyarrow>=15.0.0,<22.0.0a0; platform_machine=='x86_64'", + "pyarrow>=15.0.0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=15.0.0; platform_machine=='x86_64'", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. numpy = [ "numpy>=1.23,<3.0a0", From 314ccc675ec19f0aaed473a02276939a0305d968 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 3 Sep 2025 17:11:04 -0400 Subject: [PATCH 249/366] Fix empty column returned by cudf::from_arrow_stream_column (#19812) Fixes the `cudf::from_arrow_stream_column` API to return the correct type for an empty column. Closes #19802 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19812 --- cpp/src/interop/from_arrow_stream.cu | 7 --- cpp/tests/interop/from_arrow_stream_test.cpp | 47 ++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/cpp/src/interop/from_arrow_stream.cu b/cpp/src/interop/from_arrow_stream.cu index d8a17b4dc4d..f2cc8cf8c61 100644 --- a/cpp/src/interop/from_arrow_stream.cu +++ b/cpp/src/interop/from_arrow_stream.cu @@ -154,13 +154,6 @@ std::unique_ptr from_arrow_stream_column(ArrowArrayStream* input, input->release(input); if (chunks.empty()) { - if (schema.n_children == 0) { - schema.release(&schema); - return std::make_unique(); - } - - // If there are no chunks but the schema has children, we need to construct a suitable empty - // column. 
auto empty_column = make_empty_column_from_schema(&schema, stream, mr); schema.release(&schema); return empty_column; diff --git a/cpp/tests/interop/from_arrow_stream_test.cpp b/cpp/tests/interop/from_arrow_stream_test.cpp index d8e11e375bc..5d8d37b5e15 100644 --- a/cpp/tests/interop/from_arrow_stream_test.cpp +++ b/cpp/tests/interop/from_arrow_stream_test.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -62,6 +63,32 @@ get_nanoarrow_stream(int num_copies) return std::make_tuple(std::move(expected), std::move(schema), stream); } +std::tuple, nanoarrow::UniqueSchema, ArrowArrayStream> +get_nanoarrow_chunked_stream(int num_copies, cudf::size_type length) +{ + std::vector> columns; + std::vector arrays; + for (auto i = 0; i < 3; ++i) { + auto [tbl, sch, arr] = get_nanoarrow_host_tables(length); + // just use the first column + columns.push_back(std::move(tbl->release().front())); + arrays.push_back(std::move(arr->children[0])); + } + std::vector views; + for (auto const& col : columns) { + views.push_back(col->view()); + } + auto expected = cudf::concatenate(views); + + nanoarrow::UniqueSchema schema; + ArrowSchemaInit(schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_INT64)); + + ArrowArrayStream stream; + makeStreamFromArrays(std::move(arrays), std::move(schema), &stream); + return std::make_tuple(std::move(expected), std::move(schema), stream); +} + TEST_F(FromArrowStreamTest, BasicTest) { constexpr auto num_copies = 3; @@ -82,3 +109,23 @@ TEST_F(FromArrowStreamTest, EmptyTest) auto result = cudf::from_arrow_stream(&stream); cudf::have_same_types(expected->view(), result->view()); } + +TEST_F(FromArrowStreamTest, ChunkedTest) +{ + constexpr auto num_copies = 3; + constexpr auto length = 3; + auto [expected, schema, stream] = get_nanoarrow_chunked_stream(num_copies, length); + + auto result = cudf::from_arrow_stream_column(&stream); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), result->view()); +} + +TEST_F(FromArrowStreamTest, EmptyChunkedTest) +{ + constexpr auto num_copies = 3; + constexpr auto length = 0; + auto [expected, schema, stream] = get_nanoarrow_chunked_stream(num_copies, length); + + auto result = cudf::from_arrow_stream_column(&stream); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected->view()); +} From 266875f366766ffed66cb24cde4996587c6fb134 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Sep 2025 15:49:22 -0700 Subject: [PATCH 250/366] Refactor column_empty to use only pylibcudf APIs (#19800) Precursor of https://github.com/rapidsai/cudf/issues/18726, working towards consistently constructing a cuDF column with a pylibcudf Column. Also added `tests/series/methods/test_map.py::test_map_empty[categorical]` to conftest-patch: for all-missing data, this case now returns `None` with object dtype instead of `np.nan` with float dtype, consistent with the other dtypes covered by this test. 
Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19800 --- python/cudf/cudf/core/column/column.py | 82 ++++++++----------- python/cudf/cudf/core/udf/groupby_utils.py | 3 +- .../cudf/pandas/scripts/conftest-patch.py | 1 + python/cudf/cudf/utils/applyutils.py | 2 +- python/cudf/cudf/utils/queryutils.py | 3 +- 5 files changed, 42 insertions(+), 49 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 
e43f15f1895..7fecde15bc1 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2540,7 +2540,6 @@ def _has_any_nat(arbitrary: pd.Series | np.ndarray) -> bool: def column_empty( row_count: int, dtype: DtypeObj = CUDF_STRING_DTYPE, - for_numba: bool = False, ) -> ColumnBase: """ Allocate a new column with the given row_count and dtype. @@ -2555,59 +2554,50 @@ def column_empty( dtype : Dtype Type of the column. - - for_numba : bool, default False - If True, don't allocate a mask as it's not supported by numba. """ - children: tuple[ColumnBase, ...] = () - - if isinstance(dtype, StructDtype): - data = None - children = tuple( - column_empty(row_count, field_dtype) - for field_dtype in dtype.fields.values() - ) - elif isinstance(dtype, ListDtype): - data = None - children = ( - as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE), - column_empty(row_count, dtype=dtype.element_type), - ) - elif isinstance(dtype, CategoricalDtype): - data = None - children = ( - ColumnBase.from_pylibcudf( - plc.Column.from_scalar( - plc.Scalar.from_py( - None, dtype_to_pylibcudf_type(SIZE_TYPE_DTYPE) - ), - row_count, + if isinstance(dtype, (StructDtype, ListDtype)): + if isinstance(dtype, StructDtype): + children = tuple( + column_empty(row_count, field_dtype) + for field_dtype in dtype.fields.values() + ) + elif isinstance(dtype, ListDtype): + children = ( + as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE), + column_empty(row_count, dtype=dtype.element_type), + ) + mask = ( + None + if row_count == 0 + else plc.gpumemoryview( + plc.null_mask.create_null_mask( + row_count, plc.null_mask.MaskState.ALL_NULL ) - ), + ) ) + return ColumnBase.from_pylibcudf( + plc.Column( + dtype_to_pylibcudf_type(dtype), + row_count, + None, + mask, + row_count, + 0, + [child.to_pylibcudf(mode="read") for child in children], + ) + )._with_type_metadata(dtype) else: - col = ColumnBase.from_pylibcudf( + if isinstance(dtype, CategoricalDtype): + # May get downcast in _with_type_metadata + plc_dtype = plc.DataType(plc.TypeId.INT64) + else: + plc_dtype = dtype_to_pylibcudf_type(dtype) + return ColumnBase.from_pylibcudf( plc.Column.from_scalar( - plc.Scalar.from_py(None, dtype_to_pylibcudf_type(dtype)), + plc.Scalar.from_py(None, plc_dtype), row_count, ) )._with_type_metadata(dtype) - if for_numba: - col = col.set_mask(None) - return col - - if row_count > 0 and not for_numba: - mask = as_buffer( - plc.null_mask.create_null_mask( - row_count, plc.null_mask.MaskState.ALL_NULL - ) - ) - else: - mask = None - - return build_column( - data, dtype, mask=mask, size=row_count, children=children - ) def build_column( diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index 677e9a38881..3327b4320f3 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -126,7 +126,8 @@ def jit_groupby_apply(offsets, grouped_values, function, *args): offsets = as_column(offsets) ngroups = len(offsets) - 1 - output = column_empty(ngroups, dtype=return_type, for_numba=True) + output = column_empty(ngroups, dtype=return_type) + output = output.set_mask(None) launch_args = [ offsets, output, diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 1d57d38d787..94c6d914029 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -12437,6 +12437,7 @@ def pytest_unconfigure(config): 
"tests/series/methods/test_map.py::test_map_dict_ignore_na[dict]", "tests/series/methods/test_map.py::test_map_dict_subclass_with_missing", "tests/series/methods/test_map.py::test_map_dict_with_tuple_keys", + "tests/series/methods/test_map.py::test_map_empty[categorical]", "tests/series/methods/test_map.py::test_map_empty[bool-dtype]", "tests/series/methods/test_map.py::test_map_empty[empty]", "tests/series/methods/test_map.py::test_map_empty[float32]", diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index f3dc0df8362..712ef3fb654 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -158,7 +158,7 @@ def run(self, df, **launch_params): outputs = {} for k, dt in self.outcols.items(): outputs[k] = column.column_empty( - len(df), np.dtype(dt), False + len(df), np.dtype(dt) ).data_array_view(mode="write") # Bind argument args = {} diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index c49e8c1ba2b..a9f45d87e74 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -247,7 +247,8 @@ def query_execute(df, expr, callenv): # allocate output buffer nrows = len(df) - out = column_empty(nrows, dtype=np.dtype(np.bool_), for_numba=True) + out = column_empty(nrows, dtype=np.dtype(np.bool_)) + out = out.set_mask(None) # run kernel args = [out, *colarrays, *envargs] with _CUDFNumbaConfig(): From f1c32e08dd65666afa976049d9bb238d11ee9d03 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:58:13 -0700 Subject: [PATCH 251/366] Move more test_dataframe.py tests to new cudf classic testing directory (#19770) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19770 --- python/cudf/cudf/tests/conftest.py | 104 +- .../tests/dataframe/indexing/test_getitem.py | 26 + .../cudf/tests/dataframe/indexing/test_loc.py | 77 + .../tests/dataframe/indexing/test_setitem.py | 40 + .../cudf/tests/dataframe/methods/test_agg.py | 117 + .../tests/dataframe/methods/test_argsort.py | 23 + .../cudf/tests/dataframe/methods/test_diff.py | 98 + .../dataframe/methods/test_duplicated.py | 35 + .../tests/dataframe/methods/test_equals.py | 29 + .../cudf/tests/dataframe/methods/test_eval.py | 99 + .../tests/dataframe/methods/test_explode.py | 77 + .../dataframe/methods/test_ffill_bfill.py | 45 + .../cudf/tests/dataframe/methods/test_info.py | 248 ++ .../tests/dataframe/methods/test_insert.py | 14 + .../methods/test_iterrows_itertuples.py | 33 + .../cudf/tests/dataframe/methods/test_keys.py | 54 + .../dataframe/methods/test_memory_usage.py | 111 + .../cudf/tests/dataframe/methods/test_mode.py | 54 + .../tests/dataframe/methods/test_nunique.py | 32 + .../dataframe/methods/test_pct_change.py | 46 + .../cudf/tests/dataframe/methods/test_pipe.py | 62 + .../dataframe/methods/test_reductions.py | 69 + .../tests/dataframe/methods/test_reindex.py | 50 + .../tests/dataframe/methods/test_rename.py | 79 + .../tests/dataframe/methods/test_sample.py | 241 ++ .../tests/dataframe/methods/test_to_arrow.py | 68 + .../tests/dataframe/methods/test_to_pandas.py | 193 + .../tests/dataframe/methods/test_update.py | 91 + .../dataframe/methods/test_value_counts.py | 59 + .../cudf/tests/dataframe/test_attributes.py | 129 + 
.../cudf/cudf/tests/dataframe/test_binops.py | 170 + .../cudf/tests/dataframe/test_constructors.py | 460 +++ .../cudf/tests/dataframe/test_reductions.py | 75 - .../multiindex/methods/test_memory_usage.py | 33 + .../rangeindex/methods/test_nunique.py | 15 + python/cudf/cudf/tests/reshape/test_concat.py | 644 +++ .../cudf/tests/series/methods/test_keys.py | 32 + .../cudf/cudf/tests/series/test_attributes.py | 17 + python/cudf/cudf/tests/test_dataframe.py | 3524 +---------------- 39 files changed, 3677 insertions(+), 3696 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_agg.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_argsort.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_diff.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_duplicated.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_equals.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_eval.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_explode.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_ffill_bfill.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_info.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_insert.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_iterrows_itertuples.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_keys.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_mode.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_nunique.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_pct_change.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_pipe.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_sample.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_update.py create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_value_counts.py create mode 100644 python/cudf/cudf/tests/dataframe/test_binops.py delete mode 100644 python/cudf/cudf/tests/dataframe/test_reductions.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_memory_usage.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py create mode 100644 python/cudf/cudf/tests/series/methods/test_keys.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 8f6155bfb2c..663e4dbd517 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -15,7 +15,6 @@ import rmm # noqa: F401 import cudf -from cudf.testing import assert_eq _CURRENT_DIRECTORY = str(pathlib.Path(__file__).resolve().parent) @@ -25,103 +24,6 @@ def datadir(): return pathlib.Path(__file__).parent / "data" -@pytest.fixture( - params=itertools.product([0, 2, None], [0.3, None]), - ids=lambda arg: f"n={arg[0]}-frac={arg[1]}", -) -def sample_n_frac(request): - """ - Specific to `test_sample*` tests. 
- """ - n, frac = request.param - if n is not None and frac is not None: - pytest.skip("Cannot specify both n and frac.") - return n, frac - - -def shape_checker(expected, got): - assert expected.shape == got.shape - - -def exact_checker(expected, got): - assert_eq(expected, got) - - -@pytest.fixture( - params=[ - (None, None, shape_checker), - (42, 42, shape_checker), - (np.random.RandomState(42), np.random.RandomState(42), exact_checker), - ], - ids=["None", "IntSeed", "NumpyRandomState"], -) -def random_state_tuple_axis_1(request): - """ - Specific to `test_sample*_axis_1` tests. - A pytest fixture of valid `random_state` parameter pairs for pandas - and cudf. Valid parameter combinations, and what to check for each pair - are listed below: - - pandas: None, seed(int), np.random.RandomState - cudf: None, seed(int), np.random.RandomState - ------ - check: shape, shape, exact result - - Each column above stands for one valid parameter combination and check. - """ - - return request.param - - -@pytest.fixture( - params=[ - (None, None, shape_checker), - (42, 42, shape_checker), - (np.random.RandomState(42), np.random.RandomState(42), exact_checker), - (np.random.RandomState(42), cp.random.RandomState(42), shape_checker), - ], - ids=["None", "IntSeed", "NumpyRandomState", "CupyRandomState"], -) -def random_state_tuple_axis_0(request): - """ - Specific to `test_sample*_axis_0` tests. - A pytest fixture of valid `random_state` parameter pairs for pandas - and cudf. Valid parameter combinations, and what to check for each pair - are listed below: - - pandas: None, seed(int), np.random.RandomState, np.random.RandomState - cudf: None, seed(int), np.random.RandomState, cp.random.RandomState - ------ - check: shape, shape, exact result, shape - - Each column above stands for one valid parameter combination and check. - """ - - return request.param - - -@pytest.fixture(params=[None, "builtin_list", "ndarray"]) -def make_weights_axis_0(request): - """Specific to `test_sample*_axis_0` tests. - Only testing weights array that matches type with random state. 
- """ - - if request.param is None: - return lambda *_: (None, None) - elif request.param == "builtin-list": - return lambda size, _: ([1] * size, [1] * size) - else: - - def wrapped(size, numpy_weights_for_cudf): - # Uniform distribution, non-normalized - if numpy_weights_for_cudf: - return np.ones(size), np.ones(size) - else: - return np.ones(size), cp.ones(size) - - return wrapped - - # To set and remove the NO_EXTERNAL_ONLY_APIS environment variable we must use # the sessionstart and sessionfinish hooks rather than a simple autouse, # session-scope fixture because we need to set these variable before collection @@ -504,6 +406,12 @@ def numeric_and_temporal_types_as_str(request): return request.param +@pytest.fixture +def numeric_and_temporal_types_as_str2(numeric_and_temporal_types_as_str): + """Used for testing cartesian product of numeric_and_temporal_types_as_str""" + return numeric_and_temporal_types_as_str + + @pytest.fixture( params=signed_integer_types + unsigned_integer_types diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py index 6453a4abca1..ba7b7f95b2e 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_getitem.py @@ -8,3 +8,29 @@ def test_struct_of_struct_loc(): df = cudf.DataFrame({"col": [{"a": {"b": 1}}]}) expect = cudf.Series([{"a": {"b": 1}}], name="col") assert_eq(expect, df["col"]) + + +def test_dataframe_midx_cols_getitem(): + df = cudf.DataFrame( + { + "a": ["a", "b", "c"], + "b": ["b", "", ""], + "c": [10, 11, 12], + } + ) + df.columns = df.set_index(["a", "b"]).index + pdf = df.to_pandas() + + expected = df["c"] + actual = pdf["c"] + assert_eq(expected, actual) + df = cudf.DataFrame( + [[1, 0], [0, 1]], + columns=[ + ["foo", "foo"], + ["location", "location"], + ["x", "y"], + ], + ) + df = df.assign(bools=cudf.Series([True, False], dtype="bool")) + assert_eq(df["bools"], df.to_pandas()["bools"]) diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_loc.py b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py index 7821347cbe8..9a0ace436a7 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_loc.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_loc.py @@ -1,6 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
import re +import numpy as np import pandas as pd import pytest @@ -8,6 +9,82 @@ from cudf.testing import assert_eq +def test_dataframe_midx_columns_loc(): + idx_1 = ["Hi", "Lo"] + idx_2 = ["I", "II", "III"] + idx = cudf.MultiIndex.from_product([idx_1, idx_2]) + + data_rand = ( + np.random.default_rng(seed=0) + .uniform(0, 1, 3 * len(idx)) + .reshape(3, -1) + ) + df = cudf.DataFrame(data_rand, index=["A", "B", "C"], columns=idx) + pdf = df.to_pandas() + + assert_eq(df.shape, pdf.shape) + + expected = pdf.loc[["A", "B"]] + actual = df.loc[["A", "B"]] + + assert_eq(expected, actual) + assert_eq(df, pdf) + + +@pytest.mark.parametrize("dtype1", ["int16", "float32"]) +@pytest.mark.parametrize("dtype2", ["int16", "float32"]) +def test_dataframe_loc_int_float(dtype1, dtype2): + df = cudf.DataFrame( + {"a": [10, 11, 12, 13, 14]}, + index=cudf.Index([1, 2, 3, 4, 5], dtype=dtype1), + ) + pdf = df.to_pandas() + + gidx = cudf.Index([2, 3, 4], dtype=dtype2) + pidx = gidx.to_pandas() + + actual = df.loc[gidx] + expected = pdf.loc[pidx] + + assert_eq(actual, expected, check_index_type=True, check_dtype=True) + + +@pytest.mark.xfail(reason="Not yet properly supported.") +def test_multiindex_wildcard_selection_three_level_all(): + midx = cudf.MultiIndex.from_tuples( + [(c1, c2, c3) for c1 in "abcd" for c2 in "abc" for c3 in "ab"] + ) + df = cudf.DataFrame({f"{i}": [i] for i in range(24)}) + df.columns = midx + + expect = df.to_pandas().loc[:, (slice("a", "c"), slice("a", "b"), "b")] + got = df.loc[:, (slice(None), "b")] + assert_eq(expect, got) + + +def test_multiindex_wildcard_selection_all(): + midx = cudf.MultiIndex.from_tuples( + [(c1, c2) for c1 in "abc" for c2 in "ab"] + ) + df = cudf.DataFrame({f"{i}": [i] for i in range(6)}) + df.columns = midx + expect = df.to_pandas().loc[:, (slice(None), "b")] + got = df.loc[:, (slice(None), "b")] + assert_eq(expect, got) + + +@pytest.mark.xfail(reason="Not yet properly supported.") +def test_multiindex_wildcard_selection_partial(): + midx = cudf.MultiIndex.from_tuples( + [(c1, c2) for c1 in "abc" for c2 in "ab"] + ) + df = cudf.DataFrame({f"{i}": [i] for i in range(6)}) + df.columns = midx + expect = df.to_pandas().loc[:, (slice("a", "b"), "b")] + got = df.loc[:, (slice("a", "b"), "b")] + assert_eq(expect, got) + + @pytest.mark.parametrize( "value", [ diff --git a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py index c566be17d4a..f7874f5cd90 100644 --- a/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/dataframe/indexing/test_setitem.py @@ -1,5 +1,6 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
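+# A minimal sketch (hypothetical `_demo_setitem_mask` helper, not collected
+# by pytest) of masked assignment with a device array, as tested below.
+def _demo_setitem_mask():
+    import cupy as cp
+
+    import cudf
+
+    df = cudf.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
+    # Rows where the cupy boolean mask is True are overwritten in place.
+    df[cp.array([True, False, True, False])] = 1.5
+    return df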
+import cupy as cp import numpy as np import pandas as pd import pytest @@ -120,6 +121,45 @@ def test_listcol_setitem_retain_dtype(): assert df2["a"].dtype == df["a"].dtype +def test_setitem_reset_label_dtype(): + result = cudf.DataFrame({1: [2]}) + expected = pd.DataFrame({1: [2]}) + result["a"] = [2] + expected["a"] = [2] + assert_eq(result, expected) + + +def test_dataframe_assign_scalar_to_empty_series(): + expected = pd.DataFrame({"a": []}) + actual = cudf.DataFrame({"a": []}) + expected.a = 0 + actual.a = 0 + assert_eq(expected, actual) + + +def test_dataframe_assign_cp_np_array(): + m, n = 5, 3 + cp_ndarray = cp.random.randn(m, n) + pdf = pd.DataFrame({f"f_{i}": range(m) for i in range(n)}) + gdf = cudf.DataFrame({f"f_{i}": range(m) for i in range(n)}) + pdf[[f"f_{i}" for i in range(n)]] = cp.asnumpy(cp_ndarray) + gdf[[f"f_{i}" for i in range(n)]] = cp_ndarray + + assert_eq(pdf, gdf) + + +def test_dataframe_setitem_cupy_array(): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame(rng.standard_normal(size=(10, 2))) + gdf = cudf.from_pandas(pdf) + + gpu_array = cp.array([True, False] * 5) + pdf[gpu_array.get()] = 1.5 + gdf[gpu_array] = 1.5 + + assert_eq(pdf, gdf) + + def test_setitem_datetime(): df = cudf.DataFrame({"date": pd.date_range("20010101", "20010105").values}) assert df.date.dtype.kind == "M" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_agg.py b/python/cudf/cudf/tests/dataframe/methods/test_agg.py new file mode 100644 index 00000000000..7265376b835 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_agg.py @@ -0,0 +1,117 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import re + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2.5, 3], "b": [3, 4.5, 5], "c": [2.0, 3.0, 4.0]}, + {"a": [1, 2.2, 3], "b": [2.0, 3.0, 4.0], "c": [5.0, 6.0, 4.0]}, + ], +) +@pytest.mark.parametrize( + "aggs", + [ + ["min", "sum", "max"], + ("min", "sum", "max"), + {"min", "sum", "max"}, + "sum", + {"a": "sum", "b": "min", "c": "max"}, + {"a": ["sum"], "b": ["min"], "c": ["max"]}, + {"a": ("sum"), "b": ("min"), "c": ("max")}, + {"a": {"sum"}, "b": {"min"}, "c": {"max"}}, + {"a": ["sum", "min"], "b": ["sum", "max"], "c": ["min", "max"]}, + {"a": ("sum", "min"), "b": ("sum", "max"), "c": ("min", "max")}, + {"a": {"sum", "min"}, "b": {"sum", "max"}, "c": {"min", "max"}}, + ], +) +def test_agg_for_dataframes(data, aggs): + pdf = pd.DataFrame(data) + gdf = cudf.DataFrame(data) + + expect = pdf.agg(aggs).sort_index() + got = gdf.agg(aggs).sort_index() + + assert_eq(expect, got, check_dtype=True) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [3.0, 4.0, 5.0], "c": [True, True, False]}, + {"a": [1, 2, 3], "b": [True, True, False], "c": [False, True, False]}, + ], +) +@pytest.mark.parametrize( + "aggs", + [ + ["min", "sum", "max"], + "sum", + {"a": "sum", "b": "min", "c": "max"}, + ], +) +def test_agg_for_dataframes_error(data, aggs): + gdf = cudf.DataFrame(data) + + with pytest.raises(TypeError): + gdf.agg(aggs) + + +def test_agg_for_unsupported_function(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + + with pytest.raises(NotImplementedError): + gdf.agg({"a": np.sum, "b": np.min, "c": np.max}) + + +def test_agg_for_dataframe_with_invalid_function(): + aggs = "asdf" + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + + with pytest.raises( + AttributeError, + match=f"{aggs} is not a valid function for 
'DataFrame' object", + ): + gdf.agg(aggs) + + +def test_agg_for_series_with_invalid_function(): + aggs = {"a": "asdf"} + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + + with pytest.raises( + AttributeError, + match=f"{aggs['a']} is not a valid function for 'Series' object", + ): + gdf.agg(aggs) + + +@pytest.mark.parametrize( + "aggs", + [ + "sum", + ["min", "sum", "max"], + {"a": {"sum", "min"}, "b": {"sum", "max"}, "c": {"min", "max"}}, + ], +) +def test_agg_for_dataframe_with_string_columns(aggs): + gdf = cudf.DataFrame( + {"a": ["m", "n", "o"], "b": ["t", "u", "v"], "c": ["x", "y", "z"]}, + index=["a", "b", "c"], + ) + + with pytest.raises( + NotImplementedError, + match=re.escape( + "DataFrame.agg() is not supported for " + "frames containing string columns" + ), + ): + gdf.agg(aggs) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_argsort.py b/python/cudf/cudf/tests/dataframe/methods/test_argsort.py new file mode 100644 index 00000000000..96e1bcb8228 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_argsort.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cupy as cp +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "ascending,expected_data", + [ + (True, [1, 2, 0]), + (False, [0, 2, 1]), + ], +) +def test_dataframe_argsort(ascending, expected_data): + actual = cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}).argsort( + ascending=ascending + ) + expected = cp.array(expected_data, dtype="int32") + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_diff.py b/python/cudf/cudf/tests/dataframe/methods/test_diff.py new file mode 100644 index 00000000000..c5f3b504313 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_diff.py @@ -0,0 +1,98 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
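+# A minimal sketch (hypothetical `_demo_diff` helper, not collected by
+# pytest) of the row-wise differences checked below.
+def _demo_diff():
+    import cudf
+
+    df = cudf.DataFrame({"a": [1, 0, 4]})
+    # periods=1 subtracts the previous row: [<NA>, -1, 4].
+    forward = df.diff(periods=1)
+    # periods=-1 subtracts the following row instead: [1, -4, <NA>].
+    backward = df.diff(periods=-1)
+    return forward, backward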
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, 0, 4, -10, 6], + np.array([1.123, 2.343, 5.890, 0.0]), + [True, False, True, False, False], + {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, + ], +) +@pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) +def test_diff_numeric_dtypes(data, periods): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + actual = gdf.diff(periods=periods, axis=0) + expected = pdf.diff(periods=periods, axis=0) + + assert_eq( + expected, + actual, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + ("precision", "scale"), + [(5, 2), (8, 5)], +) +@pytest.mark.parametrize( + "dtype", + [cudf.Decimal32Dtype, cudf.Decimal64Dtype], +) +def test_diff_decimal_dtypes(precision, scale, dtype): + gdf = cudf.DataFrame( + np.random.default_rng(seed=42).uniform(10.5, 75.5, (10, 6)), + dtype=dtype(precision=precision, scale=scale), + ) + pdf = gdf.to_pandas() + + actual = gdf.diff() + expected = pdf.diff() + + assert_eq( + expected, + actual, + check_dtype=False, + ) + + +def test_diff_invalid_axis(): + gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) + with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): + gdf.diff(periods=1, axis=1) + + +@pytest.mark.parametrize( + "data", + [ + { + "int_col": [1, 2, 3, 4, 5], + "float_col": [1.0, 2.0, 3.0, 4.0, 5.0], + "string_col": ["a", "b", "c", "d", "e"], + }, + ["a", "b", "c", "d", "e"], + ], +) +def test_diff_unsupported_dtypes(data): + gdf = cudf.DataFrame(data) + with pytest.raises( + TypeError, + match=r"unsupported operand type\(s\)", + ): + gdf.diff() + + +def test_diff_many_dtypes(): + pdf = pd.DataFrame( + { + "dates": pd.date_range("2020-01-01", "2020-01-06", freq="D"), + "bools": [True, True, True, False, True, True], + "floats": [1.0, 2.0, 3.5, np.nan, 5.0, -1.7], + "ints": [1, 2, 3, 3, 4, 5], + "nans_nulls": [np.nan, None, None, np.nan, np.nan, None], + } + ) + gdf = cudf.from_pandas(pdf) + assert_eq(pdf.diff(), gdf.diff()) + assert_eq(pdf.diff(periods=2), gdf.diff(periods=2)) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_duplicated.py b/python/cudf/cudf/tests/dataframe/methods/test_duplicated.py new file mode 100644 index 00000000000..ce23afd2ace --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_duplicated.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
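+# A minimal sketch (hypothetical `_demo_duplicated` helper, not collected by
+# pytest) of the keep semantics parametrized below.
+def _demo_duplicated():
+    import cudf
+
+    df = cudf.DataFrame({"brand": ["Yum Yum", "Yum Yum", "Indomie"]})
+    # keep="first" flags repeats after the first occurrence:
+    # [False, True, False].
+    first = df.duplicated(keep="first")
+    # keep=False flags every member of a duplicated group:
+    # [True, True, False].
+    all_dups = df.duplicated(keep=False)
+    return first, all_dups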
+ +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + { + "brand": ["Yum Yum", "Yum Yum", "Indomie", "Indomie", "Indomie"], + "style": ["cup", "cup", "cup", "pack", "pack"], + "rating": [4, 4, 3.5, 15, 5], + }, + { + "brand": ["Indomie", "Yum Yum", "Indomie", "Indomie", "Indomie"], + "style": ["cup", "cup", "cup", "cup", "pack"], + "rating": [4, 4, 3.5, 4, 5], + }, + ], +) +@pytest.mark.parametrize( + "subset", [None, ["brand"], ["rating"], ["style", "rating"]] +) +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_dataframe_duplicated(data, subset, keep): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = pdf.duplicated(subset=subset, keep=keep) + actual = gdf.duplicated(subset=subset, keep=keep) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_equals.py b/python/cudf/cudf/tests/dataframe/methods/test_equals.py new file mode 100644 index 00000000000..a41438b3fc3 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_equals.py @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "lhs, rhs", [("a", "a"), ("a", "b"), (1, 1.0), (None, None), (None, "a")] +) +def test_equals_names(lhs, rhs): + lhs = cudf.DataFrame({lhs: [1, 2]}) + rhs = cudf.DataFrame({rhs: [1, 2]}) + + got = lhs.equals(rhs) + expect = lhs.to_pandas().equals(rhs.to_pandas()) + + assert_eq(expect, got) + + +def test_equals_dtypes(): + lhs = cudf.DataFrame({"a": [1, 2.0]}) + rhs = cudf.DataFrame({"a": [1, 2]}) + + got = lhs.equals(rhs) + expect = lhs.to_pandas().equals(rhs.to_pandas()) + + assert got == expect diff --git a/python/cudf/cudf/tests/dataframe/methods/test_eval.py b/python/cudf/cudf/tests/dataframe/methods/test_eval.py new file mode 100644 index 00000000000..239304129a1 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_eval.py @@ -0,0 +1,99 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
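+# A minimal sketch (hypothetical `_demo_eval` helper, not collected by
+# pytest) of the two eval() forms tested below: a bare expression that
+# returns a result, and an assignment that mutates the frame in place.
+def _demo_eval():
+    import cudf
+
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
+    result = df.eval("a + b")  # returns [4, 4, 4]
+    df.eval("c = a - b", inplace=True)  # adds column c = [-2, 0, 2]
+    return result, df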
+ +import re + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +# Note that for now expressions do not automatically handle casting, so inputs +# need to be casted appropriately +@pytest.mark.filterwarnings("ignore::RuntimeWarning") +@pytest.mark.parametrize( + "expr, dtype", + [ + ("a", int), + ("+a", int), + ("a + b", int), + ("a == b", int), + ("a / b", float), + ("a * b", int), + ("a > b", int), + ("a >= b", int), + ("a > b > c", int), + ("a > b < c", int), + ("a & b", int), + ("a & b | c", int), + ("sin(a)", float), + ("exp(sin(abs(a)))", float), + ("sqrt(floor(a))", float), + ("ceil(arctanh(a))", float), + ("(a + b) - (c * d)", int), + ("~a", int), + ("(a > b) and (c > d)", int), + ("(a > b) or (c > d)", int), + ("not (a > b)", int), + ("a + 1", int), + ("a + 1.0", float), + ("-a + 1", int), + ("+a + 1", int), + ("e = a + 1", int), + ( + """ + e = log(cos(a)) + 1.0 + f = abs(c) - exp(d) + """, + float, + ), + ("a_b_are_equal = (a == b)", int), + ("a > b", str), + ("a < '1'", str), + ('a == "1"', str), + ], +) +@pytest.mark.parametrize("nrows", [0, 10]) +def test_dataframe_eval(nrows, expr, dtype): + arr = np.ones(nrows) + df_eval = cudf.DataFrame({"a": arr, "b": arr, "c": arr, "d": arr}) + df_eval = df_eval.astype(dtype) + expect = df_eval.to_pandas().eval(expr) + got = df_eval.eval(expr) + # In the specific case where the evaluated expression is a unary function + # of a single column with no nesting, pandas will retain the name. This + # level of compatibility is out of scope for now. + assert_eq(expect, got, check_names=False) + + # Test inplace + if re.search("[^=><]=[^=]", expr) is not None: + pdf_eval = df_eval.to_pandas() + pdf_eval.eval(expr, inplace=True) + df_eval.eval(expr, inplace=True) + assert_eq(pdf_eval, df_eval) + + +@pytest.mark.parametrize( + "expr", + [ + """ + e = a + b + a == b + """, + "a_b_are_equal = (a == b) = c", + ], +) +def test_dataframe_eval_errors(expr): + df = cudf.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}) + with pytest.raises(ValueError): + df.eval(expr) + + +def test_dataframe_eval_misc(): + df = cudf.DataFrame({"a": [1, 2, 3, None, 5]}) + got = df.eval("isnull(a)") + assert_eq(got, cudf.Series.isnull(df["a"]), check_names=False) + + df.eval("c = isnull(1)", inplace=True) + assert_eq(df["c"], cudf.Series([False] * len(df), name="c")) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_explode.py b/python/cudf/cudf/tests/dataframe/methods/test_explode.py new file mode 100644 index 00000000000..c01b6f3fc39 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_explode.py @@ -0,0 +1,77 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
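+# A minimal sketch (hypothetical `_demo_explode` helper, not collected by
+# pytest) of how explode expands a list column, as tested below.
+def _demo_explode():
+    import cudf
+
+    df = cudf.DataFrame({"A": [[1, 2], None, [3]], "B": [10, 20, 30]})
+    # Each list element becomes its own row; the index label and the other
+    # columns repeat, and a None list yields a single null row.
+    return df.explode("A")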
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_explode_preserve_categorical(): + gdf = cudf.DataFrame( + { + "A": [[1, 2], None, [2, 3]], + "B": cudf.Series([0, 1, 2], dtype="category"), + } + ) + result = gdf.explode("A") + expected = cudf.DataFrame( + { + "A": [1, 2, None, 2, 3], + "B": cudf.Series([0, 0, 1, 2, 2], dtype="category"), + } + ) + expected.index = cudf.Index([0, 0, 1, 2, 2]) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [ + [[1, 2, 3], 11, "a"], + [None, 22, "e"], + [[4], 33, "i"], + [[], 44, "o"], + [[5, 6], 55, "u"], + ], # nested + [ + [1, 11, "a"], + [2, 22, "e"], + [3, 33, "i"], + [4, 44, "o"], + [5, 55, "u"], + ], # non-nested + ], +) +@pytest.mark.parametrize( + ("labels", "label_to_explode"), + [ + (None, 0), + (pd.Index(["a", "b", "c"]), "a"), + ( + pd.MultiIndex.from_tuples( + [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"] + ), + (0, "a"), + ), + ], +) +@pytest.mark.parametrize( + "p_index", + [ + None, + ["ia", "ib", "ic", "id", "ie"], + pd.MultiIndex.from_tuples( + [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")] + ), + ], +) +def test_explode(data, labels, ignore_index, p_index, label_to_explode): + pdf = pd.DataFrame(data, index=p_index, columns=labels) + gdf = cudf.from_pandas(pdf) + + expect = pdf.explode(label_to_explode, ignore_index) + got = gdf.explode(label_to_explode, ignore_index) + + assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_ffill_bfill.py b/python/cudf/cudf/tests/dataframe/methods/test_ffill_bfill.py new file mode 100644 index 00000000000..49516c9c1cb --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_ffill_bfill.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"A": [1, 2, 3, np.nan, None, 6]}), + pd.Series([1, 2, 3, None, np.nan, 5, 6, np.nan]), + ], +) +@pytest.mark.parametrize("alias", ["bfill", "backfill"]) +def test_dataframe_bfill(df, alias): + gdf = cudf.from_pandas(df) + + with expect_warning_if(alias == "backfill"): + actual = getattr(df, alias)() + with expect_warning_if(alias == "backfill"): + expected = getattr(gdf, alias)() + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"A": [1, 2, 3, np.nan, None, 6]}), + pd.Series([1, 2, 3, None, np.nan, 5, 6, np.nan]), + ], +) +@pytest.mark.parametrize("alias", ["ffill", "pad"]) +def test_dataframe_ffill(df, alias): + gdf = cudf.from_pandas(df) + + with expect_warning_if(alias == "pad"): + actual = getattr(df, alias)() + with expect_warning_if(alias == "pad"): + expected = getattr(gdf, alias)() + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_info.py b/python/cudf/cudf/tests/dataframe/methods/test_info.py new file mode 100644 index 00000000000..d5d06268e85 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_info.py @@ -0,0 +1,248 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
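+# A minimal sketch (hypothetical `_demo_info` helper, not collected by
+# pytest) of capturing info() output, the pattern every assertion below uses.
+def _demo_info():
+    import io
+
+    import cudf
+
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+    buffer = io.StringIO()
+    # buf= redirects the report into the buffer; memory_usage="deep" reports
+    # the actual bytes held by string columns rather than a "+" estimate.
+    df.info(buf=buffer, verbose=True, memory_usage="deep")
+    return buffer.getvalue()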
+ +import io +import textwrap + +import numpy as np +import pandas as pd + +import cudf + + +def test_dataframe_info_basic(): + buffer = io.StringIO() + str_cmp = textwrap.dedent( + """\ + + Index: 10 entries, a to 1111 + Data columns (total 10 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 0 10 non-null float64 + 1 1 10 non-null float64 + 2 2 10 non-null float64 + 3 3 10 non-null float64 + 4 4 10 non-null float64 + 5 5 10 non-null float64 + 6 6 10 non-null float64 + 7 7 10 non-null float64 + 8 8 10 non-null float64 + 9 9 10 non-null float64 + dtypes: float64(10) + memory usage: 859.0+ bytes + """ + ) + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + rng.standard_normal(size=(10, 10)), + index=["a", "2", "3", "4", "5", "6", "7", "8", "100", "1111"], + ) + cudf.from_pandas(df).info(buf=buffer, verbose=True) + s = buffer.getvalue() + assert str_cmp == s + + +def test_dataframe_info_verbose_mem_usage(): + buffer = io.StringIO() + df = pd.DataFrame({"a": [1, 2, 3], "b": ["safdas", "assa", "asdasd"]}) + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 2 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 3 non-null int64 + 1 b 3 non-null object + dtypes: int64(1), object(1) + memory usage: 56.0+ bytes + """ + ) + cudf.from_pandas(df).info(buf=buffer, verbose=True) + s = buffer.getvalue() + assert str_cmp == s + + buffer.truncate(0) + buffer.seek(0) + + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 3 entries, 0 to 2 + Columns: 2 entries, a to b + dtypes: int64(1), object(1) + memory usage: 56.0+ bytes + """ + ) + cudf.from_pandas(df).info(buf=buffer, verbose=False) + s = buffer.getvalue() + assert str_cmp == s + + buffer.truncate(0) + buffer.seek(0) + + df = pd.DataFrame( + {"a": [1, 2, 3], "b": ["safdas", "assa", "asdasd"]}, + index=["sdfdsf", "sdfsdfds", "dsfdf"], + ) + str_cmp = textwrap.dedent( + """\ + + Index: 3 entries, sdfdsf to dsfdf + Data columns (total 2 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 3 non-null int64 + 1 b 3 non-null object + dtypes: int64(1), object(1) + memory usage: 91.0 bytes + """ + ) + cudf.from_pandas(df).info(buf=buffer, verbose=True, memory_usage="deep") + s = buffer.getvalue() + assert str_cmp == s + + buffer.truncate(0) + buffer.seek(0) + + int_values = [1, 2, 3, 4, 5] + text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] + float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + + df = cudf.DataFrame( + { + "int_col": int_values, + "text_col": text_values, + "float_col": float_values, + } + ) + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 130.0 bytes + """ + ) + df.info(buf=buffer, verbose=True, memory_usage="deep") + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + +def test_dataframe_info_null_counts(): + int_values = [1, 2, 3, 4, 5] + text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] + float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + + df = cudf.DataFrame( + { + "int_col": int_values, + "text_col": text_values, + "float_col": float_values, + } + ) + buffer = io.StringIO() + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 
columns): + # Column Dtype + --- ------ ----- + 0 int_col int64 + 1 text_col object + 2 float_col float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 130.0+ bytes + """ + ) + df.info(buf=buffer, verbose=True, null_counts=False) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + df.info(buf=buffer, verbose=True, max_cols=0) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + df = cudf.DataFrame() + + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 0 entries + Empty DataFrame""" + ) + df.info(buf=buffer, verbose=True) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + df = cudf.DataFrame( + { + "a": [1, 2, 3, None, 10, 11, 12, None], + "b": ["a", "b", "c", "sd", "sdf", "sd", None, None], + } + ) + + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 8 entries, 0 to 7 + Data columns (total 2 columns): + # Column Dtype + --- ------ ----- + 0 a int64 + 1 b object + dtypes: int64(1), object(1) + memory usage: 238.0+ bytes + """ + ) + with pd.option_context("display.max_info_rows", 2): + df.info(buf=buffer, max_cols=2, null_counts=None) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + str_cmp = textwrap.dedent( + """\ + + RangeIndex: 8 entries, 0 to 7 + Data columns (total 2 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 6 non-null int64 + 1 b 6 non-null object + dtypes: int64(1), object(1) + memory usage: 238.0+ bytes + """ + ) + + df.info(buf=buffer, max_cols=2, null_counts=None) + actual_string = buffer.getvalue() + assert str_cmp == actual_string + + buffer.truncate(0) + buffer.seek(0) + + df.info(buf=buffer, null_counts=True) + actual_string = buffer.getvalue() + assert str_cmp == actual_string diff --git a/python/cudf/cudf/tests/dataframe/methods/test_insert.py b/python/cudf/cudf/tests/dataframe/methods/test_insert.py new file mode 100644 index 00000000000..f3dbe3a7ff0 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_insert.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_insert_reset_label_dtype(): + result = cudf.DataFrame({1: [2]}) + expected = pd.DataFrame({1: [2]}) + result.insert(1, "a", [2]) + expected.insert(1, "a", [2]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_iterrows_itertuples.py b/python/cudf/cudf/tests/dataframe/methods/test_iterrows_itertuples.py new file mode 100644 index 00000000000..d67dea7b79c --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_iterrows_itertuples.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import re + +import pytest + +import cudf + + +def test_dataframe_iterrows_itertuples(): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + with pytest.raises( + TypeError, + match=re.escape( + "cuDF does not support iteration of DataFrame " + "via itertuples. Consider using " + "`.to_pandas().itertuples()` " + "if you wish to iterate over namedtuples." + ), + ): + df.itertuples() + + with pytest.raises( + TypeError, + match=re.escape( + "cuDF does not support iteration of DataFrame " + "via iterrows. Consider using " + "`.to_pandas().iterrows()` " + "if you wish to iterate over each row." 
+ ), + ): + df.iterrows() diff --git a/python/cudf/cudf/tests/dataframe/methods/test_keys.py b/python/cudf/cudf/tests/dataframe/methods/test_keys.py new file mode 100644 index 00000000000..4e9fdd9f9c9 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_keys.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": [1, 2, 3, 4, 5, 10, 11, 12, 33, 55, 19]}), + pd.DataFrame( + { + "one": [1, 2, 3, 4, 5, 10], + "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], + } + ), + pd.DataFrame( + { + "one": [1, 2, 3, 4, 5, 10], + "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], + }, + index=[10, 20, 30, 40, 50, 60], + ), + pd.DataFrame( + { + "one": [1, 2, 3, 4, 5, 10], + "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], + }, + index=["a", "b", "c", "d", "e", "f"], + ), + pd.DataFrame(index=["a", "b", "c", "d", "e", "f"]), + pd.DataFrame(columns=["a", "b", "c", "d", "e", "f"]), + pd.DataFrame(index=[10, 11, 12]), + pd.DataFrame(columns=[10, 11, 12]), + pd.DataFrame(), + pd.DataFrame({"one": [], "two": []}), + pd.DataFrame({2: [], 1: []}), + pd.DataFrame( + { + 0: [1, 2, 3, 4, 5, 10], + 1: ["abc", "def", "ghi", "xyz", "pqr", "abc"], + 100: ["a", "b", "b", "x", "z", "a"], + }, + index=[10, 20, 30, 40, 50, 60], + ), + ], +) +def test_dataframe_keys(df): + gdf = cudf.from_pandas(df) + + assert_eq(df.keys(), gdf.keys()) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py b/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py index 42185ff6a9c..1c3a5bc0585 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_memory_usage.py @@ -1,9 +1,120 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
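+# A minimal sketch (hypothetical `_demo_memory_usage` helper, not collected
+# by pytest) of the per-column report compared against pandas below.
+def _demo_memory_usage():
+    import cudf
+
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+    # One entry per column plus the index; deep=True counts the real
+    # device-side footprint of string data instead of an estimate.
+    return df.memory_usage(index=True, deep=True)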
+import numpy as np +import pandas as pd +import pytest import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if def test_list_struct_list_memory_usage(): df = cudf.DataFrame({"a": [[{"b": [1]}]]}) assert df.memory_usage().sum() == 16 + + +@pytest.mark.parametrize("index", [False, True]) +def test_memory_usage_index_preserve_types(index): + data = [[1, 2, 3]] + columns = pd.Index(np.array([1, 2, 3], dtype=np.int8), name="a") + result = ( + cudf.DataFrame(data, columns=columns).memory_usage(index=index).index + ) + expected = ( + pd.DataFrame(data, columns=columns).memory_usage(index=index).index + ) + if index: + # pandas returns an Index[object] with int and string elements + expected = expected.astype(str) + assert_eq(result, expected) + + +@pytest.mark.parametrize("set_index", [None, "A", "C", "D"]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("deep", [True, False]) +def test_memory_usage(deep, index, set_index): + # Testing numerical/datetime by comparing with pandas + # (string and categorical columns will be different) + rows = 100 + df = pd.DataFrame( + { + "A": np.arange(rows, dtype="int64"), + "B": np.arange(rows, dtype="int32"), + "C": np.arange(rows, dtype="float64"), + } + ) + df["D"] = pd.to_datetime(df.A) + if set_index: + df = df.set_index(set_index) + + gdf = cudf.from_pandas(df) + + if index and set_index is None: + # Special Case: Assume RangeIndex size == 0 + with expect_warning_if(deep, UserWarning): + assert gdf.index.memory_usage(deep=deep) == 0 + + else: + # Check for Series only + assert df["B"].memory_usage(index=index, deep=deep) == gdf[ + "B" + ].memory_usage(index=index, deep=deep) + + # Check for entire DataFrame + assert_eq( + df.memory_usage(index=index, deep=deep).sort_index(), + gdf.memory_usage(index=index, deep=deep).sort_index(), + ) + + +@pytest.mark.xfail +def test_memory_usage_string(): + rows = 100 + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "A": np.arange(rows, dtype="int32"), + "B": rng.choice(["apple", "banana", "orange"], rows), + } + ) + gdf = cudf.from_pandas(df) + + # Check deep=False (should match pandas) + assert gdf.B.memory_usage(deep=False, index=False) == df.B.memory_usage( + deep=False, index=False + ) + + # Check string column + assert gdf.B.memory_usage(deep=True, index=False) == df.B.memory_usage( + deep=True, index=False + ) + + # Check string index + assert gdf.set_index("B").index.memory_usage( + deep=True + ) == df.B.memory_usage(deep=True, index=False) + + +def test_memory_usage_cat(): + rows = 100 + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + { + "A": np.arange(rows, dtype="int32"), + "B": rng.choice(["apple", "banana", "orange"], rows), + } + ) + df["B"] = df.B.astype("category") + gdf = cudf.from_pandas(df) + + expected = ( + gdf.B._column.categories.memory_usage + + gdf.B._column.codes.memory_usage + ) + + # Check cat column + assert gdf.B.memory_usage(deep=True, index=False) == expected + + # Check cat index + assert gdf.set_index("B").index.memory_usage(deep=True) == expected diff --git a/python/cudf/cudf/tests/dataframe/methods/test_mode.py b/python/cudf/cudf/tests/dataframe/methods/test_mode.py new file mode 100644 index 00000000000..aad65d67612 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_mode.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
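+# A minimal sketch (hypothetical `_demo_mode` helper, not collected by
+# pytest) of the dropna behaviour parametrized below.
+def _demo_mode():
+    import cudf
+
+    df = cudf.DataFrame({"a": [1, 2, 2, None, None, None]})
+    # dropna=True ignores nulls, so the mode is 2; with dropna=False the
+    # three nulls outnumber the two 2s and become the mode instead.
+    return df.mode(dropna=True), df.mode(dropna=False)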
+ +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "df", + [ + lambda: cudf.DataFrame({"a": [1, 2, 3]}), + lambda: cudf.DataFrame( + {"a": [1, 2, 3], "b": ["a", "z", "c"]}, index=["a", "z", "x"] + ), + lambda: cudf.DataFrame( + { + "a": [1, 2, 3, None, 2, 1, None], + "b": ["a", "z", "c", "a", "v", "z", "z"], + } + ), + lambda: cudf.DataFrame({"a": [], "b": []}), + lambda: cudf.DataFrame({"a": [None, None], "b": [None, None]}), + lambda: cudf.DataFrame( + { + "a": ["hello", "world", "rapids", "ai", "nvidia"], + "b": cudf.Series( + [1, 21, 21, 11, 11], + dtype="timedelta64[s]", + index=["a", "b", "c", "d", " e"], + ), + }, + index=["a", "b", "c", "d", " e"], + ), + lambda: cudf.DataFrame( + { + "a": ["hello", None, "world", "rapids", None, "ai", "nvidia"], + "b": cudf.Series( + [1, 21, None, 11, None, 11, None], dtype="datetime64[s]" + ), + } + ), + ], +) +def test_dataframe_mode(df, numeric_only, dropna): + df = df() + pdf = df.to_pandas() + + expected = pdf.mode(numeric_only=numeric_only, dropna=dropna) + actual = df.mode(numeric_only=numeric_only, dropna=dropna) + if len(actual.columns) == 0: + # pandas < 3.0 returns an Index[object] instead of RangeIndex + actual.columns = expected.columns + assert_eq(expected, actual, check_dtype=False) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_nunique.py b/python/cudf/cudf/tests/dataframe/methods/test_nunique.py new file mode 100644 index 00000000000..b54e8cf1b4f --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_nunique.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "columns", + [ + pd.RangeIndex(2, name="foo"), + pd.MultiIndex.from_arrays([[1, 2], [2, 3]], names=["foo", 1]), + pd.Index([3, 5], dtype=np.int8, name="foo"), + ], +) +def test_nunique_preserve_column_in_index(columns): + df = cudf.DataFrame([[1, 2]], columns=columns) + result = df.nunique().index.to_pandas() + assert_eq(result, columns, exact=True) + + +def test_dataframe_nunique(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]}) + pdf = gdf.to_pandas() + + actual = gdf.nunique() + expected = pdf.nunique() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_pct_change.py b/python/cudf/cudf/tests/dataframe/methods/test_pct_change.py new file mode 100644 index 00000000000..57a68f14d35 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_pct_change.py @@ -0,0 +1,46 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
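+# A minimal sketch (hypothetical `_demo_pct_change` helper, not collected by
+# pytest) of the quantity under test: pct_change(p)[i] = x[i] / x[i - p] - 1.
+def _demo_pct_change():
+    import cudf
+
+    df = cudf.DataFrame({"a": [2.0, 3.0, 6.0]})
+    # [<NA>, 3/2 - 1 = 0.5, 6/3 - 1 = 1.0]
+    return df.pct_change(periods=1)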
+ + +import numpy as np +import pytest + +import cudf +from cudf.api.extensions import no_default +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + expect_warning_if, +) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="warning not present in older pandas versions", +) +@pytest.mark.parametrize( + "data", + [ + [1, 4, 6, 1], + np.array([1.123, 2.343, 5.890, 0.0]), + {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, + ], +) +@pytest.mark.parametrize("periods", [-5, 0, 2]) +@pytest.mark.parametrize( + "fill_method", ["ffill", "bfill", "pad", "backfill", no_default] +) +def test_dataframe_pct_change(data, periods, fill_method): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + with expect_warning_if(fill_method is not no_default): + actual = gdf.pct_change(periods=periods, fill_method=fill_method) + with expect_warning_if( + fill_method is not no_default or pdf.isna().any().any() + ): + expected = pdf.pct_change(periods=periods, fill_method=fill_method) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_pipe.py b/python/cudf/cudf/tests/dataframe/methods/test_pipe.py new file mode 100644 index 00000000000..f20724da0c5 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_pipe.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def test_dataframe_pipe(): + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + + def add_int_col(df, column): + df[column] = df._constructor_sliced([10, 20, 30, 40]) + return df + + def add_str_col(df, column): + df[column] = df._constructor_sliced(["a", "b", "xyz", "ai"]) + return df + + expected = ( + pdf.pipe(add_int_col, "one") + .pipe(add_int_col, column="two") + .pipe(add_str_col, "three") + ) + actual = ( + gdf.pipe(add_int_col, "one") + .pipe(add_int_col, column="two") + .pipe(add_str_col, "three") + ) + + assert_eq(expected, actual) + + expected = ( + pdf.pipe((add_str_col, "df"), column="one") + .pipe(add_str_col, column="two") + .pipe(add_int_col, "three") + ) + actual = ( + gdf.pipe((add_str_col, "df"), column="one") + .pipe(add_str_col, column="two") + .pipe(add_int_col, "three") + ) + + assert_eq(expected, actual) + + +def test_dataframe_pipe_error(): + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + + def custom_func(df, column): + df[column] = df._constructor_sliced([10, 20, 30, 40]) + return df + + assert_exceptions_equal( + lfunc=pdf.pipe, + rfunc=gdf.pipe, + lfunc_args_and_kwargs=([(custom_func, "columns")], {"columns": "d"}), + rfunc_args_and_kwargs=([(custom_func, "columns")], {"columns": "d"}), + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_reductions.py b/python/cudf/cudf/tests/dataframe/methods/test_reductions.py index 9b4134e5b3b..0061115bc08 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_reductions.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_reductions.py @@ -181,3 +181,72 @@ def test_dataframe_axis_0_preserve_column_type_in_index(columns): result = cudf_df.sum(axis=0) expected = pd_df.sum(axis=0) assert_eq(result, expected, check_index_type=True) + + +def test_dataframe_reduction_error(): + gdf = cudf.DataFrame( + { + "a": cudf.Series([1, 2, 3], dtype="float"), + "d": cudf.Series([10, 20, 30], dtype="timedelta64[ns]"), + } + ) + + with 
pytest.raises(TypeError): + gdf.sum() + + +def test_mean_timeseries(numeric_only): + gdf = cudf.datasets.timeseries() + if not numeric_only: + gdf = gdf.select_dtypes(include="number") + pdf = gdf.to_pandas() + + expected = pdf.mean(numeric_only=numeric_only) + actual = gdf.mean(numeric_only=numeric_only) + + assert_eq(expected, actual) + + +def test_std_different_dtypes(numeric_only): + gdf = cudf.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ) + if not numeric_only: + gdf = gdf.select_dtypes(include="number") + pdf = gdf.to_pandas() + + expected = pdf.std(numeric_only=numeric_only) + actual = gdf.std(numeric_only=numeric_only) + + assert_eq(expected, actual) + + +def test_empty_numeric_only(): + gdf = cudf.DataFrame( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + } + ) + pdf = gdf.to_pandas() + expected = pdf.prod(numeric_only=True) + actual = gdf.prod(numeric_only=True) + assert_eq(expected, actual, check_dtype=True) + + +@pytest.mark.parametrize( + "op", + ["count", "kurt", "kurtosis", "skew"], +) +def test_dataframe_axis1_unsupported_ops(op): + df = cudf.DataFrame({"a": [1, 2, 3], "b": [8, 9, 10]}) + + with pytest.raises( + NotImplementedError, match="Only axis=0 is currently supported." + ): + getattr(df, op)(axis=1) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_reindex.py b/python/cudf/cudf/tests/dataframe/methods/test_reindex.py index 19efd968a95..bfe6ecf170f 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_reindex.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_reindex.py @@ -6,6 +6,7 @@ import cudf from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.mark.parametrize("copy", [True, False]) @@ -189,3 +190,52 @@ def test_reindex_columns_rangeindex_keeps_rangeindex(name, klass): result = df.reindex(columns=new_columns).columns expected = pd.RangeIndex(3, name=exp_name) assert_eq(result, expected) + + +def test_dataframe_duplicate_index_reindex(): + gdf = cudf.DataFrame({"a": [0, 1, 2, 3]}, index=[0, 0, 1, 1]) + pdf = gdf.to_pandas() + + assert_exceptions_equal( + gdf.reindex, + pdf.reindex, + lfunc_args_and_kwargs=([10, 11, 12, 13], {}), + rfunc_args_and_kwargs=([10, 11, 12, 13], {}), + ) + + +def test_dataframe_reindex_keep_colname(): + gdf = cudf.DataFrame([1], columns=cudf.Index([1], name="foo")) + result = gdf.reindex(index=[0, 1]) + expected = cudf.DataFrame( + [1, None], columns=cudf.Index([1], name="foo"), index=[0, 1] + ) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "index_data,name", + [([10, 13], "a"), ([30, 40, 20], "b"), (["ef"], "c"), ([2, 3], "Z")], +) +def test_dataframe_reindex_with_index_names(index_data, name): + gdf = cudf.DataFrame( + { + "a": [10, 12, 13], + "b": [20, 30, 40], + "c": cudf.Series(["ab", "cd", "ef"], dtype="category"), + } + ) + if name in gdf.columns: + gdf = gdf.set_index(name) + pdf = gdf.to_pandas() + + gidx = cudf.Index(index_data, name=name) + actual = gdf.reindex(gidx) + expected = pdf.reindex(gidx.to_pandas()) + + assert_eq(actual, expected) + + actual = gdf.reindex(index_data) + expected = pdf.reindex(index_data) + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_rename.py b/python/cudf/cudf/tests/dataframe/methods/test_rename.py index b7e9bf3c7ce..345342574c0 100644 --- 
a/python/cudf/cudf/tests/dataframe/methods/test_rename.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_rename.py @@ -68,3 +68,82 @@ def test_dataframe_column_rename(axis): got = gdf.rename(columns=rename_mapper) assert_eq(expect, got) + + +def test_rename_reset_label_dtype(): + data = {1: [2]} + col_mapping = {1: "a"} + result = cudf.DataFrame(data).rename(columns=col_mapping) + expected = pd.DataFrame(data).rename(columns=col_mapping) + assert_eq(result, expected) + + +def test_dataframe_rename_columns_keep_type(): + gdf = cudf.DataFrame([[1, 2, 3]]) + gdf.columns = cudf.Index([4, 5, 6], dtype=np.int8) + result = gdf.rename({4: 50}, axis="columns").columns + expected = pd.Index([50, 5, 6], dtype=np.int8) + assert_eq(result, expected) + + +def test_dataframe_rename_duplicate_column(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + with pytest.raises( + ValueError, match="Duplicate column names are not allowed" + ): + gdf.rename(columns={"a": "b"}, inplace=True) + + +@pytest.mark.parametrize("level", ["x", 0]) +def test_rename_for_level_MultiIndex_dataframe(level): + data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = {0: 123, 1: 4, 2: 6} + pdf = pd.DataFrame( + data, + index=pd.MultiIndex.from_tuples([(0, 1, 2), (1, 2, 3), (2, 3, 4)]), + ) + pdf.index.names = ["x", "y", "z"] + gdf = cudf.from_pandas(pdf) + + expect = pdf.rename(index=index, level=level) + got = gdf.rename(index=index, level=level) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "columns", + [{"a": "f", "b": "g"}, {1: 3, 2: 4}, lambda s: 2 * s], +) +@pytest.mark.parametrize("level", [0, 1]) +def test_rename_for_level_MultiColumn_dataframe(columns, level): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + + pdf = gdf.to_pandas() + + expect = pdf.rename(columns=columns, level=level) + got = gdf.rename(columns=columns, level=level) + + assert_eq(expect, got) + + +def test_rename_for_level_RangeIndex_dataframe(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + pdf = gdf.to_pandas() + + expect = pdf.rename(columns={"a": "f"}, index={0: 3, 1: 4}, level=0) + got = gdf.rename(columns={"a": "f"}, index={0: 3, 1: 4}, level=0) + + assert_eq(expect, got) + + +def test_rename_for_level_is_None_MC(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + pdf = gdf.to_pandas() + + expect = pdf.rename(columns={"a": "f"}, level=None) + got = gdf.rename(columns={"a": "f"}, level=None) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sample.py b/python/cudf/cudf/tests/dataframe/methods/test_sample.py new file mode 100644 index 00000000000..5d46f8c0d11 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_sample.py @@ -0,0 +1,241 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +import itertools + +import cupy as cp +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +def shape_checker(expected, got): + assert expected.shape == got.shape + + +def exact_checker(expected, got): + assert_eq(expected, got) + + +@pytest.fixture( + params=itertools.product([0, 2, None], [0.3, None]), + ids=lambda arg: f"n={arg[0]}-frac={arg[1]}", +) +def sample_n_frac(request): + """ + Specific to `test_sample*` tests. 
+ """ + n, frac = request.param + if n is not None and frac is not None: + pytest.skip("Cannot specify both n and frac.") + return n, frac + + +@pytest.fixture(params=[None, "builtin_list", "ndarray"]) +def make_weights_axis_0(request): + """Specific to `test_sample*_axis_0` tests. + Only testing weights array that matches type with random state. + """ + + if request.param is None: + return lambda *_: (None, None) + elif request.param == "builtin-list": + return lambda size, _: ([1] * size, [1] * size) + else: + + def wrapped(size, numpy_weights_for_cudf): + # Uniform distribution, non-normalized + if numpy_weights_for_cudf: + return np.ones(size), np.ones(size) + else: + return np.ones(size), cp.ones(size) + + return wrapped + + +@pytest.mark.parametrize( + "make_weights_axis_1", + [lambda _: None, lambda s: [1] * s, lambda s: np.ones(s)], +) +@pytest.mark.parametrize( + "pd_random_state, gd_random_state, checker", + [ + (None, None, shape_checker), + (42, 42, shape_checker), + (np.random.RandomState(42), np.random.RandomState(42), exact_checker), + ], + ids=["None", "IntSeed", "NumpyRandomState"], +) +def test_sample_axis_1( + sample_n_frac, + pd_random_state, + gd_random_state, + checker, + make_weights_axis_1, +): + n, frac = sample_n_frac + + pdf = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "float": [0.05, 0.2, 0.3, 0.2, 0.25], + "int": [1, 3, 5, 4, 2], + }, + ) + df = cudf.DataFrame.from_pandas(pdf) + + weights = make_weights_axis_1(len(pdf.columns)) + + expected = pdf.sample( + n=n, + frac=frac, + replace=False, + random_state=pd_random_state, + weights=weights, + axis=1, + ) + got = df.sample( + n=n, + frac=frac, + replace=False, + random_state=gd_random_state, + weights=weights, + axis=1, + ) + checker(expected, got) + + +@pytest.mark.parametrize( + "pdf", + [ + pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "float": [0.05, 0.2, 0.3, 0.2, 0.25], + "int": [1, 3, 5, 4, 2], + }, + ), + pd.Series([1, 2, 3, 4, 5]), + ], +) +@pytest.mark.parametrize( + "pd_random_state, gd_random_state, checker", + [ + (None, None, shape_checker), + (42, 42, shape_checker), + (np.random.RandomState(42), np.random.RandomState(42), exact_checker), + (np.random.RandomState(42), cp.random.RandomState(42), shape_checker), + ], + ids=["None", "IntSeed", "NumpyRandomState", "CupyRandomState"], +) +@pytest.mark.parametrize("replace", [True, False]) +def test_sample_axis_0( + pdf, + sample_n_frac, + replace, + pd_random_state, + gd_random_state, + checker, + make_weights_axis_0, +): + n, frac = sample_n_frac + + df = cudf.from_pandas(pdf) + + pd_weights, gd_weights = make_weights_axis_0( + len(pdf), isinstance(gd_random_state, np.random.RandomState) + ) + if ( + not replace + and not isinstance(gd_random_state, np.random.RandomState) + and gd_weights is not None + ): + pytest.skip( + "`cupy.random.RandomState` doesn't support weighted sampling " + "without replacement." 
+ ) + + expected = pdf.sample( + n=n, + frac=frac, + replace=replace, + random_state=pd_random_state, + weights=pd_weights, + axis=0, + ) + + got = df.sample( + n=n, + frac=frac, + replace=replace, + random_state=gd_random_state, + weights=gd_weights, + axis=0, + ) + checker(expected, got) + + +@pytest.mark.parametrize("replace", [True, False]) +@pytest.mark.parametrize( + "random_state_lib", [cp.random.RandomState, np.random.RandomState] +) +def test_sample_reproducibility(replace, random_state_lib): + df = cudf.DataFrame({"a": cp.arange(0, 25)}) + + n = 25 + expected = df.sample(n, replace=replace, random_state=random_state_lib(10)) + out = df.sample(n, replace=replace, random_state=random_state_lib(10)) + + assert_eq(expected, out) + + +def test_sample_invalid_n_frac_combo(axis): + n, frac = 2, 0.5 + pdf = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "float": [0.05, 0.2, 0.3, 0.2, 0.25], + "int": [1, 3, 5, 4, 2], + }, + ) + df = cudf.DataFrame.from_pandas(pdf) + + assert_exceptions_equal( + lfunc=pdf.sample, + rfunc=df.sample, + lfunc_args_and_kwargs=([], {"n": n, "frac": frac, "axis": axis}), + rfunc_args_and_kwargs=([], {"n": n, "frac": frac, "axis": axis}), + ) + + +@pytest.mark.parametrize("n, frac", [(100, None), (None, 3)]) +def test_oversample_without_replace(n, frac, axis): + pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}) + df = cudf.DataFrame.from_pandas(pdf) + + assert_exceptions_equal( + lfunc=pdf.sample, + rfunc=df.sample, + lfunc_args_and_kwargs=( + [], + {"n": n, "frac": frac, "axis": axis, "replace": False}, + ), + rfunc_args_and_kwargs=( + [], + {"n": n, "frac": frac, "axis": axis, "replace": False}, + ), + ) + + +@pytest.mark.parametrize("random_state", [None, cp.random.RandomState(42)]) +def test_sample_unsupported_arguments(random_state): + df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]}) + with pytest.raises( + NotImplementedError, + match="Random sampling with cupy does not support these inputs.", + ): + df.sample( + n=2, replace=False, random_state=random_state, weights=[1] * 5 + ) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py index d29a6bbcfaf..7af8a5f94de 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_arrow.py @@ -161,3 +161,71 @@ def test_to_arrow_categorical(): assert isinstance(pa_gs, pa.Array) assert pa.Array.equals(pa_s, pa_gs) + + +@pytest.mark.parametrize( + "data", + [ + {0: [1, 2, 3], 2: [10, 11, 23]}, + {("a", "b"): [1, 2, 3], ("2",): [10, 11, 23]}, + ], +) +def test_non_string_column_name_to_arrow(data): + df = cudf.DataFrame(data) + + expected = df.to_arrow() + actual = pa.Table.from_pandas(df.to_pandas()) + + assert expected.equals(actual) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [{"one": 3, "two": 4, "three": 10}]}, + { + "left-a": [0, 1, 2], + "a": [{"x": 0.23, "y": 43}, None, {"x": 23.9, "y": 4.3}], + "right-a": ["abc", "def", "ghi"], + }, + { + "left-a": [{"a": 1}, None, None], + "a": [ + {"one": 324, "two": 23432, "three": 324}, + None, + {"one": 3.24, "two": 1, "three": 324}, + ], + "right-a": ["abc", "def", "ghi"], + }, + ], +) +def test_dataframe_roundtrip_arrow_struct_dtype(data): + gdf = cudf.DataFrame(data) + table = gdf.to_arrow() + expected = cudf.DataFrame.from_arrow(table) + + assert_eq(gdf, expected) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [[1], [2], [3]]}, + { + "left-a": [0, 1, 2], + "a": [[1], None, [3]], + "right-a": ["abc", "def", "ghi"], 
+ }, + { + "left-a": [[], None, None], + "a": [[1], None, [3]], + "right-a": ["abc", "def", "ghi"], + }, + ], +) +def test_dataframe_roundtrip_arrow_list_dtype(data): + gdf = cudf.DataFrame(data) + table = gdf.to_arrow() + expected = cudf.DataFrame.from_arrow(table) + + assert_eq(gdf, expected) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py index 739d97c28a5..4fd728d06f1 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_pandas.py @@ -1,9 +1,14 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import datetime +import decimal import numpy as np +import pandas as pd +import pyarrow as pa import pytest import cudf +from cudf.testing import assert_eq def test_to_pandas(): @@ -36,3 +41,191 @@ def test_list_to_pandas_nullable_true(): df = cudf.DataFrame({"a": cudf.Series([[1, 2, 3]])}) with pytest.raises(NotImplementedError): df.to_pandas(nullable=True) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + {"1": 2}, + [1], + decimal.Decimal("1.0"), + ], +) +def test_dataframe_to_pandas_arrow_type(scalar): + pa_array = pa.array([scalar, None]) + df = cudf.DataFrame({"a": pa_array}) + result = df.to_pandas(arrow_type=True) + expected = pd.DataFrame({"a": pd.arrays.ArrowExtensionArray(pa_array)}) + pd.testing.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + {"1": 2}, + [1], + decimal.Decimal("1.0"), + ], +) +def test_dataframe_to_pandas_arrow_type_nullable_raises(scalar): + pa_array = pa.array([scalar, None]) + df = cudf.DataFrame({"a": pa_array}) + with pytest.raises(ValueError): + df.to_pandas(nullable=True, arrow_type=True) + + +@pytest.mark.parametrize( + "df,expected_pdf", + [ + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series([1, 2, None, 3], dtype="uint8"), + "b": cudf.Series([23, None, None, 32], dtype="uint16"), + } + ), + pd.DataFrame( + { + "a": pd.Series([1, 2, None, 3], dtype=pd.UInt8Dtype()), + "b": pd.Series( + [23, None, None, 32], dtype=pd.UInt16Dtype() + ), + } + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series([None, 123, None, 1], dtype="uint32"), + "b": cudf.Series( + [234, 2323, 23432, None, None, 224], dtype="uint64" + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + [None, 123, None, 1], dtype=pd.UInt32Dtype() + ), + "b": pd.Series( + [234, 2323, 23432, None, None, 224], + dtype=pd.UInt64Dtype(), + ), + } + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [-10, 1, None, -1, None, 3], dtype="int8" + ), + "b": cudf.Series( + [111, None, 222, None, 13], dtype="int16" + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + [-10, 1, None, -1, None, 3], dtype=pd.Int8Dtype() + ), + "b": pd.Series( + [111, None, 222, None, 13], dtype=pd.Int16Dtype() + ), + } + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [11, None, 22, 33, None, 2, None, 3], dtype="int32" + ), + "b": cudf.Series( + [32431, None, None, 32322, 0, 10, -32324, None], + dtype="int64", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + [11, None, 22, 33, None, 2, None, 3], + dtype=pd.Int32Dtype(), + ), + "b": pd.Series( + [32431, None, None, 32322, 0, 10, -32324, None], + dtype=pd.Int64Dtype(), + ), + } + ), + ), + ( + lambda: cudf.DataFrame( + { + "a": cudf.Series( + [True, None, False, None, False, True, True, False], + 
dtype="bool_", + ), + "b": cudf.Series( + [ + "abc", + "a", + None, + "hello world", + "foo buzz", + "", + None, + "rapids ai", + ], + dtype="object", + ), + "c": cudf.Series( + [0.1, None, 0.2, None, 3, 4, 1000, None], + dtype="float64", + ), + } + ), + pd.DataFrame( + { + "a": pd.Series( + [True, None, False, None, False, True, True, False], + dtype=pd.BooleanDtype(), + ), + "b": pd.Series( + [ + "abc", + "a", + None, + "hello world", + "foo buzz", + "", + None, + "rapids ai", + ], + dtype=pd.StringDtype(), + ), + "c": pd.Series( + [0.1, None, 0.2, None, 3, 4, 1000, None], + dtype=pd.Float64Dtype(), + ), + } + ), + ), + ], +) +def test_dataframe_to_pandas_nullable_dtypes(df, expected_pdf): + actual_pdf = df().to_pandas(nullable=True) + + assert_eq(actual_pdf, expected_pdf) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_update.py b/python/cudf/cudf/tests/dataframe/methods/test_update.py new file mode 100644 index 00000000000..92d7a117403 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_update.py @@ -0,0 +1,91 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize("overwrite", [True, False]) +@pytest.mark.parametrize( + "left_keys,right_keys", + [ + [("a", "b"), ("a", "b")], + [("a", "b"), ("a", "c")], + [("a", "b"), ("d", "e")], + ], +) +@pytest.mark.parametrize( + "data_left,data_right", + [ + [([1, 2, 3], [3, 4, 5]), ([1, 2, 3], [3, 4, 5])], + [ + ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), + ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), + ], + [ + ([True, False, True], [False, False, False]), + ([True, False, True], [False, False, False]), + ], + [ + ([np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]), + ([np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]), + ], + [([1, 2, 3], [3, 4, 5]), ([1, 2, 4], [30, 40, 50])], + [ + ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), + ([1.0, 2.0, 4.0], [30.0, 40.0, 50.0]), + ], + [([1, 2, 3], [3, 4, 5]), ([10, 20, 40], [30, 40, 50])], + [ + ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), + ([10.0, 20.0, 40.0], [30.0, 40.0, 50.0]), + ], + ], +) +def test_update_for_dataframes( + left_keys, right_keys, data_left, data_right, overwrite +): + errors = "ignore" + join = "left" + left = dict(zip(left_keys, data_left, strict=True)) + right = dict(zip(right_keys, data_right, strict=True)) + pdf = pd.DataFrame(left) + gdf = cudf.DataFrame(left, nan_as_null=False) + + other_pd = pd.DataFrame(right) + other_gd = cudf.DataFrame(right, nan_as_null=False) + + pdf.update(other=other_pd, join=join, overwrite=overwrite, errors=errors) + gdf.update(other=other_gd, join=join, overwrite=overwrite, errors=errors) + + assert_eq(pdf, gdf, check_dtype=False) + + +def test_update_for_right_join(): + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + other_gd = cudf.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) + + with pytest.raises( + NotImplementedError, match="Only left join is supported" + ): + gdf.update(other_gd, join="right") + + +def test_update_for_data_overlap(): + errors = "raise" + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) + + other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) + other_gd = cudf.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) + + assert_exceptions_equal( + lfunc=pdf.update, + rfunc=gdf.update, + lfunc_args_and_kwargs=([other_pd, errors], {}), 
+        rfunc_args_and_kwargs=([other_gd, errors], {}),
+    )
diff --git a/python/cudf/cudf/tests/dataframe/methods/test_value_counts.py b/python/cudf/cudf/tests/dataframe/methods/test_value_counts.py
new file mode 100644
index 00000000000..26357348182
--- /dev/null
+++ b/python/cudf/cudf/tests/dataframe/methods/test_value_counts.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_value_counts_no_subset():
+    gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]})
+    with pytest.raises(KeyError):
+        gdf.value_counts(subset=["not_a_column_name"])
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        {
+            "first_name": ["John", "Anne", "John", "Beth"],
+            "middle_name": ["Smith", None, None, "Louise"],
+        },
+    ],
+)
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("use_subset", [True, False])
+def test_value_counts(
+    data,
+    sort,
+    ascending,
+    normalize,
+    dropna,
+    use_subset,
+):
+    subset = [next(iter(data.keys()))]
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+
+    got = gdf.value_counts(
+        subset=subset if (use_subset) else None,
+        sort=sort,
+        ascending=ascending,
+        normalize=normalize,
+        dropna=dropna,
+    )
+    expected = pdf.value_counts(
+        subset=subset if (use_subset) else None,
+        sort=sort,
+        ascending=ascending,
+        normalize=normalize,
+        dropna=dropna,
+    )
+
+    if not dropna:
+        # Convert the Pandas series to a cuDF one due to the difference
+        # in the handling of NaNs between the two (<NA> in cuDF and
+        # NaN in Pandas) when dropna=False.
+        assert_eq(got.sort_index(), cudf.from_pandas(expected).sort_index())
+    else:
+        assert_eq(got.sort_index(), expected.sort_index())
diff --git a/python/cudf/cudf/tests/dataframe/test_attributes.py b/python/cudf/cudf/tests/dataframe/test_attributes.py
index 3a3c43a15c7..dd0ecbf5d61 100644
--- a/python/cudf/cudf/tests/dataframe/test_attributes.py
+++ b/python/cudf/cudf/tests/dataframe/test_attributes.py
@@ -182,6 +182,135 @@ def test_ndim():
     assert pdf.ndim == gdf.ndim
 
+
+@pytest.mark.parametrize("names", [["abc", "def"], [1, 2], ["abc", 10]])
+def test_dataframe_multiindex_column_names(names):
+    arrays = [["A", "A", "B", "B"], ["one", "two", "one", "two"]]
+    tuples = list(zip(*arrays, strict=True))
+    index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
+
+    pdf = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=index)
+    df = cudf.from_pandas(pdf)
+
+    assert_eq(df, pdf)
+    assert_eq(df.columns.names, pdf.columns.names)
+    pdf.columns.names = names
+    df.columns.names = names
+    assert_eq(df, pdf)
+    assert_eq(df.columns.names, pdf.columns.names)
+
+
+@pytest.mark.parametrize("name", ["a", 0, None, np.nan, cudf.NA])
+@pytest.mark.parametrize("contains", ["a", 0, None, np.nan, cudf.NA])
+@pytest.mark.parametrize("other_names", [[], ["b", "c"], [1, 2]])
+def test_dataframe_contains(name, contains, other_names):
+    column_names = [name, *other_names]
+    gdf = cudf.DataFrame({c: [0] for c in column_names})
+    pdf = pd.DataFrame({c: [0] for c in column_names})
+
+    assert_eq(gdf, pdf)
+
+    if contains is cudf.NA or name is cudf.NA:
+        expectation = contains is cudf.NA and name is cudf.NA
+        assert (contains in pdf) == expectation
+        assert (contains in gdf) == expectation
+    elif gdf.columns.dtype.kind == "f":
+        # In some cases, the columns are converted to an Index[float] based on
+        # the other column names. That casts name values from None to np.nan.
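+        # (e.g. pd.Index([None, 1, 2]) becomes a float64 index holding
+        # [nan, 1.0, 2.0], so membership has to be checked against np.nan).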
+ expectation = contains is np.nan and (name is None or name is np.nan) + assert (contains in pdf) == expectation + assert (contains in gdf) == expectation + else: + expectation = contains == name or ( + contains is np.nan and name is np.nan + ) + assert (contains in pdf) == expectation + assert (contains in gdf) == expectation + + assert (contains in pdf) == (contains in gdf) + + +@pytest.mark.parametrize( + "data", + [ + {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, + {"a": [[{"b": 567}], None] * 10}, + {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, + ], +) +def test_dataframe_values_complex_types(data): + gdf = cudf.DataFrame(data) + with pytest.raises(NotImplementedError): + gdf.values + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[100, 10, 1, 0]), + pd.DataFrame(columns=["a", "b", "c", "d"]), + pd.DataFrame(columns=["a", "b", "c", "d"], index=[100]), + pd.DataFrame( + columns=["a", "b", "c", "d"], index=[100, 10000, 2131, 133] + ), + pd.DataFrame({"a": [1, 2, 3], "b": ["abc", "xyz", "klm"]}), + ], +) +def test_dataframe_size(df): + pdf = df + gdf = cudf.from_pandas(pdf) + + assert_eq(pdf.size, gdf.size) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[100, 10, 1, 0]), + pd.DataFrame(columns=["a", "b", "c", "d"]), + pd.DataFrame(columns=["a", "b", "c", "d"], index=[100]), + pd.DataFrame( + columns=["a", "b", "c", "d"], index=[100, 10000, 2131, 133] + ), + pd.DataFrame({"a": [1, 2, 3], "b": ["abc", "xyz", "klm"]}), + ], +) +def test_dataframe_empty(df): + pdf = df + gdf = cudf.from_pandas(pdf) + + assert_eq(pdf.empty, gdf.empty) + + +def test_cudf_arrow_array_error(): + df = cudf.DataFrame({"a": [1, 2, 3]}) + + with pytest.raises( + TypeError, + match="Implicit conversion to a host PyArrow object via " + "__arrow_array__ is not allowed. Consider using .to_arrow()", + ): + df.__arrow_array__() + + sr = cudf.Series([1, 2, 3]) + + with pytest.raises( + TypeError, + match="Implicit conversion to a host PyArrow object via " + "__arrow_array__ is not allowed. Consider using .to_arrow()", + ): + sr.__arrow_array__() + + sr = cudf.Series(["a", "b", "c"]) + with pytest.raises( + TypeError, + match="Implicit conversion to a host PyArrow object via " + "__arrow_array__ is not allowed. Consider using .to_arrow()", + ): + sr.__arrow_array__() + + @pytest.mark.parametrize( "index", [ diff --git a/python/cudf/cudf/tests/dataframe/test_binops.py b/python/cudf/cudf/tests/dataframe/test_binops.py new file mode 100644 index 00000000000..a8540b4e711 --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/test_binops.py @@ -0,0 +1,170 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "expected", + [ + pd.RangeIndex(1, 2, name="a"), + pd.Index([1], dtype=np.int8, name="a"), + pd.MultiIndex.from_arrays([[1]], names=["a"]), + ], +) +@pytest.mark.parametrize("binop", [lambda df: df == df, lambda df: df - 1]) +def test_dataframe_binop_preserves_column_metadata(expected, binop): + df = cudf.DataFrame([1], columns=expected) + result = binop(df).columns + pd.testing.assert_index_equal(result, expected, exact=True) + + +def test_dataframe_series_dot(): + pser = pd.Series(range(2)) + gser = cudf.from_pandas(pser) + + expected = pser @ pser + actual = gser @ gser + + assert_eq(expected, actual) + + pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("ab")) + gdf = cudf.from_pandas(pdf) + + expected = pser @ pdf + actual = gser @ gdf + + assert_eq(expected, actual) + + assert_exceptions_equal( + lfunc=pdf.dot, + rfunc=gdf.dot, + lfunc_args_and_kwargs=([pser], {}), + rfunc_args_and_kwargs=([gser], {}), + ) + + assert_exceptions_equal( + lfunc=pdf.dot, + rfunc=gdf.dot, + lfunc_args_and_kwargs=([pdf], {}), + rfunc_args_and_kwargs=([gdf], {}), + ) + + pser = pd.Series(range(2), index=["a", "k"]) + gser = cudf.from_pandas(pser) + + pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("ab"), index=["a", "k"]) + gdf = cudf.from_pandas(pdf) + + expected = pser @ pdf + actual = gser @ gdf + + assert_eq(expected, actual) + + actual = gdf @ [2, 3] + expected = pdf @ [2, 3] + + assert_eq(expected, actual) + + actual = pser @ [12, 13] + expected = gser @ [12, 13] + + assert_eq(expected, actual) + + +def test_dataframe_binop_with_datetime_index(): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + rng.random(size=(2, 2)), + columns=pd.Index(["2000-01-03", "2000-01-04"], dtype="datetime64[ns]"), + ) + ser = pd.Series( + rng.random(2), + index=pd.Index( + [ + "2000-01-04", + "2000-01-03", + ], + dtype="datetime64[ns]", + ), + ) + gdf = cudf.from_pandas(df) + gser = cudf.from_pandas(ser) + expected = df - ser + got = gdf - gser + assert_eq(expected, got) + + +def test_dataframe_binop_and_where(): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame(rng.random(size=(2, 2)), columns=pd.Index([True, False])) + gdf = cudf.from_pandas(df) + + expected = df > 1 + got = gdf > 1 + + assert_eq(expected, got) + + expected = df[df > 1] + got = gdf[gdf > 1] + + assert_eq(expected, got) + + +def test_dataframe_binop_with_mixed_string_types(): + rng = np.random.default_rng(seed=0) + df1 = pd.DataFrame(rng.random(size=(3, 3)), columns=pd.Index([0, 1, 2])) + df2 = pd.DataFrame( + rng.random(size=(6, 6)), + columns=pd.Index([0, 1, 2, "VhDoHxRaqt", "X0NNHBIPfA", "5FbhPtS0D1"]), + ) + gdf1 = cudf.from_pandas(df1) + gdf2 = cudf.from_pandas(df2) + + expected = df2 + df1 + got = gdf2 + gdf1 + + assert_eq(expected, got) + + +def test_dataframe_binop_with_mixed_date_types(): + rng = np.random.default_rng(seed=0) + df = pd.DataFrame( + rng.random(size=(2, 2)), + columns=pd.Index(["2000-01-03", "2000-01-04"], dtype="datetime64[ns]"), + ) + ser = pd.Series(rng.random(size=3), index=[0, 1, 2]) + gdf = cudf.from_pandas(df) + gser = cudf.from_pandas(ser) + expected = df - ser + got = gdf - gser + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "df1", + [ + pd.DataFrame({"a": [10, 11, 12]}, index=["a", "b", "z"]), + pd.DataFrame({"z": ["a"]}), + pd.DataFrame({"a": [], "b": []}), + ], +) +@pytest.mark.parametrize( 
+ "df2", + [ + pd.DataFrame(), + pd.DataFrame({"a": ["a", "a", "c", "z", "A"], "z": [1, 2, 3, 4, 5]}), + ], +) +def test_dataframe_error_equality(df1, df2, comparison_op): + gdf1 = cudf.from_pandas(df1) + gdf2 = cudf.from_pandas(df2) + + assert_exceptions_equal( + comparison_op, comparison_op, ([df1, df2],), ([gdf1, gdf2],) + ) diff --git a/python/cudf/cudf/tests/dataframe/test_constructors.py b/python/cudf/cudf/tests/dataframe/test_constructors.py index 28c515757f0..21fb39059e8 100644 --- a/python/cudf/cudf/tests/dataframe/test_constructors.py +++ b/python/cudf/cudf/tests/dataframe/test_constructors.py @@ -13,6 +13,7 @@ import cudf from cudf.core.column.column import as_column from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def test_init_via_list_of_tuples(): @@ -1356,6 +1357,465 @@ def test_create_interval_df(data1, data2, data3, data4, interval_closed): assert_eq(expect_three, got_three) +def test_roundtrip_dataframe_plc_table(): + pdf = pd.DataFrame( + { + "a": [None, None, np.nan, None], + "b": [np.nan, None, np.nan, None], + } + ) + expect = cudf.DataFrame.from_pandas(pdf) + actual = cudf.DataFrame.from_pylibcudf(*expect.to_pylibcudf()) + assert_eq(expect, actual) + + +def test_dataframe_from_generator(): + pdf = pd.DataFrame((i for i in range(5))) + gdf = cudf.DataFrame((i for i in range(5))) + assert_eq(pdf, gdf) + + +@pytest.mark.parametrize( + "dtype", ["datetime64[ns]", "timedelta64[ns]", "int64", "float32"] +) +def test_dataframe_mixed_dtype_error(dtype): + pdf = pd.Series([1, 2, 3], dtype=dtype).to_frame().astype(object) + with pytest.raises(TypeError): + cudf.from_pandas(pdf) + + +def test_dataframe_from_arrow_slice(): + table = pa.Table.from_pandas( + pd.DataFrame.from_dict( + {"a": ["aa", "bb", "cc"] * 3, "b": [1, 2, 3] * 3} + ) + ) + table_slice = table.slice(3, 7) + + expected = table_slice.to_pandas() + actual = cudf.DataFrame.from_arrow(table_slice) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data,index", + [ + ({"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}, None), + ( + { + "a": [1, 2, 3], + "b": ["x", "y", "z"], + }, + [10, 11], + ), + ( + { + "a": [1, 2, 3], + "b": ["x", "y", "z"], + }, + [10, 11], + ), + ([[10, 11], [12, 13]], ["a", "b", "c"]), + ], +) +def test_dataframe_init_length_error(data, index): + assert_exceptions_equal( + lfunc=pd.DataFrame, + rfunc=cudf.DataFrame, + lfunc_args_and_kwargs=( + [], + {"data": data, "index": index}, + ), + rfunc_args_and_kwargs=( + [], + {"data": data, "index": index}, + ), + ) + + +def test_complex_types_from_arrow(): + expected = pa.Table.from_arrays( + [ + pa.array([1, 2, 3]), + pa.array([10, 20, 30]), + pa.array([{"a": 9}, {"b": 10}, {"c": 11}]), + pa.array([[{"a": 1}], [{"b": 2}], [{"c": 3}]]), + pa.array([10, 11, 12]).cast(pa.decimal128(21, 2)), + pa.array([{"a": 9}, {"b": 10, "c": {"g": 43}}, {"c": {"a": 10}}]), + ], + names=["a", "b", "c", "d", "e", "f"], + ) + + df = cudf.DataFrame.from_arrow(expected) + actual = df.to_arrow() + + assert expected.equals(actual) + + +def test_dataframe_constructor_column_index_only(): + columns = ["a", "b", "c"] + index = ["r1", "r2", "r3"] + + gdf = cudf.DataFrame(index=index, columns=columns) + assert gdf["a"]._column is not gdf["b"]._column + assert gdf["b"]._column is not gdf["c"]._column + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame( + {"a": [1, 2, 3], "b": [10, 11, 20], "c": ["a", "bcd", "xyz"]} + ), + pd.DataFrame(), + ], +) +@pytest.mark.parametrize( + "columns", + [ + None, + ["a"], + ["c", 
"a"], + ["b", "a", "c"], + [], + pd.Index(["c", "a"]), + cudf.Index(["c", "a"]), + ["abc", "a"], + ["column_not_exists1", "column_not_exists2"], + ], +) +@pytest.mark.parametrize("index", [["abc", "def", "ghi"]]) +def test_dataframe_constructor_columns(df, columns, index, request): + def assert_local_eq(actual, df, expected, host_columns): + check_index_type = not expected.empty + if host_columns is not None and any( + col not in df.columns for col in host_columns + ): + assert_eq( + expected, + actual, + check_dtype=False, + check_index_type=check_index_type, + ) + else: + assert_eq( + expected, + actual, + check_index_type=check_index_type, + check_column_type=False, + ) + + gdf = cudf.from_pandas(df) + host_columns = ( + columns.to_pandas() if isinstance(columns, cudf.Index) else columns + ) + + expected = pd.DataFrame(df, columns=host_columns, index=index) + actual = cudf.DataFrame(gdf, columns=columns, index=index) + + assert_local_eq(actual, df, expected, host_columns) + + +def test_dataframe_from_pandas_duplicate_columns(): + pdf = pd.DataFrame(columns=["a", "b", "c", "a"]) + pdf["a"] = [1, 2, 3] + + with pytest.raises( + ValueError, match="Duplicate column names are not allowed" + ): + cudf.from_pandas(pdf) + + +@pytest.mark.parametrize( + "data", + [ + [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}], + [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 5, "c": 6}], + [{"a": 1, "b": 2}, {"a": 1, "b": 5, "c": 6}], + [{"a": 1, "b": 2}, {"b": 5, "c": 6}], + [{}, {"a": 1, "b": 5, "c": 6}], + [{"a": 1, "b": 2, "c": 3}, {"a": 4.5, "b": 5.5, "c": 6.5}], + ], +) +def test_dataframe_init_from_list_of_dicts(data): + expect = pd.DataFrame(data) + got = cudf.DataFrame(data) + + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + None, + [], + [1], + {"a": [10, 11, 12]}, + { + "a": [10, 11, 12], + "another column name": [12, 22, 34], + "xyz": [0, 10, 11], + }, + ], +) +@pytest.mark.parametrize( + "columns", + [["a"], ["another column name"], None, pd.Index(["a"], name="index name")], +) +def test_dataframe_init_with_columns(data, columns): + pdf = pd.DataFrame(data, columns=columns) + gdf = cudf.DataFrame(data, columns=columns) + + assert_eq( + pdf, + gdf, + check_index_type=len(pdf.index) != 0, + check_dtype=not (pdf.empty and len(pdf.columns)), + check_column_type=False, + ) + + +@pytest.mark.parametrize( + "data, ignore_dtype", + [ + ([pd.Series([1, 2, 3])], False), + ([pd.Series(index=[1, 2, 3], dtype="float64")], False), + ([pd.Series(name="empty series name", dtype="float64")], False), + ( + [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], + False, + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], dtype="float64"), + pd.Series([3], name="series that is named"), + ], + False, + ), + ([pd.Series([1, 2, 3], name="hi")] * 10, False), + ([pd.Series([1, 2, 3], name=None, index=[10, 11, 12])] * 10, False), + ( + [ + pd.Series([1, 2, 3], name=None, index=[10, 11, 12]), + pd.Series([1, 2, 30], name=None, index=[13, 144, 15]), + ], + True, + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + ], + False, + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], name="abc", dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + ], + False, + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([1, -100, 200, -399, 400], name="abc"), + pd.Series([111, 222, 333], index=[10, 11, 
12]), + ], + False, + ), + ], +) +@pytest.mark.parametrize( + "columns", + [ + None, + ["0"], + [0], + ["abc"], + [144, 13], + [2, 1, 0], + pd.Index(["abc"], name="custom_name"), + ], +) +def test_dataframe_init_from_series_list(data, ignore_dtype, columns): + gd_data = [cudf.from_pandas(obj) for obj in data] + + expected = pd.DataFrame(data, columns=columns) + actual = cudf.DataFrame(gd_data, columns=columns) + + if ignore_dtype: + # When a union is performed to generate columns, + # the order is never guaranteed. Hence sort by + # columns before comparison. + if not expected.columns.equals(actual.columns): + expected = expected.sort_index(axis=1) + actual = actual.sort_index(axis=1) + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_index_type=True, + ) + else: + assert_eq( + expected, + actual, + check_index_type=True, + check_column_type=False, + ) + + +@pytest.mark.parametrize( + "data, ignore_dtype, index", + [ + ([pd.Series([1, 2, 3])], False, ["a", "b", "c"]), + ([pd.Series(index=[1, 2, 3], dtype="float64")], False, ["a", "b"]), + ( + [pd.Series(name="empty series name", dtype="float64")], + False, + ["index1"], + ), + ( + [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], + False, + ["0", "2", "1"], + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], dtype="float64"), + pd.Series([3], name="series that is named"), + ], + False, + ["_", "+", "*"], + ), + ([pd.Series([1, 2, 3], name="hi")] * 10, False, ["mean"] * 10), + ( + [pd.Series([1, 2, 3], name=None, index=[10, 11, 12])] * 10, + False, + ["abc"] * 10, + ), + ( + [ + pd.Series([1, 2, 3], name=None, index=[10, 11, 12]), + pd.Series([1, 2, 30], name=None, index=[13, 144, 15]), + ], + True, + ["set_index_a", "set_index_b"], + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + ], + False, + ["a", "b", "c"], + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], name="abc", dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + ], + False, + ["a", "v", "z"], + ), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([1, -100, 200, -399, 400], name="abc"), + pd.Series([111, 222, 333], index=[10, 11, 12]), + ], + False, + ["a", "v", "z"], + ), + ], +) +@pytest.mark.parametrize( + "columns", [None, ["0"], [0], ["abc"], [144, 13], [2, 1, 0]] +) +def test_dataframe_init_from_series_list_with_index( + data, + ignore_dtype, + index, + columns, +): + gd_data = [cudf.from_pandas(obj) for obj in data] + + expected = pd.DataFrame(data, columns=columns, index=index) + actual = cudf.DataFrame(gd_data, columns=columns, index=index) + + if ignore_dtype: + # When a union is performed to generate columns, + # the order is never guaranteed. Hence sort by + # columns before comparison. 
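+        # (e.g. the Series indexes [10, 11, 12] and [13, 144, 15] are
+        # unioned into the column labels in no particular order).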
+ if not expected.columns.equals(actual.columns): + expected = expected.sort_index(axis=1) + actual = actual.sort_index(axis=1) + assert_eq(expected.fillna(-1), actual.fillna(-1), check_dtype=False) + else: + assert_eq(expected, actual, check_column_type=False) + + +@pytest.mark.parametrize( + "data, index", + [ + ([pd.Series([1, 2]), pd.Series([1, 2])], ["a", "b", "c"]), + ( + [ + pd.Series([1, 0.324234, 32424.323, -1233, 34242]), + pd.Series([], dtype="float64"), + pd.Series([3], name="series that is named"), + ], + ["_", "+"], + ), + ([pd.Series([1, 2, 3], name="hi")] * 10, ["mean"] * 9), + ], +) +def test_dataframe_init_from_series_list_with_index_error(data, index): + gd_data = [cudf.from_pandas(obj) for obj in data] + + assert_exceptions_equal( + pd.DataFrame, + cudf.DataFrame, + ([data], {"index": index}), + ([gd_data], {"index": index}), + ) + + +@pytest.mark.parametrize( + "data", + [ + [pd.Series([1, 2, 3], index=["a", "a", "a"])], + [pd.Series([1, 2, 3], index=["a", "a", "a"])] * 4, + [ + pd.Series([1, 2, 3], index=["a", "b", "a"]), + pd.Series([1, 2, 3], index=["b", "b", "a"]), + ], + [ + pd.Series([1, 2, 3], index=["a", "b", "z"]), + pd.Series([1, 2, 3], index=["u", "b", "a"]), + pd.Series([1, 2, 3], index=["u", "b", "u"]), + ], + ], +) +def test_dataframe_init_from_series_list_duplicate_index_error(data): + gd_data = [cudf.from_pandas(obj) for obj in data] + + assert_exceptions_equal( + lfunc=pd.DataFrame, + rfunc=cudf.DataFrame, + lfunc_args_and_kwargs=([], {"data": data}), + rfunc_args_and_kwargs=([], {"data": gd_data}), + check_exception_type=False, + ) + + def test_from_pandas(): pdf = pd.DataFrame( { diff --git a/python/cudf/cudf/tests/dataframe/test_reductions.py b/python/cudf/cudf/tests/dataframe/test_reductions.py deleted file mode 100644 index 167ba2bd427..00000000000 --- a/python/cudf/cudf/tests/dataframe/test_reductions.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. 
- - -import pandas as pd -import pytest - -import cudf -from cudf.testing import assert_eq - - -def test_single_q(): - q = 0.5 - - pdf = pd.DataFrame({"a": [4, 24, 13, 8, 7]}) - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -def test_with_index(): - q = [0, 0.5, 1] - - pdf = pd.DataFrame({"a": [7, 4, 4, 9, 13]}, index=[0, 4, 3, 2, 7]) - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -def test_with_multiindex(): - q = [0, 0.5, 1] - - pdf = pd.DataFrame( - { - "index_1": [3, 1, 9, 7, 5], - "index_2": [2, 4, 3, 5, 1], - "a": [8, 4, 2, 3, 8], - } - ) - pdf.set_index(["index_1", "index_2"], inplace=True) - - gdf = cudf.from_pandas(pdf) - - pdf_q = pdf.quantile(q, interpolation="nearest") - gdf_q = gdf.quantile(q, interpolation="nearest", method="table") - - assert_eq(pdf_q, gdf_q, check_index_type=False) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, 3], "b": [10, 11, 12]}, - {"a": [1, 0, 3], "b": [10, 11, 12]}, - {"a": [1, 2, 3], "b": [10, 11, None]}, - { - "a": [], - }, - {}, - ], -) -@pytest.mark.parametrize("op", ["all", "any"]) -def test_any_all_axis_none(data, op): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - expected = getattr(pdf, op)(axis=None) - actual = getattr(gdf, op)(axis=None) - - assert expected == actual diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_memory_usage.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_memory_usage.py new file mode 100644 index 00000000000..5c193c4a290 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_memory_usage.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd + +import cudf + + +def test_memory_usage_multi(): + # We need to sample without replacement to guarantee that the size of the + # levels are always the same. + rng = np.random.default_rng(seed=0) + rows = 10 + df = pd.DataFrame( + { + "A": np.arange(rows, dtype="int32"), + "B": rng.choice( + np.arange(rows, dtype="int64"), rows, replace=False + ), + "C": rng.choice( + np.arange(rows, dtype="float64"), rows, replace=False + ), + } + ).set_index(["B", "C"]) + gdf = cudf.from_pandas(df) + # Assume MultiIndex memory footprint is just that + # of the underlying columns, levels, and codes + expect = rows * 16 # Source Columns + expect += rows * 16 # Codes + expect += rows * 8 # Level 0 + expect += rows * 8 # Level 1 + + assert expect == gdf.index.memory_usage(deep=True) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py new file mode 100644 index 00000000000..1683051c9ad --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd + +import cudf + + +def test_nunique(): + gidx = cudf.RangeIndex(5) + pidx = pd.RangeIndex(5) + + actual = gidx.nunique() + expected = pidx.nunique() + + assert actual == expected diff --git a/python/cudf/cudf/tests/reshape/test_concat.py b/python/cudf/cudf/tests/reshape/test_concat.py index e533bfac9df..7cdd40f68bc 100644 --- a/python/cudf/cudf/tests/reshape/test_concat.py +++ b/python/cudf/cudf/tests/reshape/test_concat.py @@ -2217,3 +2217,647 @@ def test_series_concat_existing_buffers(): np.testing.assert_equal( gs.to_numpy(), np.hstack([a6.to_numpy(), a5.to_numpy()]) ) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[7, 20, 11, 9], + ), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[100]), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame( + {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, + index=[100, 200, 300, 400, 500, 0], + ), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + ], +) +@pytest.mark.parametrize( + "other", + [ + [[1, 2], [10, 100]], + [[1, 2, 10, 100, 0.1, 0.2, 0.0021]], + [[]], + [[], [], [], []], + [[0.23, 0.00023, -10.00, 100, 200, 1000232, 1232.32323]], + ], +) +def test_dataframe_concat_lists(df, other, sort, ignore_index): + pdf = df + other_pd = [pd.DataFrame(o) for o in other] + + gdf = cudf.from_pandas(df) + other_gd = [cudf.from_pandas(o) for o in other_pd] + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + [pdf, *other_pd], sort=sort, ignore_index=ignore_index + ) + actual = cudf.concat( + [gdf, *other_gd], sort=sort, ignore_index=ignore_index + ) + + if expected.shape != df.shape: + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_column_type=not gdf.empty, + ) + else: + assert_eq( + expected, + actual, + check_index_type=not gdf.empty, + check_column_type=len(gdf.columns) != 0, + ) + + +def test_dataframe_concat_series_without_name(): + df = cudf.DataFrame({"a": [1, 2, 3]}) + pdf = df.to_pandas() + gs = cudf.Series([1, 2, 3]) + ps = gs.to_pandas() + + assert_eq(pd.concat([pdf, ps]), cudf.concat([df, gs])) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[7, 20, 11, 9], + ), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[100]), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame( + {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, + index=[100, 200, 300, 400, 500, 0], + ), + ], +) +@pytest.mark.parametrize( + "other", + [ + [pd.DataFrame([[5, 6], [7, 8]], columns=list("AB"))], + [ 
+ pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + pd.DataFrame([[5, 6], [7, 8]], columns=list("BD")), + pd.DataFrame([[5, 6], [7, 8]], columns=list("DE")), + ], + [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], + [ + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame(), + pd.DataFrame(), + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + ], + [ + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[200]), + ], + [pd.DataFrame([]), pd.DataFrame([], index=[100])], + [ + pd.DataFrame([]), + pd.DataFrame([], index=[100]), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + ], + [ + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + ], + [ + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + ], + [ + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + ], + [ + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + ], + ], +) +def test_dataframe_concat_dataframe_lists(df, other, sort, ignore_index): + pdf = df + other_pd = other + + gdf = cudf.from_pandas(df) + other_gd = [cudf.from_pandas(o) for o in other] + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + [pdf, *other_pd], sort=sort, ignore_index=ignore_index + ) + actual = cudf.concat( + [gdf, *other_gd], sort=sort, ignore_index=ignore_index + ) + + # In some cases, Pandas creates an empty Index([], dtype="object") for + # columns whereas cudf creates a RangeIndex(0, 0). 
+ check_column_type = ( + False if len(expected.columns) == len(df.columns) == 0 else True + ) + + if expected.shape != df.shape: + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_column_type=check_column_type, + ) + else: + assert_eq( + expected, + actual, + check_index_type=not gdf.empty, + check_column_type=check_column_type, + ) + + +def test_dataframe_concat_series_mixed_index(): + df = cudf.DataFrame({"first": [], "d": []}) + pdf = df.to_pandas() + + sr = cudf.Series([1, 2, 3, 4]) + psr = sr.to_pandas() + + assert_eq( + cudf.concat([df, sr], ignore_index=True), + pd.concat([pdf, psr], ignore_index=True), + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame({12: [], 22: []}), + pd.DataFrame([[1, 2], [3, 4]], columns=[10, 20]), + pd.DataFrame([[1, 2], [3, 4]], columns=[0, 1], index=[10, 20]), + pd.DataFrame([[1, 2], [3, 4]], columns=[1, 0], index=[7, 8]), + pd.DataFrame( + { + 23: [315.3324, 3243.32432, 3232.332, -100.32], + 33: [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + 0: [315.3324, 3243.32432, 3232.332, -100.32], + 1: [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[7, 20, 11, 9], + ), + ], +) +@pytest.mark.parametrize( + "other", + [ + pd.Series([10, 11, 23, 234, 13]), + pd.Series([10, 11, 23, 234, 13], index=[11, 12, 13, 44, 33]), + {1: 1}, + {0: 10, 1: 100, 2: 102}, + ], +) +def test_dataframe_concat_series(df, other, sort): + pdf = df + gdf = cudf.from_pandas(df) + + if isinstance(other, dict): + other_pd = pd.Series(other) + else: + other_pd = other + other_gd = cudf.from_pandas(other_pd) + + expected = pd.concat([pdf, other_pd], ignore_index=True, sort=sort) + actual = cudf.concat([gdf, other_gd], ignore_index=True, sort=sort) + + if expected.shape != df.shape: + # Ignore the column type comparison because pandas incorrectly + # returns pd.Index([1, 2, 3], dtype="object") instead + # of pd.Index([1, 2, 3], dtype="int64") + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_column_type=False, + check_index_type=True, + ) + else: + assert_eq(expected, actual, check_index_type=not gdf.empty) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame(), + pd.DataFrame(index=[10, 20, 30]), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), + pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[7, 20, 11, 9], + ), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[100]), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame( + {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, + index=[100, 200, 300, 400, 500, 0], + ), + ], +) +@pytest.mark.parametrize( + "other", + [ + pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), + pd.DataFrame([[5, 6], [7, 8]], columns=list("BD")), + pd.DataFrame([[5, 6], [7, 8]], columns=list("DE")), + pd.DataFrame(), + pd.DataFrame( + {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] + ), + pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), + pd.DataFrame({"l": [10]}), + pd.DataFrame({"l": [10]}, index=[200]), + 
pd.DataFrame([]), + pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), + pd.DataFrame([], index=[100]), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + } + ), + pd.DataFrame( + { + "a": [315.3324, 3243.32432, 3232.332, -100.32], + "z": [0.3223, 0.32, 0.0000232, 0.32224], + }, + index=[0, 100, 200, 300], + ), + ], +) +def test_dataframe_concat_dataframe(df, other, sort, ignore_index): + pdf = df + other_pd = other + + gdf = cudf.from_pandas(df) + other_gd = cudf.from_pandas(other) + + with _hide_concat_empty_dtype_warning(): + expected = pd.concat( + [pdf, other_pd], sort=sort, ignore_index=ignore_index + ) + actual = cudf.concat( + [gdf, other_gd], sort=sort, ignore_index=ignore_index + ) + + # In empty dataframe cases, Pandas & cudf differ in columns + # creation, pandas creates RangeIndex(0, 0) + # whereas cudf creates an empty Index([], dtype="object"). + check_column_type = ( + False if len(expected.columns) == len(df.columns) == 0 else True + ) + + if expected.shape != df.shape: + assert_eq( + expected.fillna(-1), + actual.fillna(-1), + check_dtype=False, + check_column_type=check_column_type, + ) + else: + assert_eq( + expected, + actual, + check_index_type=not gdf.empty, + check_column_type=check_column_type, + ) + + +@pytest.mark.parametrize("df_1_data", [{"a": [1, 2], "b": [1, 3]}, {}]) +@pytest.mark.parametrize("df_2_data", [{"a": [], "b": []}, {}]) +def test_concat_empty_dataframe(df_1_data, df_2_data): + df_1 = cudf.DataFrame(df_1_data) + df_2 = cudf.DataFrame(df_2_data) + with _hide_concat_empty_dtype_warning(): + got = cudf.concat([df_1, df_2]) + expect = pd.concat([df_1.to_pandas(), df_2.to_pandas()], sort=False) + + # ignoring dtypes as pandas upcasts int to float + # on concatenation with empty dataframes + + assert_eq(got, expect, check_dtype=False, check_index_type=True) + + +@pytest.mark.parametrize( + "df1_d", + [ + {"a": [1, 2], "b": [1, 2], "c": ["s1", "s2"], "d": [1.0, 2.0]}, + {"b": [1.9, 10.9], "c": ["s1", "s2"]}, + {"c": ["s1"], "b": pd.Series([None], dtype="float"), "a": [False]}, + ], +) +@pytest.mark.parametrize( + "df2_d", + [ + {"a": [1, 2, 3]}, + {"a": [1, None, 3], "b": [True, True, False], "c": ["s3", None, "s4"]}, + {"a": [], "b": []}, + {}, + ], +) +def test_concat_different_column_dataframe(df1_d, df2_d): + with _hide_concat_empty_dtype_warning(): + got = cudf.concat( + [ + cudf.DataFrame(df1_d), + cudf.DataFrame(df2_d), + cudf.DataFrame(df1_d), + ], + sort=False, + ) + + pdf1 = pd.DataFrame(df1_d) + pdf2 = pd.DataFrame(df2_d) + + expect = pd.concat([pdf1, pdf2, pdf1], sort=False) + + # numerical columns are upcasted to float in cudf.DataFrame.to_pandas() + # casts nan to 0 in non-float numerical columns + + numeric_cols = got.dtypes[got.dtypes != "object"].index + for col in numeric_cols: + got[col] = got[col].astype(np.float64).fillna(np.nan) + + assert_eq(got, expect, check_dtype=False, check_index_type=True) + + +@pytest.mark.parametrize( + "ser_1", [pd.Series([1, 2, 3]), pd.Series([], dtype="float64")] +) +def test_concat_empty_series(ser_1): + ser_2 = pd.Series([], dtype="float64") + with _hide_concat_empty_dtype_warning(): + got = cudf.concat([cudf.Series(ser_1), cudf.Series(ser_2)]) + expect = pd.concat([ser_1, ser_2]) + + assert_eq(got, expect, check_index_type=True) + + +def test_dataframe_concat_different_numerical_columns( + numeric_and_temporal_types_as_str, numeric_and_temporal_types_as_str2 +): + df1 = pd.DataFrame( + dict( + 
x=pd.Series(np.arange(5)).astype(numeric_and_temporal_types_as_str) + ) + ) + df2 = pd.DataFrame( + dict( + x=pd.Series(np.arange(5)).astype( + numeric_and_temporal_types_as_str2 + ) + ) + ) + if ( + numeric_and_temporal_types_as_str != numeric_and_temporal_types_as_str2 + and "datetime" in numeric_and_temporal_types_as_str + or "datetime" in numeric_and_temporal_types_as_str2 + ): + with pytest.raises(TypeError): + cudf.concat([df1, df2]) + else: + pres = pd.concat([df1, df2]) + gres = cudf.concat([cudf.from_pandas(df1), cudf.from_pandas(df2)]) + assert_eq(pres, gres, check_dtype=False, check_index_type=True) + + +def test_dataframe_concat_different_column_types(): + df1 = cudf.Series([42], dtype=np.float64) + df2 = cudf.Series(["a"], dtype="category") + with pytest.raises(ValueError): + cudf.concat([df1, df2]) + + df2 = cudf.Series(["a string"]) + with pytest.raises(TypeError): + cudf.concat([df1, df2]) + + +@pytest.mark.parametrize("a", [[1, 2, 3], [1, 10, 30]]) +@pytest.mark.parametrize("b", [[4, 5, 6], [-11, -100, 30]]) +def test_concat_index(a, b): + ser_a = pd.Series(a) + ser_b = pd.Series(b) + + gser_a = cudf.Series(a) + gser_b = cudf.Series(b) + + expected = pd.concat([ser_a, ser_b]) + actual = cudf.concat([gser_a, gser_b]) + + assert len(expected) == len(actual) + assert_eq(expected.index, actual.index) + + expected = pd.concat([ser_a, ser_b], ignore_index=True) + actual = cudf.concat([gser_a, gser_b], ignore_index=True) + + assert len(expected) == len(actual) + assert_eq(expected.index, actual.index) + + +def test_concat_with_axis(): + df1 = pd.DataFrame(dict(x=np.arange(5), y=np.arange(5))) + df2 = pd.DataFrame(dict(a=np.arange(5), b=np.arange(5))) + + concat_df = pd.concat([df1, df2], axis=1) + cdf1 = cudf.from_pandas(df1) + cdf2 = cudf.from_pandas(df2) + + # concat only dataframes + concat_cdf = cudf.concat([cdf1, cdf2], axis=1) + assert_eq(concat_cdf, concat_df, check_index_type=True) + + # concat only series + concat_s = pd.concat([df1.x, df1.y], axis=1) + cs1 = cudf.Series.from_pandas(df1.x) + cs2 = cudf.Series.from_pandas(df1.y) + concat_cdf_s = cudf.concat([cs1, cs2], axis=1) + + assert_eq(concat_cdf_s, concat_s, check_index_type=True) + + rng = np.random.default_rng(seed=0) + # concat series and dataframes + s3 = pd.Series(rng.random(5)) + cs3 = cudf.Series.from_pandas(s3) + + concat_cdf_all = cudf.concat([cdf1, cs3, cdf2], axis=1) + concat_df_all = pd.concat([df1, s3, df2], axis=1) + assert_eq(concat_cdf_all, concat_df_all, check_index_type=True) + + # concat manual multi index + midf1 = cudf.from_pandas(df1) + midf1.index = cudf.MultiIndex( + levels=[[0, 1, 2, 3], [0, 1]], codes=[[0, 1, 2, 3, 2], [0, 1, 0, 1, 0]] + ) + midf2 = midf1[2:] + midf2.index = cudf.MultiIndex( + levels=[[3, 4, 5], [2, 0]], codes=[[0, 1, 2], [1, 0, 1]] + ) + mipdf1 = midf1.to_pandas() + mipdf2 = midf2.to_pandas() + + assert_eq( + cudf.concat([midf1, midf2]), + pd.concat([mipdf1, mipdf2]), + check_index_type=True, + ) + assert_eq( + cudf.concat([midf2, midf1]), + pd.concat([mipdf2, mipdf1]), + check_index_type=True, + ) + assert_eq( + cudf.concat([midf1, midf2, midf1]), + pd.concat([mipdf1, mipdf2, mipdf1]), + check_index_type=True, + ) + + rng = np.random.default_rng(seed=0) + # concat groupby multi index + gdf1 = cudf.DataFrame( + { + "x": rng.integers(0, 10, 10), + "y": rng.integers(0, 10, 10), + "z": rng.integers(0, 10, 10), + "v": rng.integers(0, 10, 10), + } + ) + gdf2 = gdf1[5:] + gdg1 = gdf1.groupby(["x", "y"]).min() + gdg2 = gdf2.groupby(["x", "y"]).min() + pdg1 = gdg1.to_pandas() + 
pdg2 = gdg2.to_pandas() + + assert_eq( + cudf.concat([gdg1, gdg2]), + pd.concat([pdg1, pdg2]), + check_index_type=True, + ) + assert_eq( + cudf.concat([gdg2, gdg1]), + pd.concat([pdg2, pdg1]), + check_index_type=True, + ) + + # series multi index concat + gdgz1 = gdg1.z + gdgz2 = gdg2.z + pdgz1 = gdgz1.to_pandas() + pdgz2 = gdgz2.to_pandas() + + assert_eq( + cudf.concat([gdgz1, gdgz2]), + pd.concat([pdgz1, pdgz2]), + check_index_type=True, + ) + assert_eq( + cudf.concat([gdgz2, gdgz1]), + pd.concat([pdgz2, pdgz1]), + check_index_type=True, + ) diff --git a/python/cudf/cudf/tests/series/methods/test_keys.py b/python/cudf/cudf/tests/series/methods/test_keys.py new file mode 100644 index 00000000000..6f9c78ab008 --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_keys.py @@ -0,0 +1,32 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "ps", + [ + pd.Series([1, 2, 3, 4, 5, 10, 11, 12, 33, 55, 19]), + pd.Series(["abc", "def", "ghi", "xyz", "pqr", "abc"]), + pd.Series( + [1, 2, 3, 4, 5, 10], + index=["abc", "def", "ghi", "xyz", "pqr", "abc"], + ), + pd.Series( + ["abc", "def", "ghi", "xyz", "pqr", "abc"], + index=[1, 2, 3, 4, 5, 10], + ), + pd.Series(index=["a", "b", "c", "d", "e", "f"], dtype="float64"), + pd.Series(index=[10, 11, 12], dtype="float64"), + pd.Series(dtype="float64"), + pd.Series([], dtype="float64"), + ], +) +def test_series_keys(ps): + gds = cudf.from_pandas(ps) + + assert_eq(ps.keys(), gds.keys()) diff --git a/python/cudf/cudf/tests/series/test_attributes.py b/python/cudf/cudf/tests/series/test_attributes.py index 65ba67dce90..677a791e1d9 100644 --- a/python/cudf/cudf/tests/series/test_attributes.py +++ b/python/cudf/cudf/tests/series/test_attributes.py @@ -206,6 +206,23 @@ def test_axes(data): assert_eq(e, a) +@pytest.mark.parametrize( + "ps", + [ + pd.Series(dtype="float64"), + pd.Series(index=[100, 10, 1, 0], dtype="float64"), + pd.Series([], dtype="float64"), + pd.Series(["a", "b", "c", "d"]), + pd.Series(["a", "b", "c", "d"], index=[0, 1, 10, 11]), + ], +) +def test_series_empty(ps): + ps = ps + gs = cudf.from_pandas(ps) + + assert_eq(ps.empty, gs.empty) + + @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index ecd68f46e46..d0ad73de09c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1,14 +1,8 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. 
import array as arr -import datetime -import decimal -import io import operator -import re import textwrap -import warnings -from contextlib import contextmanager from copy import copy import cupy @@ -19,14 +13,13 @@ from packaging import version import cudf -from cudf.api.extensions import no_default from cudf.core._compat import ( PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION, ) from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column.column import as_column -from cudf.testing import _utils as utils, assert_eq +from cudf.testing import assert_eq from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, @@ -54,96 +47,6 @@ pytest_xfail = pytest.mark.skipif -@contextmanager -def _hide_ufunc_warnings(eval_str): - # pandas raises warnings for some inputs to the following ufuncs: - if any( - x in eval_str - for x in { - "arctanh", - "log", - } - ): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "invalid value encountered in", - category=RuntimeWarning, - ) - warnings.filterwarnings( - "ignore", - "divide by zero encountered in", - category=RuntimeWarning, - ) - yield - else: - yield - - -@contextmanager -def _hide_concat_empty_dtype_warning(): - with warnings.catch_warnings(): - # Ignoring warnings in this test as warnings are - # being caught and validated in other tests. - warnings.filterwarnings( - "ignore", - "The behavior of array concatenation with empty " - "entries is deprecated.", - category=FutureWarning, - ) - yield - - -@pytest.fixture( - params=[ - pd.DataFrame( - { - "a": [0, 1, 2, np.nan, 4, None, 6], - "b": [np.nan, None, "u", "h", "d", "a", "m"], - }, - index=["q", "w", "e", "r", "t", "y", "u"], - ), - pd.DataFrame({"a": [0, 1, 2, 3, 4], "b": ["a", "b", "u", "h", "d"]}), - pd.DataFrame( - { - "a": [None, None, np.nan, None], - "b": [np.nan, None, np.nan, None], - } - ), - pd.DataFrame({"a": []}), - pd.DataFrame({"a": [np.nan], "b": [None]}), - pd.DataFrame({"a": ["a", "b", "c", None, "e"]}), - pd.DataFrame({"a": ["a", "b", "c", "d", "e"]}), - ] -) -def na_data(request): - return request.param - - -@pytest.mark.parametrize("a", [[1, 2, 3], [1, 10, 30]]) -@pytest.mark.parametrize("b", [[4, 5, 6], [-11, -100, 30]]) -def test_concat_index(a, b): - df = pd.DataFrame() - df["a"] = a - df["b"] = b - - gdf = cudf.DataFrame() - gdf["a"] = a - gdf["b"] = b - - expected = pd.concat([df.a, df.b]) - actual = cudf.concat([gdf.a, gdf.b]) - - assert len(expected) == len(actual) - assert_eq(expected.index, actual.index) - - expected = pd.concat([df.a, df.b], ignore_index=True) - actual = cudf.concat([gdf.a, gdf.b], ignore_index=True) - - assert len(expected) == len(actual) - assert_eq(expected.index, actual.index) - - def test_dataframe_basic(): rng = np.random.default_rng(seed=0) df = cudf.DataFrame() @@ -571,203 +474,6 @@ def test_empty_dataframe_setitem_df(): assert_eq(gdf1, gdf2) -@pytest.mark.parametrize("dtype1", utils.supported_numpy_dtypes) -@pytest.mark.parametrize("dtype2", utils.supported_numpy_dtypes) -def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): - df1 = pd.DataFrame(dict(x=pd.Series(np.arange(5)).astype(dtype1))) - df2 = pd.DataFrame(dict(x=pd.Series(np.arange(5)).astype(dtype2))) - if dtype1 != dtype2 and "datetime" in dtype1 or "datetime" in dtype2: - with pytest.raises(TypeError): - cudf.concat([df1, df2]) - else: - pres = pd.concat([df1, df2]) - gres = cudf.concat([cudf.from_pandas(df1), cudf.from_pandas(df2)]) - assert_eq(pres, gres, check_dtype=False, check_index_type=True) - - -def 
test_dataframe_concat_different_column_types(): - df1 = cudf.Series([42], dtype=np.float64) - df2 = cudf.Series(["a"], dtype="category") - with pytest.raises(ValueError): - cudf.concat([df1, df2]) - - df2 = cudf.Series(["a string"]) - with pytest.raises(TypeError): - cudf.concat([df1, df2]) - - -@pytest.mark.parametrize("df_1_data", [{"a": [1, 2], "b": [1, 3]}, {}]) -@pytest.mark.parametrize("df_2_data", [{"a": [], "b": []}, {}]) -def test_concat_empty_dataframe(df_1_data, df_2_data): - df_1 = cudf.DataFrame(df_1_data) - df_2 = cudf.DataFrame(df_2_data) - with _hide_concat_empty_dtype_warning(): - got = cudf.concat([df_1, df_2]) - expect = pd.concat([df_1.to_pandas(), df_2.to_pandas()], sort=False) - - # ignoring dtypes as pandas upcasts int to float - # on concatenation with empty dataframes - - assert_eq(got, expect, check_dtype=False, check_index_type=True) - - -@pytest.mark.parametrize( - "df1_d", - [ - {"a": [1, 2], "b": [1, 2], "c": ["s1", "s2"], "d": [1.0, 2.0]}, - {"b": [1.9, 10.9], "c": ["s1", "s2"]}, - {"c": ["s1"], "b": pd.Series([None], dtype="float"), "a": [False]}, - ], -) -@pytest.mark.parametrize( - "df2_d", - [ - {"a": [1, 2, 3]}, - {"a": [1, None, 3], "b": [True, True, False], "c": ["s3", None, "s4"]}, - {"a": [], "b": []}, - {}, - ], -) -def test_concat_different_column_dataframe(df1_d, df2_d): - with _hide_concat_empty_dtype_warning(): - got = cudf.concat( - [ - cudf.DataFrame(df1_d), - cudf.DataFrame(df2_d), - cudf.DataFrame(df1_d), - ], - sort=False, - ) - - pdf1 = pd.DataFrame(df1_d) - pdf2 = pd.DataFrame(df2_d) - - expect = pd.concat([pdf1, pdf2, pdf1], sort=False) - - # numerical columns are upcasted to float in cudf.DataFrame.to_pandas() - # casts nan to 0 in non-float numerical columns - - numeric_cols = got.dtypes[got.dtypes != "object"].index - for col in numeric_cols: - got[col] = got[col].astype(np.float64).fillna(np.nan) - - assert_eq(got, expect, check_dtype=False, check_index_type=True) - - -@pytest.mark.parametrize( - "ser_1", [pd.Series([1, 2, 3]), pd.Series([], dtype="float64")] -) -def test_concat_empty_series(ser_1): - ser_2 = pd.Series([], dtype="float64") - with _hide_concat_empty_dtype_warning(): - got = cudf.concat([cudf.Series(ser_1), cudf.Series(ser_2)]) - expect = pd.concat([ser_1, ser_2]) - - assert_eq(got, expect, check_index_type=True) - - -def test_concat_with_axis(): - df1 = pd.DataFrame(dict(x=np.arange(5), y=np.arange(5))) - df2 = pd.DataFrame(dict(a=np.arange(5), b=np.arange(5))) - - concat_df = pd.concat([df1, df2], axis=1) - cdf1 = cudf.from_pandas(df1) - cdf2 = cudf.from_pandas(df2) - - # concat only dataframes - concat_cdf = cudf.concat([cdf1, cdf2], axis=1) - assert_eq(concat_cdf, concat_df, check_index_type=True) - - # concat only series - concat_s = pd.concat([df1.x, df1.y], axis=1) - cs1 = cudf.Series.from_pandas(df1.x) - cs2 = cudf.Series.from_pandas(df1.y) - concat_cdf_s = cudf.concat([cs1, cs2], axis=1) - - assert_eq(concat_cdf_s, concat_s, check_index_type=True) - - rng = np.random.default_rng(seed=0) - # concat series and dataframes - s3 = pd.Series(rng.random(5)) - cs3 = cudf.Series.from_pandas(s3) - - concat_cdf_all = cudf.concat([cdf1, cs3, cdf2], axis=1) - concat_df_all = pd.concat([df1, s3, df2], axis=1) - assert_eq(concat_cdf_all, concat_df_all, check_index_type=True) - - # concat manual multi index - midf1 = cudf.from_pandas(df1) - midf1.index = cudf.MultiIndex( - levels=[[0, 1, 2, 3], [0, 1]], codes=[[0, 1, 2, 3, 2], [0, 1, 0, 1, 0]] - ) - midf2 = midf1[2:] - midf2.index = cudf.MultiIndex( - levels=[[3, 4, 5], [2, 
0]], codes=[[0, 1, 2], [1, 0, 1]] - ) - mipdf1 = midf1.to_pandas() - mipdf2 = midf2.to_pandas() - - assert_eq( - cudf.concat([midf1, midf2]), - pd.concat([mipdf1, mipdf2]), - check_index_type=True, - ) - assert_eq( - cudf.concat([midf2, midf1]), - pd.concat([mipdf2, mipdf1]), - check_index_type=True, - ) - assert_eq( - cudf.concat([midf1, midf2, midf1]), - pd.concat([mipdf1, mipdf2, mipdf1]), - check_index_type=True, - ) - - rng = np.random.default_rng(seed=0) - # concat groupby multi index - gdf1 = cudf.DataFrame( - { - "x": rng.integers(0, 10, 10), - "y": rng.integers(0, 10, 10), - "z": rng.integers(0, 10, 10), - "v": rng.integers(0, 10, 10), - } - ) - gdf2 = gdf1[5:] - gdg1 = gdf1.groupby(["x", "y"]).min() - gdg2 = gdf2.groupby(["x", "y"]).min() - pdg1 = gdg1.to_pandas() - pdg2 = gdg2.to_pandas() - - assert_eq( - cudf.concat([gdg1, gdg2]), - pd.concat([pdg1, pdg2]), - check_index_type=True, - ) - assert_eq( - cudf.concat([gdg2, gdg1]), - pd.concat([pdg2, pdg1]), - check_index_type=True, - ) - - # series multi index concat - gdgz1 = gdg1.z - gdgz2 = gdg2.z - pdgz1 = gdgz1.to_pandas() - pdgz2 = gdgz2.to_pandas() - - assert_eq( - cudf.concat([gdgz1, gdgz2]), - pd.concat([pdgz1, pdgz2]), - check_index_type=True, - ) - assert_eq( - cudf.concat([gdgz2, gdgz1]), - pd.concat([pdgz2, pdgz1]), - check_index_type=True, - ) - - @pytest.mark.parametrize("nrows", [0, 3]) def test_nonmatching_index_setitem(nrows): rng = np.random.default_rng(seed=0) @@ -2636,147 +2342,6 @@ def test_df_sr_binop_col_order(op): assert_eq(expect, got) -@pytest.mark.parametrize("set_index", [None, "A", "C", "D"]) -@pytest.mark.parametrize("index", [True, False]) -@pytest.mark.parametrize("deep", [True, False]) -def test_memory_usage(deep, index, set_index): - # Testing numerical/datetime by comparing with pandas - # (string and categorical columns will be different) - rows = 100 - df = pd.DataFrame( - { - "A": np.arange(rows, dtype="int64"), - "B": np.arange(rows, dtype="int32"), - "C": np.arange(rows, dtype="float64"), - } - ) - df["D"] = pd.to_datetime(df.A) - if set_index: - df = df.set_index(set_index) - - gdf = cudf.from_pandas(df) - - if index and set_index is None: - # Special Case: Assume RangeIndex size == 0 - with expect_warning_if(deep, UserWarning): - assert gdf.index.memory_usage(deep=deep) == 0 - - else: - # Check for Series only - assert df["B"].memory_usage(index=index, deep=deep) == gdf[ - "B" - ].memory_usage(index=index, deep=deep) - - # Check for entire DataFrame - assert_eq( - df.memory_usage(index=index, deep=deep).sort_index(), - gdf.memory_usage(index=index, deep=deep).sort_index(), - ) - - -@pytest_xfail -def test_memory_usage_string(): - rows = 100 - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "A": np.arange(rows, dtype="int32"), - "B": rng.choice(["apple", "banana", "orange"], rows), - } - ) - gdf = cudf.from_pandas(df) - - # Check deep=False (should match pandas) - assert gdf.B.memory_usage(deep=False, index=False) == df.B.memory_usage( - deep=False, index=False - ) - - # Check string column - assert gdf.B.memory_usage(deep=True, index=False) == df.B.memory_usage( - deep=True, index=False - ) - - # Check string index - assert gdf.set_index("B").index.memory_usage( - deep=True - ) == df.B.memory_usage(deep=True, index=False) - - -def test_memory_usage_cat(): - rows = 100 - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - { - "A": np.arange(rows, dtype="int32"), - "B": rng.choice(["apple", "banana", "orange"], rows), - } - ) - df["B"] = df.B.astype("category") - gdf = 
cudf.from_pandas(df) - - expected = ( - gdf.B._column.categories.memory_usage - + gdf.B._column.codes.memory_usage - ) - - # Check cat column - assert gdf.B.memory_usage(deep=True, index=False) == expected - - # Check cat index - assert gdf.set_index("B").index.memory_usage(deep=True) == expected - - -def test_memory_usage_list(): - df = cudf.DataFrame({"A": [[0, 1, 2, 3], [4, 5, 6], [7, 8], [9]]}) - expected = ( - df.A._column.offsets.memory_usage + df.A._column.elements.memory_usage - ) - assert expected == df.A.memory_usage() - - -def test_memory_usage_multi(): - # We need to sample without replacement to guarantee that the size of the - # levels are always the same. - rng = np.random.default_rng(seed=0) - rows = 10 - df = pd.DataFrame( - { - "A": np.arange(rows, dtype="int32"), - "B": rng.choice( - np.arange(rows, dtype="int64"), rows, replace=False - ), - "C": rng.choice( - np.arange(rows, dtype="float64"), rows, replace=False - ), - } - ).set_index(["B", "C"]) - gdf = cudf.from_pandas(df) - # Assume MultiIndex memory footprint is just that - # of the underlying columns, levels, and codes - expect = rows * 16 # Source Columns - expect += rows * 16 # Codes - expect += rows * 8 # Level 0 - expect += rows * 8 # Level 1 - - assert expect == gdf.index.memory_usage(deep=True) - - -@pytest.mark.parametrize("index", [False, True]) -def test_memory_usage_index_preserve_types(index): - data = [[1, 2, 3]] - columns = pd.Index(np.array([1, 2, 3], dtype=np.int8), name="a") - result = ( - cudf.DataFrame(data, columns=columns).memory_usage(index=index).index - ) - expected = ( - pd.DataFrame(data, columns=columns).memory_usage(index=index).index - ) - if index: - # pandas returns an Index[object] with int and string elements - expected = expected.astype(str) - assert_eq(result, expected) - - @pytest.mark.parametrize( "list_input", [ @@ -2978,3090 +2543,3 @@ def test_dataframe_assign_scalar_with_scalar_cols(col_data, assign_val): gdf["b"] = assign_val assert_eq(pdf, gdf) - - -def test_dataframe_info_basic(): - buffer = io.StringIO() - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - Index: 10 entries, a to 1111 - Data columns (total 10 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 0 10 non-null float64 - 1 1 10 non-null float64 - 2 2 10 non-null float64 - 3 3 10 non-null float64 - 4 4 10 non-null float64 - 5 5 10 non-null float64 - 6 6 10 non-null float64 - 7 7 10 non-null float64 - 8 8 10 non-null float64 - 9 9 10 non-null float64 - dtypes: float64(10) - memory usage: 859.0+ bytes - """ - ) - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - rng.standard_normal(size=(10, 10)), - index=["a", "2", "3", "4", "5", "6", "7", "8", "100", "1111"], - ) - cudf.from_pandas(df).info(buf=buffer, verbose=True) - s = buffer.getvalue() - assert str_cmp == s - - -def test_dataframe_info_verbose_mem_usage(): - buffer = io.StringIO() - df = pd.DataFrame({"a": [1, 2, 3], "b": ["safdas", "assa", "asdasd"]}) - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 3 entries, 0 to 2 - Data columns (total 2 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 a 3 non-null int64 - 1 b 3 non-null object - dtypes: int64(1), object(1) - memory usage: 56.0+ bytes - """ - ) - cudf.from_pandas(df).info(buf=buffer, verbose=True) - s = buffer.getvalue() - assert str_cmp == s - - buffer.truncate(0) - buffer.seek(0) - - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 3 entries, 0 to 2 - Columns: 2 entries, a to b - dtypes: int64(1), object(1) - memory usage: 56.0+ bytes - """ -
) - cudf.from_pandas(df).info(buf=buffer, verbose=False) - s = buffer.getvalue() - assert str_cmp == s - - buffer.truncate(0) - buffer.seek(0) - - df = pd.DataFrame( - {"a": [1, 2, 3], "b": ["safdas", "assa", "asdasd"]}, - index=["sdfdsf", "sdfsdfds", "dsfdf"], - ) - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - Index: 3 entries, sdfdsf to dsfdf - Data columns (total 2 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 a 3 non-null int64 - 1 b 3 non-null object - dtypes: int64(1), object(1) - memory usage: 91.0 bytes - """ - ) - cudf.from_pandas(df).info(buf=buffer, verbose=True, memory_usage="deep") - s = buffer.getvalue() - assert str_cmp == s - - buffer.truncate(0) - buffer.seek(0) - - int_values = [1, 2, 3, 4, 5] - text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] - float_values = [0.0, 0.25, 0.5, 0.75, 1.0] - - df = cudf.DataFrame( - { - "int_col": int_values, - "text_col": text_values, - "float_col": float_values, - } - ) - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 5 entries, 0 to 4 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 int_col 5 non-null int64 - 1 text_col 5 non-null object - 2 float_col 5 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 130.0 bytes - """ - ) - df.info(buf=buffer, verbose=True, memory_usage="deep") - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - -def test_dataframe_info_null_counts(): - int_values = [1, 2, 3, 4, 5] - text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] - float_values = [0.0, 0.25, 0.5, 0.75, 1.0] - - df = cudf.DataFrame( - { - "int_col": int_values, - "text_col": text_values, - "float_col": float_values, - } - ) - buffer = io.StringIO() - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 5 entries, 0 to 4 - Data columns (total 3 columns): - # Column Dtype - --- ------ ----- - 0 int_col int64 - 1 text_col object - 2 float_col float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 130.0+ bytes - """ - ) - df.info(buf=buffer, verbose=True, null_counts=False) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - df.info(buf=buffer, verbose=True, max_cols=0) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - df = cudf.DataFrame() - - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 0 entries - Empty DataFrame""" - ) - df.info(buf=buffer, verbose=True) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - df = cudf.DataFrame( - { - "a": [1, 2, 3, None, 10, 11, 12, None], - "b": ["a", "b", "c", "sd", "sdf", "sd", None, None], - } - ) - - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 8 entries, 0 to 7 - Data columns (total 2 columns): - # Column Dtype - --- ------ ----- - 0 a int64 - 1 b object - dtypes: int64(1), object(1) - memory usage: 238.0+ bytes - """ - ) - pd.options.display.max_info_rows = 2 - df.info(buf=buffer, max_cols=2, null_counts=None) - pd.reset_option("display.max_info_rows") - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - str_cmp = textwrap.dedent( - """\ - <class 'cudf.core.dataframe.DataFrame'> - RangeIndex: 8 entries, 0 to 7 - Data columns (total 2 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 a 6 non-null int64 - 1 b 6 non-null object - dtypes: int64(1), object(1) - memory
usage: 238.0+ bytes - """ - ) - - df.info(buf=buffer, max_cols=2, null_counts=None) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - buffer.truncate(0) - buffer.seek(0) - - df.info(buf=buffer, null_counts=True) - actual_string = buffer.getvalue() - assert str_cmp == actual_string - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame({"a": [1, 2, 3, 4, 5, 10, 11, 12, 33, 55, 19]}), - pd.DataFrame( - { - "one": [1, 2, 3, 4, 5, 10], - "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], - } - ), - pd.DataFrame( - { - "one": [1, 2, 3, 4, 5, 10], - "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], - }, - index=[10, 20, 30, 40, 50, 60], - ), - pd.DataFrame( - { - "one": [1, 2, 3, 4, 5, 10], - "two": ["abc", "def", "ghi", "xyz", "pqr", "abc"], - }, - index=["a", "b", "c", "d", "e", "f"], - ), - pd.DataFrame(index=["a", "b", "c", "d", "e", "f"]), - pd.DataFrame(columns=["a", "b", "c", "d", "e", "f"]), - pd.DataFrame(index=[10, 11, 12]), - pd.DataFrame(columns=[10, 11, 12]), - pd.DataFrame(), - pd.DataFrame({"one": [], "two": []}), - pd.DataFrame({2: [], 1: []}), - pd.DataFrame( - { - 0: [1, 2, 3, 4, 5, 10], - 1: ["abc", "def", "ghi", "xyz", "pqr", "abc"], - 100: ["a", "b", "b", "x", "z", "a"], - }, - index=[10, 20, 30, 40, 50, 60], - ), - ], -) -def test_dataframe_keys(df): - gdf = cudf.from_pandas(df) - - assert_eq( - df.keys(), - gdf.keys(), - ) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series([1, 2, 3, 4, 5, 10, 11, 12, 33, 55, 19]), - pd.Series(["abc", "def", "ghi", "xyz", "pqr", "abc"]), - pd.Series( - [1, 2, 3, 4, 5, 10], - index=["abc", "def", "ghi", "xyz", "pqr", "abc"], - ), - pd.Series( - ["abc", "def", "ghi", "xyz", "pqr", "abc"], - index=[1, 2, 3, 4, 5, 10], - ), - pd.Series(index=["a", "b", "c", "d", "e", "f"], dtype="float64"), - pd.Series(index=[10, 11, 12], dtype="float64"), - pd.Series(dtype="float64"), - pd.Series([], dtype="float64"), - ], -) -def test_series_keys(ps): - gds = cudf.from_pandas(ps) - - assert_eq(ps.keys(), gds.keys()) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[7, 20, 11, 9], - ), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[100]), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame( - {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, - index=[100, 200, 300, 400, 500, 0], - ), - ], -) -@pytest.mark.parametrize( - "other", - [ - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - pd.DataFrame([[5, 6], [7, 8]], columns=list("BD")), - pd.DataFrame([[5, 6], [7, 8]], columns=list("DE")), - pd.DataFrame(), - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[200]), - pd.DataFrame([]), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - pd.DataFrame([], index=[100]), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - 
"z": [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - ], -) -@pytest.mark.parametrize("sort", [False, True]) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_dataframe_concat_dataframe(df, other, sort, ignore_index): - pdf = df - other_pd = other - - gdf = cudf.from_pandas(df) - other_gd = cudf.from_pandas(other) - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - [pdf, other_pd], sort=sort, ignore_index=ignore_index - ) - actual = cudf.concat( - [gdf, other_gd], sort=sort, ignore_index=ignore_index - ) - - # In empty dataframe cases, Pandas & cudf differ in columns - # creation, pandas creates RangeIndex(0, 0) - # whereas cudf creates an empty Index([], dtype="object"). - check_column_type = ( - False if len(expected.columns) == len(df.columns) == 0 else True - ) - - if expected.shape != df.shape: - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_column_type=check_column_type, - ) - else: - assert_eq( - expected, - actual, - check_index_type=not gdf.empty, - check_column_type=check_column_type, - ) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame({12: [], 22: []}), - pd.DataFrame([[1, 2], [3, 4]], columns=[10, 20]), - pd.DataFrame([[1, 2], [3, 4]], columns=[0, 1], index=[10, 20]), - pd.DataFrame([[1, 2], [3, 4]], columns=[1, 0], index=[7, 8]), - pd.DataFrame( - { - 23: [315.3324, 3243.32432, 3232.332, -100.32], - 33: [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - 0: [315.3324, 3243.32432, 3232.332, -100.32], - 1: [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[7, 20, 11, 9], - ), - ], -) -@pytest.mark.parametrize( - "other", - [ - pd.Series([10, 11, 23, 234, 13]), - pd.Series([10, 11, 23, 234, 13], index=[11, 12, 13, 44, 33]), - {1: 1}, - {0: 10, 1: 100, 2: 102}, - ], -) -@pytest.mark.parametrize("sort", [False, True]) -def test_dataframe_concat_series(df, other, sort): - pdf = df - gdf = cudf.from_pandas(df) - - if isinstance(other, dict): - other_pd = pd.Series(other) - else: - other_pd = other - other_gd = cudf.from_pandas(other_pd) - - expected = pd.concat([pdf, other_pd], ignore_index=True, sort=sort) - actual = cudf.concat([gdf, other_gd], ignore_index=True, sort=sort) - - if expected.shape != df.shape: - # Ignore the column type comparison because pandas incorrectly - # returns pd.Index([1, 2, 3], dtype="object") instead - # of pd.Index([1, 2, 3], dtype="int64") - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_column_type=False, - check_index_type=True, - ) - else: - assert_eq(expected, actual, check_index_type=not gdf.empty) - - -def test_dataframe_concat_series_mixed_index(): - df = cudf.DataFrame({"first": [], "d": []}) - pdf = df.to_pandas() - - sr = cudf.Series([1, 2, 3, 4]) - psr = sr.to_pandas() - - assert_eq( - cudf.concat([df, sr], ignore_index=True), - pd.concat([pdf, psr], ignore_index=True), - check_dtype=False, - ) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[10, 20, 30]), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), - pd.DataFrame( 
- { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[7, 20, 11, 9], - ), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[100]), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame( - {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, - index=[100, 200, 300, 400, 500, 0], - ), - ], -) -@pytest.mark.parametrize( - "other", - [ - [pd.DataFrame([[5, 6], [7, 8]], columns=list("AB"))], - [ - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - pd.DataFrame([[5, 6], [7, 8]], columns=list("BD")), - pd.DataFrame([[5, 6], [7, 8]], columns=list("DE")), - ], - [pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()], - [ - pd.DataFrame( - {"c": [10, 11, 22, 33, 44, 100]}, index=[7, 8, 9, 10, 11, 20] - ), - pd.DataFrame(), - pd.DataFrame(), - pd.DataFrame([[5, 6], [7, 8]], columns=list("AB")), - ], - [ - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[200]), - ], - [pd.DataFrame([]), pd.DataFrame([], index=[100])], - [ - pd.DataFrame([]), - pd.DataFrame([], index=[100]), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - ], - [ - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - ], - [ - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - ], - [ - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - ], - [ - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[0, 100, 200, 300], - ), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - ], - ], -) -@pytest.mark.parametrize("sort", [False, True]) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_dataframe_concat_dataframe_lists(df, other, sort, ignore_index): - pdf = df - other_pd = other - - gdf = cudf.from_pandas(df) - other_gd = [cudf.from_pandas(o) for o in other] - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - [pdf, *other_pd], sort=sort, ignore_index=ignore_index - ) - actual = cudf.concat( - [gdf, *other_gd], sort=sort, ignore_index=ignore_index - ) - - # In some cases, Pandas creates an empty Index([], dtype="object") for - # columns whereas cudf creates a RangeIndex(0, 0). 
- check_column_type = ( - False if len(expected.columns) == len(df.columns) == 0 else True - ) - - if expected.shape != df.shape: - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_column_type=check_column_type, - ) - else: - assert_eq( - expected, - actual, - check_index_type=not gdf.empty, - check_column_type=check_column_type, - ) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame({"A": [1, 2, 3, np.nan, None, 6]}), - pd.Series([1, 2, 3, None, np.nan, 5, 6, np.nan]), - ], -) -@pytest.mark.parametrize("alias", ["bfill", "backfill"]) -def test_dataframe_bfill(df, alias): - gdf = cudf.from_pandas(df) - - with expect_warning_if(alias == "backfill"): - actual = getattr(df, alias)() - with expect_warning_if(alias == "backfill"): - expected = getattr(gdf, alias)() - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame({"A": [1, 2, 3, np.nan, None, 6]}), - pd.Series([1, 2, 3, None, np.nan, 5, 6, np.nan]), - ], -) -@pytest.mark.parametrize("alias", ["ffill", "pad"]) -def test_dataframe_ffill(df, alias): - gdf = cudf.from_pandas(df) - - with expect_warning_if(alias == "pad"): - actual = getattr(df, alias)() - with expect_warning_if(alias == "pad"): - expected = getattr(gdf, alias)() - assert_eq(expected, actual) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB")), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[10, 20]), - pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[7, 8]), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - } - ), - pd.DataFrame( - { - "a": [315.3324, 3243.32432, 3232.332, -100.32], - "z": [0.3223, 0.32, 0.0000232, 0.32224], - }, - index=[7, 20, 11, 9], - ), - pd.DataFrame({"l": [10]}), - pd.DataFrame({"l": [10]}, index=[100]), - pd.DataFrame({"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}), - pd.DataFrame( - {"f": [10.2, 11.2332, 0.22, 3.3, 44.23, 10.0]}, - index=[100, 200, 300, 400, 500, 0], - ), - pd.DataFrame({"first_col": [], "second_col": [], "third_col": []}), - ], -) -@pytest.mark.parametrize( - "other", - [ - [[1, 2], [10, 100]], - [[1, 2, 10, 100, 0.1, 0.2, 0.0021]], - [[]], - [[], [], [], []], - [[0.23, 0.00023, -10.00, 100, 200, 1000232, 1232.32323]], - ], -) -@pytest.mark.parametrize("sort", [False, True]) -@pytest.mark.parametrize("ignore_index", [True, False]) -def test_dataframe_concat_lists(df, other, sort, ignore_index): - pdf = df - other_pd = [pd.DataFrame(o) for o in other] - - gdf = cudf.from_pandas(df) - other_gd = [cudf.from_pandas(o) for o in other_pd] - - with _hide_concat_empty_dtype_warning(): - expected = pd.concat( - [pdf, *other_pd], sort=sort, ignore_index=ignore_index - ) - actual = cudf.concat( - [gdf, *other_gd], sort=sort, ignore_index=ignore_index - ) - - if expected.shape != df.shape: - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_column_type=not gdf.empty, - ) - else: - assert_eq( - expected, - actual, - check_index_type=not gdf.empty, - check_column_type=len(gdf.columns) != 0, - ) - - -def test_dataframe_concat_series_without_name(): - df = cudf.DataFrame({"a": [1, 2, 3]}) - pdf = df.to_pandas() - gs = cudf.Series([1, 2, 3]) - ps = gs.to_pandas() - - assert_eq(pd.concat([pdf, ps]), cudf.concat([df, gs])) - - -def test_cudf_arrow_array_error(): - df = cudf.DataFrame({"a": [1, 2, 3]}) - - with pytest.raises( - TypeError, - match="Implicit conversion to a 
host PyArrow object via " - "__arrow_array__ is not allowed. Consider using .to_arrow()", - ): - df.__arrow_array__() - - sr = cudf.Series([1, 2, 3]) - - with pytest.raises( - TypeError, - match="Implicit conversion to a host PyArrow object via " - "__arrow_array__ is not allowed. Consider using .to_arrow()", - ): - sr.__arrow_array__() - - sr = cudf.Series(["a", "b", "c"]) - with pytest.raises( - TypeError, - match="Implicit conversion to a host PyArrow object via " - "__arrow_array__ is not allowed. Consider using .to_arrow()", - ): - sr.__arrow_array__() - - -@pytest.mark.parametrize( - "make_weights_axis_1", - [lambda _: None, lambda s: [1] * s, lambda s: np.ones(s)], -) -def test_sample_axis_1( - sample_n_frac, random_state_tuple_axis_1, make_weights_axis_1 -): - n, frac = sample_n_frac - pd_random_state, gd_random_state, checker = random_state_tuple_axis_1 - - pdf = pd.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "float": [0.05, 0.2, 0.3, 0.2, 0.25], - "int": [1, 3, 5, 4, 2], - }, - ) - df = cudf.DataFrame.from_pandas(pdf) - - weights = make_weights_axis_1(len(pdf.columns)) - - expected = pdf.sample( - n=n, - frac=frac, - replace=False, - random_state=pd_random_state, - weights=weights, - axis=1, - ) - got = df.sample( - n=n, - frac=frac, - replace=False, - random_state=gd_random_state, - weights=weights, - axis=1, - ) - checker(expected, got) - - -@pytest.mark.parametrize( - "pdf", - [ - pd.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "float": [0.05, 0.2, 0.3, 0.2, 0.25], - "int": [1, 3, 5, 4, 2], - }, - ), - pd.Series([1, 2, 3, 4, 5]), - ], -) -@pytest.mark.parametrize("replace", [True, False]) -def test_sample_axis_0( - pdf, sample_n_frac, replace, random_state_tuple_axis_0, make_weights_axis_0 -): - n, frac = sample_n_frac - pd_random_state, gd_random_state, checker = random_state_tuple_axis_0 - - df = cudf.from_pandas(pdf) - - pd_weights, gd_weights = make_weights_axis_0( - len(pdf), isinstance(gd_random_state, np.random.RandomState) - ) - if ( - not replace - and not isinstance(gd_random_state, np.random.RandomState) - and gd_weights is not None - ): - pytest.skip( - "`cupy.random.RandomState` doesn't support weighted sampling " - "without replacement." 
- ) - - expected = pdf.sample( - n=n, - frac=frac, - replace=replace, - random_state=pd_random_state, - weights=pd_weights, - axis=0, - ) - - got = df.sample( - n=n, - frac=frac, - replace=replace, - random_state=gd_random_state, - weights=gd_weights, - axis=0, - ) - checker(expected, got) - - -@pytest.mark.parametrize("replace", [True, False]) -@pytest.mark.parametrize( - "random_state_lib", [cupy.random.RandomState, np.random.RandomState] -) -def test_sample_reproducibility(replace, random_state_lib): - df = cudf.DataFrame({"a": cupy.arange(0, 1024)}) - - n = 1024 - expected = df.sample(n, replace=replace, random_state=random_state_lib(10)) - out = df.sample(n, replace=replace, random_state=random_state_lib(10)) - - assert_eq(expected, out) - - -@pytest.mark.parametrize("axis", [0, 1]) -def test_sample_invalid_n_frac_combo(axis): - n, frac = 2, 0.5 - pdf = pd.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "float": [0.05, 0.2, 0.3, 0.2, 0.25], - "int": [1, 3, 5, 4, 2], - }, - ) - df = cudf.DataFrame.from_pandas(pdf) - - assert_exceptions_equal( - lfunc=pdf.sample, - rfunc=df.sample, - lfunc_args_and_kwargs=([], {"n": n, "frac": frac, "axis": axis}), - rfunc_args_and_kwargs=([], {"n": n, "frac": frac, "axis": axis}), - ) - - -@pytest.mark.parametrize("n, frac", [(100, None), (None, 3)]) -@pytest.mark.parametrize("axis", [0, 1]) -def test_oversample_without_replace(n, frac, axis): - pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}) - df = cudf.DataFrame.from_pandas(pdf) - - assert_exceptions_equal( - lfunc=pdf.sample, - rfunc=df.sample, - lfunc_args_and_kwargs=( - [], - {"n": n, "frac": frac, "axis": axis, "replace": False}, - ), - rfunc_args_and_kwargs=( - [], - {"n": n, "frac": frac, "axis": axis, "replace": False}, - ), - ) - - -@pytest.mark.parametrize("random_state", [None, cupy.random.RandomState(42)]) -def test_sample_unsupported_arguments(random_state): - df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]}) - with pytest.raises( - NotImplementedError, - match="Random sampling with cupy does not support these inputs.", - ): - df.sample( - n=2, replace=False, random_state=random_state, weights=[1] * 5 - ) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[100, 10, 1, 0]), - pd.DataFrame(columns=["a", "b", "c", "d"]), - pd.DataFrame(columns=["a", "b", "c", "d"], index=[100]), - pd.DataFrame( - columns=["a", "b", "c", "d"], index=[100, 10000, 2131, 133] - ), - pd.DataFrame({"a": [1, 2, 3], "b": ["abc", "xyz", "klm"]}), - ], -) -def test_dataframe_empty(df): - pdf = df - gdf = cudf.from_pandas(pdf) - - assert_eq(pdf.empty, gdf.empty) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame(), - pd.DataFrame(index=[100, 10, 1, 0]), - pd.DataFrame(columns=["a", "b", "c", "d"]), - pd.DataFrame(columns=["a", "b", "c", "d"], index=[100]), - pd.DataFrame( - columns=["a", "b", "c", "d"], index=[100, 10000, 2131, 133] - ), - pd.DataFrame({"a": [1, 2, 3], "b": ["abc", "xyz", "klm"]}), - ], -) -def test_dataframe_size(df): - pdf = df - gdf = cudf.from_pandas(pdf) - - assert_eq(pdf.size, gdf.size) - - -@pytest.mark.parametrize( - "ps", - [ - pd.Series(dtype="float64"), - pd.Series(index=[100, 10, 1, 0], dtype="float64"), - pd.Series([], dtype="float64"), - pd.Series(["a", "b", "c", "d"]), - pd.Series(["a", "b", "c", "d"], index=[0, 1, 10, 11]), - ], -) -def test_series_empty(ps): - ps = ps - gs = cudf.from_pandas(ps) - - assert_eq(ps.empty, gs.empty) - - -@pytest.mark.parametrize( - "data", - [ - None, - [], - [1], - {"a": [10, 11, 12]}, - { - "a": [10, 11, 12], - 
"another column name": [12, 22, 34], - "xyz": [0, 10, 11], - }, - ], -) -@pytest.mark.parametrize( - "columns", - [["a"], ["another column name"], None, pd.Index(["a"], name="index name")], -) -def test_dataframe_init_with_columns(data, columns): - pdf = pd.DataFrame(data, columns=columns) - gdf = cudf.DataFrame(data, columns=columns) - - assert_eq( - pdf, - gdf, - check_index_type=len(pdf.index) != 0, - check_dtype=not (pdf.empty and len(pdf.columns)), - check_column_type=False, - ) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "data, ignore_dtype", - [ - ([pd.Series([1, 2, 3])], False), - ([pd.Series(index=[1, 2, 3], dtype="float64")], False), - ([pd.Series(name="empty series name", dtype="float64")], False), - ( - [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], - False, - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], dtype="float64"), - pd.Series([3], name="series that is named"), - ], - False, - ), - ([pd.Series([1, 2, 3], name="hi")] * 10, False), - ([pd.Series([1, 2, 3], name=None, index=[10, 11, 12])] * 10, False), - ( - [ - pd.Series([1, 2, 3], name=None, index=[10, 11, 12]), - pd.Series([1, 2, 30], name=None, index=[13, 144, 15]), - ], - True, - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], dtype="float64"), - pd.Series(index=[10, 11, 12], dtype="float64"), - ], - False, - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], name="abc", dtype="float64"), - pd.Series(index=[10, 11, 12], dtype="float64"), - ], - False, - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([1, -100, 200, -399, 400], name="abc"), - pd.Series([111, 222, 333], index=[10, 11, 12]), - ], - False, - ), - ], -) -@pytest.mark.parametrize( - "columns", - [ - None, - ["0"], - [0], - ["abc"], - [144, 13], - [2, 1, 0], - pd.Index(["abc"], name="custom_name"), - ], -) -def test_dataframe_init_from_series_list(data, ignore_dtype, columns): - gd_data = [cudf.from_pandas(obj) for obj in data] - - expected = pd.DataFrame(data, columns=columns) - actual = cudf.DataFrame(gd_data, columns=columns) - - if ignore_dtype: - # When a union is performed to generate columns, - # the order is never guaranteed. Hence sort by - # columns before comparison. 
- if not expected.columns.equals(actual.columns): - expected = expected.sort_index(axis=1) - actual = actual.sort_index(axis=1) - assert_eq( - expected.fillna(-1), - actual.fillna(-1), - check_dtype=False, - check_index_type=True, - ) - else: - assert_eq( - expected, - actual, - check_index_type=True, - check_column_type=False, - ) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "data, ignore_dtype, index", - [ - ([pd.Series([1, 2, 3])], False, ["a", "b", "c"]), - ([pd.Series(index=[1, 2, 3], dtype="float64")], False, ["a", "b"]), - ( - [pd.Series(name="empty series name", dtype="float64")], - False, - ["index1"], - ), - ( - [pd.Series([1]), pd.Series([], dtype="float64"), pd.Series([3])], - False, - ["0", "2", "1"], - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], dtype="float64"), - pd.Series([3], name="series that is named"), - ], - False, - ["_", "+", "*"], - ), - ([pd.Series([1, 2, 3], name="hi")] * 10, False, ["mean"] * 10), - ( - [pd.Series([1, 2, 3], name=None, index=[10, 11, 12])] * 10, - False, - ["abc"] * 10, - ), - ( - [ - pd.Series([1, 2, 3], name=None, index=[10, 11, 12]), - pd.Series([1, 2, 30], name=None, index=[13, 144, 15]), - ], - True, - ["set_index_a", "set_index_b"], - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], dtype="float64"), - pd.Series(index=[10, 11, 12], dtype="float64"), - ], - False, - ["a", "b", "c"], - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], name="abc", dtype="float64"), - pd.Series(index=[10, 11, 12], dtype="float64"), - ], - False, - ["a", "v", "z"], - ), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([1, -100, 200, -399, 400], name="abc"), - pd.Series([111, 222, 333], index=[10, 11, 12]), - ], - False, - ["a", "v", "z"], - ), - ], -) -@pytest.mark.parametrize( - "columns", [None, ["0"], [0], ["abc"], [144, 13], [2, 1, 0]] -) -def test_dataframe_init_from_series_list_with_index( - data, - ignore_dtype, - index, - columns, -): - gd_data = [cudf.from_pandas(obj) for obj in data] - - expected = pd.DataFrame(data, columns=columns, index=index) - actual = cudf.DataFrame(gd_data, columns=columns, index=index) - - if ignore_dtype: - # When a union is performed to generate columns, - # the order is never guaranteed. Hence sort by - # columns before comparison. 
- if not expected.columns.equals(actual.columns): - expected = expected.sort_index(axis=1) - actual = actual.sort_index(axis=1) - assert_eq(expected.fillna(-1), actual.fillna(-1), check_dtype=False) - else: - assert_eq(expected, actual, check_column_type=False) - - -@pytest.mark.parametrize( - "data, index", - [ - ([pd.Series([1, 2]), pd.Series([1, 2])], ["a", "b", "c"]), - ( - [ - pd.Series([1, 0.324234, 32424.323, -1233, 34242]), - pd.Series([], dtype="float64"), - pd.Series([3], name="series that is named"), - ], - ["_", "+"], - ), - ([pd.Series([1, 2, 3], name="hi")] * 10, ["mean"] * 9), - ], -) -def test_dataframe_init_from_series_list_with_index_error(data, index): - gd_data = [cudf.from_pandas(obj) for obj in data] - - assert_exceptions_equal( - pd.DataFrame, - cudf.DataFrame, - ([data], {"index": index}), - ([gd_data], {"index": index}), - ) - - -@pytest.mark.parametrize( - "data", - [ - [pd.Series([1, 2, 3], index=["a", "a", "a"])], - [pd.Series([1, 2, 3], index=["a", "a", "a"])] * 4, - [ - pd.Series([1, 2, 3], index=["a", "b", "a"]), - pd.Series([1, 2, 3], index=["b", "b", "a"]), - ], - [ - pd.Series([1, 2, 3], index=["a", "b", "z"]), - pd.Series([1, 2, 3], index=["u", "b", "a"]), - pd.Series([1, 2, 3], index=["u", "b", "u"]), - ], - ], -) -def test_dataframe_init_from_series_list_duplicate_index_error(data): - gd_data = [cudf.from_pandas(obj) for obj in data] - - assert_exceptions_equal( - lfunc=pd.DataFrame, - rfunc=cudf.DataFrame, - lfunc_args_and_kwargs=([], {"data": data}), - rfunc_args_and_kwargs=([], {"data": gd_data}), - check_exception_type=False, - ) - - -def test_dataframe_iterrows_itertuples(): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - - with pytest.raises( - TypeError, - match=re.escape( - "cuDF does not support iteration of DataFrame " - "via itertuples. Consider using " - "`.to_pandas().itertuples()` " - "if you wish to iterate over namedtuples." - ), - ): - df.itertuples() - - with pytest.raises( - TypeError, - match=re.escape( - "cuDF does not support iteration of DataFrame " - "via iterrows. Consider using " - "`.to_pandas().iterrows()` " - "if you wish to iterate over each row." 
- ), - ): - df.iterrows() - - -@pytest.mark.parametrize( - "df", - [ - lambda: cudf.DataFrame({"a": [1, 2, 3]}), - lambda: cudf.DataFrame( - {"a": [1, 2, 3], "b": ["a", "z", "c"]}, index=["a", "z", "x"] - ), - lambda: cudf.DataFrame( - { - "a": [1, 2, 3, None, 2, 1, None], - "b": ["a", "z", "c", "a", "v", "z", "z"], - } - ), - lambda: cudf.DataFrame({"a": [], "b": []}), - lambda: cudf.DataFrame({"a": [None, None], "b": [None, None]}), - lambda: cudf.DataFrame( - { - "a": ["hello", "world", "rapids", "ai", "nvidia"], - "b": cudf.Series( - [1, 21, 21, 11, 11], - dtype="timedelta64[s]", - index=["a", "b", "c", "d", " e"], - ), - }, - index=["a", "b", "c", "d", " e"], - ), - lambda: cudf.DataFrame( - { - "a": ["hello", None, "world", "rapids", None, "ai", "nvidia"], - "b": cudf.Series( - [1, 21, None, 11, None, 11, None], dtype="datetime64[s]" - ), - } - ), - ], -) -@pytest.mark.parametrize("numeric_only", [True, False]) -@pytest.mark.parametrize("dropna", [True, False]) -def test_dataframe_mode(df, numeric_only, dropna): - df = df() - pdf = df.to_pandas() - - expected = pdf.mode(numeric_only=numeric_only, dropna=dropna) - actual = df.mode(numeric_only=numeric_only, dropna=dropna) - if len(actual.columns) == 0: - # pandas < 3.0 returns an Index[object] instead of RangeIndex - actual.columns = expected.columns - assert_eq(expected, actual, check_dtype=False) - - -@pytest.mark.parametrize( - "lhs, rhs", [("a", "a"), ("a", "b"), (1, 1.0), (None, None), (None, "a")] -) -def test_equals_names(lhs, rhs): - lhs = cudf.DataFrame({lhs: [1, 2]}) - rhs = cudf.DataFrame({rhs: [1, 2]}) - - got = lhs.equals(rhs) - expect = lhs.to_pandas().equals(rhs.to_pandas()) - - assert_eq(expect, got) - - -def test_equals_dtypes(): - lhs = cudf.DataFrame({"a": [1, 2.0]}) - rhs = cudf.DataFrame({"a": [1, 2]}) - - got = lhs.equals(rhs) - expect = lhs.to_pandas().equals(rhs.to_pandas()) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "df1", - [ - pd.DataFrame({"a": [10, 11, 12]}, index=["a", "b", "z"]), - pd.DataFrame({"z": ["a"]}), - pd.DataFrame({"a": [], "b": []}), - ], -) -@pytest.mark.parametrize( - "df2", - [ - pd.DataFrame(), - pd.DataFrame({"a": ["a", "a", "c", "z", "A"], "z": [1, 2, 3, 4, 5]}), - ], -) -@pytest.mark.parametrize( - "op", - [ - operator.eq, - operator.ne, - operator.lt, - operator.gt, - operator.le, - operator.ge, - ], -) -def test_dataframe_error_equality(df1, df2, op): - gdf1 = cudf.from_pandas(df1) - gdf2 = cudf.from_pandas(df2) - - assert_exceptions_equal(op, op, ([df1, df2],), ([gdf1, gdf2],)) - - -@pytest.mark.parametrize( - "df,expected_pdf", - [ - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series([1, 2, None, 3], dtype="uint8"), - "b": cudf.Series([23, None, None, 32], dtype="uint16"), - } - ), - pd.DataFrame( - { - "a": pd.Series([1, 2, None, 3], dtype=pd.UInt8Dtype()), - "b": pd.Series( - [23, None, None, 32], dtype=pd.UInt16Dtype() - ), - } - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series([None, 123, None, 1], dtype="uint32"), - "b": cudf.Series( - [234, 2323, 23432, None, None, 224], dtype="uint64" - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - [None, 123, None, 1], dtype=pd.UInt32Dtype() - ), - "b": pd.Series( - [234, 2323, 23432, None, None, 224], - dtype=pd.UInt64Dtype(), - ), - } - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [-10, 1, None, -1, None, 3], dtype="int8" - ), - "b": cudf.Series( - [111, None, 222, None, 13], dtype="int16" - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - [-10, 1, None, -1, None, 3], 
dtype=pd.Int8Dtype() - ), - "b": pd.Series( - [111, None, 222, None, 13], dtype=pd.Int16Dtype() - ), - } - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [11, None, 22, 33, None, 2, None, 3], dtype="int32" - ), - "b": cudf.Series( - [32431, None, None, 32322, 0, 10, -32324, None], - dtype="int64", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - [11, None, 22, 33, None, 2, None, 3], - dtype=pd.Int32Dtype(), - ), - "b": pd.Series( - [32431, None, None, 32322, 0, 10, -32324, None], - dtype=pd.Int64Dtype(), - ), - } - ), - ), - ( - lambda: cudf.DataFrame( - { - "a": cudf.Series( - [True, None, False, None, False, True, True, False], - dtype="bool_", - ), - "b": cudf.Series( - [ - "abc", - "a", - None, - "hello world", - "foo buzz", - "", - None, - "rapids ai", - ], - dtype="object", - ), - "c": cudf.Series( - [0.1, None, 0.2, None, 3, 4, 1000, None], - dtype="float64", - ), - } - ), - pd.DataFrame( - { - "a": pd.Series( - [True, None, False, None, False, True, True, False], - dtype=pd.BooleanDtype(), - ), - "b": pd.Series( - [ - "abc", - "a", - None, - "hello world", - "foo buzz", - "", - None, - "rapids ai", - ], - dtype=pd.StringDtype(), - ), - "c": pd.Series( - [0.1, None, 0.2, None, 3, 4, 1000, None], - dtype=pd.Float64Dtype(), - ), - } - ), - ), - ], -) -def test_dataframe_to_pandas_nullable_dtypes(df, expected_pdf): - actual_pdf = df().to_pandas(nullable=True) - - assert_eq(actual_pdf, expected_pdf) - - -@pytest.mark.parametrize( - "data", - [ - [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}], - [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 5, "c": 6}], - [{"a": 1, "b": 2}, {"a": 1, "b": 5, "c": 6}], - [{"a": 1, "b": 2}, {"b": 5, "c": 6}], - [{}, {"a": 1, "b": 5, "c": 6}], - [{"a": 1, "b": 2, "c": 3}, {"a": 4.5, "b": 5.5, "c": 6.5}], - ], -) -def test_dataframe_init_from_list_of_dicts(data): - expect = pd.DataFrame(data) - got = cudf.DataFrame(data) - - assert_eq(expect, got) - - -def test_dataframe_pipe(): - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - - def add_int_col(df, column): - df[column] = df._constructor_sliced([10, 20, 30, 40]) - return df - - def add_str_col(df, column): - df[column] = df._constructor_sliced(["a", "b", "xyz", "ai"]) - return df - - expected = ( - pdf.pipe(add_int_col, "one") - .pipe(add_int_col, column="two") - .pipe(add_str_col, "three") - ) - actual = ( - gdf.pipe(add_int_col, "one") - .pipe(add_int_col, column="two") - .pipe(add_str_col, "three") - ) - - assert_eq(expected, actual) - - expected = ( - pdf.pipe((add_str_col, "df"), column="one") - .pipe(add_str_col, column="two") - .pipe(add_int_col, "three") - ) - actual = ( - gdf.pipe((add_str_col, "df"), column="one") - .pipe(add_str_col, column="two") - .pipe(add_int_col, "three") - ) - - assert_eq(expected, actual) - - -def test_dataframe_pipe_error(): - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - - def custom_func(df, column): - df[column] = df._constructor_sliced([10, 20, 30, 40]) - return df - - assert_exceptions_equal( - lfunc=pdf.pipe, - rfunc=gdf.pipe, - lfunc_args_and_kwargs=([(custom_func, "columns")], {"columns": "d"}), - rfunc_args_and_kwargs=([(custom_func, "columns")], {"columns": "d"}), - ) - - -@pytest.mark.parametrize( - "op", - ["count", "kurt", "kurtosis", "skew"], -) -def test_dataframe_axis1_unsupported_ops(op): - df = cudf.DataFrame({"a": [1, 2, 3], "b": [8, 9, 10]}) - - with pytest.raises( - NotImplementedError, match="Only axis=0 is currently supported." 
- ): - getattr(df, op)(axis=1) - - -def test_dataframe_from_pandas_duplicate_columns(): - pdf = pd.DataFrame(columns=["a", "b", "c", "a"]) - pdf["a"] = [1, 2, 3] - - with pytest.raises( - ValueError, match="Duplicate column names are not allowed" - ): - cudf.from_pandas(pdf) - - -@pytest.mark.parametrize( - "df", - [ - pd.DataFrame( - {"a": [1, 2, 3], "b": [10, 11, 20], "c": ["a", "bcd", "xyz"]} - ), - pd.DataFrame(), - ], -) -@pytest.mark.parametrize( - "columns", - [ - None, - ["a"], - ["c", "a"], - ["b", "a", "c"], - [], - pd.Index(["c", "a"]), - cudf.Index(["c", "a"]), - ["abc", "a"], - ["column_not_exists1", "column_not_exists2"], - ], -) -@pytest.mark.parametrize("index", [["abc", "def", "ghi"]]) -def test_dataframe_constructor_columns(df, columns, index, request): - def assert_local_eq(actual, df, expected, host_columns): - check_index_type = not expected.empty - if host_columns is not None and any( - col not in df.columns for col in host_columns - ): - assert_eq( - expected, - actual, - check_dtype=False, - check_index_type=check_index_type, - ) - else: - assert_eq( - expected, - actual, - check_index_type=check_index_type, - check_column_type=False, - ) - - gdf = cudf.from_pandas(df) - host_columns = ( - columns.to_pandas() if isinstance(columns, cudf.Index) else columns - ) - - expected = pd.DataFrame(df, columns=host_columns, index=index) - actual = cudf.DataFrame(gdf, columns=columns, index=index) - - assert_local_eq(actual, df, expected, host_columns) - - -def test_dataframe_constructor_column_index_only(): - columns = ["a", "b", "c"] - index = ["r1", "r2", "r3"] - - gdf = cudf.DataFrame(index=index, columns=columns) - assert not id(gdf["a"]._column) == id(gdf["b"]._column) and not id( - gdf["b"]._column - ) == id(gdf["c"]._column) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2.5, 3], "b": [3, 4.5, 5], "c": [2.0, 3.0, 4.0]}, - {"a": [1, 2.2, 3], "b": [2.0, 3.0, 4.0], "c": [5.0, 6.0, 4.0]}, - ], -) -@pytest.mark.parametrize( - "aggs", - [ - ["min", "sum", "max"], - ("min", "sum", "max"), - {"min", "sum", "max"}, - "sum", - {"a": "sum", "b": "min", "c": "max"}, - {"a": ["sum"], "b": ["min"], "c": ["max"]}, - {"a": ("sum"), "b": ("min"), "c": ("max")}, - {"a": {"sum"}, "b": {"min"}, "c": {"max"}}, - {"a": ["sum", "min"], "b": ["sum", "max"], "c": ["min", "max"]}, - {"a": ("sum", "min"), "b": ("sum", "max"), "c": ("min", "max")}, - {"a": {"sum", "min"}, "b": {"sum", "max"}, "c": {"min", "max"}}, - ], -) -def test_agg_for_dataframes(data, aggs): - pdf = pd.DataFrame(data) - gdf = cudf.DataFrame(data) - - expect = pdf.agg(aggs).sort_index() - got = gdf.agg(aggs).sort_index() - - assert_eq(expect, got, check_dtype=True) - - -@pytest_unmark_spilling -@pytest.mark.parametrize( - "data", - [ - {"a": [1, 2, 3], "b": [3.0, 4.0, 5.0], "c": [True, True, False]}, - {"a": [1, 2, 3], "b": [True, True, False], "c": [False, True, False]}, - ], -) -@pytest.mark.parametrize( - "aggs", - [ - ["min", "sum", "max"], - "sum", - {"a": "sum", "b": "min", "c": "max"}, - ], -) -def test_agg_for_dataframes_error(data, aggs): - gdf = cudf.DataFrame(data) - - with pytest.raises(TypeError): - gdf.agg(aggs) - - -def test_agg_for_unsupported_function(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - - with pytest.raises(NotImplementedError): - gdf.agg({"a": np.sum, "b": np.min, "c": np.max}) - - -def test_agg_for_dataframe_with_invalid_function(): - aggs = "asdf" - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - - with pytest.raises( - 
AttributeError, - match=f"{aggs} is not a valid function for 'DataFrame' object", - ): - gdf.agg(aggs) - - -def test_agg_for_series_with_invalid_function(): - aggs = {"a": "asdf"} - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - - with pytest.raises( - AttributeError, - match=f"{aggs['a']} is not a valid function for 'Series' object", - ): - gdf.agg(aggs) - - -@pytest.mark.parametrize( - "aggs", - [ - "sum", - ["min", "sum", "max"], - {"a": {"sum", "min"}, "b": {"sum", "max"}, "c": {"min", "max"}}, - ], -) -def test_agg_for_dataframe_with_string_columns(aggs): - gdf = cudf.DataFrame( - {"a": ["m", "n", "o"], "b": ["t", "u", "v"], "c": ["x", "y", "z"]}, - index=["a", "b", "c"], - ) - - with pytest.raises( - NotImplementedError, - match=re.escape( - "DataFrame.agg() is not supported for " - "frames containing string columns" - ), - ): - gdf.agg(aggs) - - -@pytest_unmark_spilling -@pytest.mark.parametrize("overwrite", [True, False]) -@pytest.mark.parametrize( - "left_keys,right_keys", - [ - [("a", "b"), ("a", "b")], - [("a", "b"), ("a", "c")], - [("a", "b"), ("d", "e")], - ], -) -@pytest.mark.parametrize( - "data_left,data_right", - [ - [([1, 2, 3], [3, 4, 5]), ([1, 2, 3], [3, 4, 5])], - [ - ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), - ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), - ], - [ - ([True, False, True], [False, False, False]), - ([True, False, True], [False, False, False]), - ], - [ - ([np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]), - ([np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]), - ], - [([1, 2, 3], [3, 4, 5]), ([1, 2, 4], [30, 40, 50])], - [ - ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), - ([1.0, 2.0, 4.0], [30.0, 40.0, 50.0]), - ], - [([1, 2, 3], [3, 4, 5]), ([10, 20, 40], [30, 40, 50])], - [ - ([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]), - ([10.0, 20.0, 40.0], [30.0, 40.0, 50.0]), - ], - ], -) -def test_update_for_dataframes( - left_keys, right_keys, data_left, data_right, overwrite -): - errors = "ignore" - join = "left" - left = dict(zip(left_keys, data_left, strict=True)) - right = dict(zip(right_keys, data_right, strict=True)) - pdf = pd.DataFrame(left) - gdf = cudf.DataFrame(left, nan_as_null=False) - - other_pd = pd.DataFrame(right) - other_gd = cudf.DataFrame(right, nan_as_null=False) - - pdf.update(other=other_pd, join=join, overwrite=overwrite, errors=errors) - gdf.update(other=other_gd, join=join, overwrite=overwrite, errors=errors) - - assert_eq(pdf, gdf, check_dtype=False) - - -def test_update_for_right_join(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - other_gd = cudf.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) - - with pytest.raises( - NotImplementedError, match="Only left join is supported" - ): - gdf.update(other_gd, join="right") - - -def test_update_for_data_overlap(): - errors = "raise" - pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]}) - - other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) - other_gd = cudf.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]}) - - assert_exceptions_equal( - lfunc=pdf.update, - rfunc=gdf.update, - lfunc_args_and_kwargs=([other_pd, errors], {}), - rfunc_args_and_kwargs=([other_gd, errors], {}), - ) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [[1], [2], [3]]}, - { - "left-a": [0, 1, 2], - "a": [[1], None, [3]], - "right-a": ["abc", "def", "ghi"], - }, - { - "left-a": [[], None, None], - "a": [[1], None, [3]], - "right-a": ["abc", "def", "ghi"], - }, - ], -) -def 
test_dataframe_roundtrip_arrow_list_dtype(data): - gdf = cudf.DataFrame(data) - table = gdf.to_arrow() - expected = cudf.DataFrame.from_arrow(table) - - assert_eq(gdf, expected) - - -@pytest.mark.parametrize( - "data", - [ - {"a": [{"one": 3, "two": 4, "three": 10}]}, - { - "left-a": [0, 1, 2], - "a": [{"x": 0.23, "y": 43}, None, {"x": 23.9, "y": 4.3}], - "right-a": ["abc", "def", "ghi"], - }, - { - "left-a": [{"a": 1}, None, None], - "a": [ - {"one": 324, "two": 23432, "three": 324}, - None, - {"one": 3.24, "two": 1, "three": 324}, - ], - "right-a": ["abc", "def", "ghi"], - }, - ], -) -def test_dataframe_roundtrip_arrow_struct_dtype(data): - gdf = cudf.DataFrame(data) - table = gdf.to_arrow() - expected = cudf.DataFrame.from_arrow(table) - - assert_eq(gdf, expected) - - -def test_dataframe_setitem_cupy_array(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame(rng.standard_normal(size=(10, 2))) - gdf = cudf.from_pandas(pdf) - - gpu_array = cupy.array([True, False] * 5) - pdf[gpu_array.get()] = 1.5 - gdf[gpu_array] = 1.5 - - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("level", ["x", 0]) -def test_rename_for_level_MultiIndex_dataframe(level): - data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - index = {0: 123, 1: 4, 2: 6} - pdf = pd.DataFrame( - data, - index=pd.MultiIndex.from_tuples([(0, 1, 2), (1, 2, 3), (2, 3, 4)]), - ) - pdf.index.names = ["x", "y", "z"] - gdf = cudf.from_pandas(pdf) - - expect = pdf.rename(index=index, level=level) - got = gdf.rename(index=index, level=level) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "columns", - [{"a": "f", "b": "g"}, {1: 3, 2: 4}, lambda s: 2 * s], -) -@pytest.mark.parametrize("level", [0, 1]) -def test_rename_for_level_MultiColumn_dataframe(columns, level): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) - - pdf = gdf.to_pandas() - - expect = pdf.rename(columns=columns, level=level) - got = gdf.rename(columns=columns, level=level) - - assert_eq(expect, got) - - -def test_rename_for_level_RangeIndex_dataframe(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - pdf = gdf.to_pandas() - - expect = pdf.rename(columns={"a": "f"}, index={0: 3, 1: 4}, level=0) - got = gdf.rename(columns={"a": "f"}, index={0: 3, 1: 4}, level=0) - - assert_eq(expect, got) - - -def test_rename_for_level_is_None_MC(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) - pdf = gdf.to_pandas() - - expect = pdf.rename(columns={"a": "f"}, level=None) - got = gdf.rename(columns={"a": "f"}, level=None) - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - [ - [[1, 2, 3], 11, "a"], - [None, 22, "e"], - [[4], 33, "i"], - [[], 44, "o"], - [[5, 6], 55, "u"], - ], # nested - [ - [1, 11, "a"], - [2, 22, "e"], - [3, 33, "i"], - [4, 44, "o"], - [5, 55, "u"], - ], # non-nested - ], -) -@pytest.mark.parametrize( - ("labels", "label_to_explode"), - [ - (None, 0), - (pd.Index(["a", "b", "c"]), "a"), - ( - pd.MultiIndex.from_tuples( - [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"] - ), - (0, "a"), - ), - ], -) -@pytest.mark.parametrize("ignore_index", [True, False]) -@pytest.mark.parametrize( - "p_index", - [ - None, - ["ia", "ib", "ic", "id", "ie"], - pd.MultiIndex.from_tuples( - [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")] - ), - ], -) -def test_explode(data, labels, ignore_index, p_index, 
label_to_explode): - pdf = pd.DataFrame(data, index=p_index, columns=labels) - gdf = cudf.from_pandas(pdf) - - expect = pdf.explode(label_to_explode, ignore_index) - got = gdf.explode(label_to_explode, ignore_index) - - assert_eq(expect, got, check_dtype=False) - - -def test_explode_preserve_categorical(): - gdf = cudf.DataFrame( - { - "A": [[1, 2], None, [2, 3]], - "B": cudf.Series([0, 1, 2], dtype="category"), - } - ) - result = gdf.explode("A") - expected = cudf.DataFrame( - { - "A": [1, 2, None, 2, 3], - "B": cudf.Series([0, 0, 1, 2, 2], dtype="category"), - } - ) - expected.index = cudf.Index([0, 0, 1, 2, 2]) - assert_eq(result, expected) - - -@pytest.mark.parametrize( - "data,ascending,expected_data", - [ - ( - {"a": [10, 0, 2], "b": [-10, 10, 1]}, - True, - [1, 2, 0], - ), - ( - {"a": [10, 0, 2], "b": [-10, 10, 1]}, - False, - [0, 2, 1], - ), - ], -) -def test_dataframe_argsort(data, ascending, expected_data): - actual = cudf.DataFrame(data).argsort(ascending=ascending) - expected = cupy.array(expected_data, dtype="int32") - - assert_eq(actual, expected) - - -@pytest.mark.parametrize( - "data", - [ - np.random.RandomState(seed=10).randint(-50, 50, (25, 30)), - np.random.RandomState(seed=10).random_sample((4, 4)), - np.array([1.123, 2.343, 5.890, 0.0]), - [True, False, True, False, False], - {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, - ], -) -@pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) -def test_diff_numeric_dtypes(data, periods): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - actual = gdf.diff(periods=periods, axis=0) - expected = pdf.diff(periods=periods, axis=0) - - assert_eq( - expected, - actual, - check_dtype=False, - ) - - -@pytest.mark.parametrize( - ("precision", "scale"), - [(5, 2), (8, 5)], -) -@pytest.mark.parametrize( - "dtype", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype], -) -def test_diff_decimal_dtypes(precision, scale, dtype): - gdf = cudf.DataFrame( - np.random.default_rng(seed=42).uniform(10.5, 75.5, (10, 6)), - dtype=dtype(precision=precision, scale=scale), - ) - pdf = gdf.to_pandas() - - actual = gdf.diff() - expected = pdf.diff() - - assert_eq( - expected, - actual, - check_dtype=False, - ) - - -def test_diff_invalid_axis(): - gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) - with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): - gdf.diff(periods=1, axis=1) - - -@pytest.mark.parametrize( - "data", - [ - { - "int_col": [1, 2, 3, 4, 5], - "float_col": [1.0, 2.0, 3.0, 4.0, 5.0], - "string_col": ["a", "b", "c", "d", "e"], - }, - ["a", "b", "c", "d", "e"], - ], -) -def test_diff_unsupported_dtypes(data): - gdf = cudf.DataFrame(data) - with pytest.raises( - TypeError, - match=r"unsupported operand type\(s\)", - ): - gdf.diff() - - -def test_diff_many_dtypes(): - pdf = pd.DataFrame( - { - "dates": pd.date_range("2020-01-01", "2020-01-06", freq="D"), - "bools": [True, True, True, False, True, True], - "floats": [1.0, 2.0, 3.5, np.nan, 5.0, -1.7], - "ints": [1, 2, 3, 3, 4, 5], - "nans_nulls": [np.nan, None, None, np.nan, np.nan, None], - } - ) - gdf = cudf.from_pandas(pdf) - assert_eq(pdf.diff(), gdf.diff()) - assert_eq(pdf.diff(periods=2), gdf.diff(periods=2)) - - -def test_dataframe_assign_cp_np_array(): - m, n = 5, 3 - cp_ndarray = cupy.random.randn(m, n) - pdf = pd.DataFrame({f"f_{i}": range(m) for i in range(n)}) - gdf = cudf.DataFrame({f"f_{i}": range(m) for i in range(n)}) - pdf[[f"f_{i}" for i in range(n)]] = cupy.asnumpy(cp_ndarray) - gdf[[f"f_{i}" for i in range(n)]] = cp_ndarray - 
- assert_eq(pdf, gdf) - - -def test_dataframe_nunique(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]}) - pdf = gdf.to_pandas() - - actual = gdf.nunique() - expected = pdf.nunique() - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "columns", - [ - pd.RangeIndex(2, name="foo"), - pd.MultiIndex.from_arrays([[1, 2], [2, 3]], names=["foo", 1]), - pd.Index([3, 5], dtype=np.int8, name="foo"), - ], -) -def test_nunique_preserve_column_in_index(columns): - df = cudf.DataFrame([[1, 2]], columns=columns) - result = df.nunique().index.to_pandas() - assert_eq(result, columns, exact=True) - - -def test_dataframe_nunique_index(): - gdf = cudf.DataFrame( - {"key": [0, 1, 1, 0, 0, 1], "val": [1, 8, 3, 9, -3, 8]} - ) - pdf = gdf.to_pandas() - - actual = gdf.index.nunique() - expected = pdf.index.nunique() - - assert_eq(expected, actual) - - -def test_dataframe_rename_duplicate_column(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) - with pytest.raises( - ValueError, match="Duplicate column names are not allowed" - ): - gdf.rename(columns={"a": "b"}, inplace=True) - - -def test_dataframe_rename_columns_keep_type(): - gdf = cudf.DataFrame([[1, 2, 3]]) - gdf.columns = cudf.Index([4, 5, 6], dtype=np.int8) - result = gdf.rename({4: 50}, axis="columns").columns - expected = pd.Index([50, 5, 6], dtype=np.int8) - assert_eq(result, expected) - - -@pytest_unmark_spilling -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="warning not present in older pandas versions", -) -@pytest.mark.parametrize( - "data", - [ - np.random.RandomState(seed=10).randint(-50, 50, (10, 10)), - np.random.RandomState(seed=10).random_sample((4, 4)), - np.array([1.123, 2.343, 5.890, 0.0]), - {"a": [1.123, 2.343, np.nan, np.nan], "b": [None, 3, 9.08, None]}, - ], -) -@pytest.mark.parametrize("periods", [-5, 0, 2]) -@pytest.mark.parametrize( - "fill_method", ["ffill", "bfill", "pad", "backfill", no_default] -) -def test_dataframe_pct_change(data, periods, fill_method): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - with expect_warning_if(fill_method is not no_default): - actual = gdf.pct_change(periods=periods, fill_method=fill_method) - with expect_warning_if( - fill_method is not no_default or pdf.isna().any().any() - ): - expected = pdf.pct_change(periods=periods, fill_method=fill_method) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_mean_timeseries(numeric_only): - gdf = cudf.datasets.timeseries() - if not numeric_only: - gdf = gdf.select_dtypes(include="number") - pdf = gdf.to_pandas() - - expected = pdf.mean(numeric_only=numeric_only) - actual = gdf.mean(numeric_only=numeric_only) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_std_different_dtypes(numeric_only): - gdf = cudf.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "b": ["a", "b", "c", "d", "e"], - "c": [1.0, 2.0, 3.0, 4.0, 5.0], - } - ) - if not numeric_only: - gdf = gdf.select_dtypes(include="number") - pdf = gdf.to_pandas() - - expected = pdf.std(numeric_only=numeric_only) - actual = gdf.std(numeric_only=numeric_only) - - assert_eq(expected, actual) - - -def test_empty_numeric_only(): - gdf = cudf.DataFrame( - { - "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], - "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], - "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], - } - ) - pdf = gdf.to_pandas() - expected = pdf.prod(numeric_only=True) - actual = gdf.prod(numeric_only=True) - 
assert_eq(expected, actual, check_dtype=True)
-
-
-# Note that for now expressions do not automatically handle casting, so inputs
-# need to be cast appropriately
-@pytest.mark.parametrize(
-    "expr, dtype",
-    [
-        ("a", int),
-        ("+a", int),
-        ("a + b", int),
-        ("a == b", int),
-        ("a / b", float),
-        ("a * b", int),
-        ("a > b", int),
-        ("a >= b", int),
-        ("a > b > c", int),
-        ("a > b < c", int),
-        ("a & b", int),
-        ("a & b | c", int),
-        ("sin(a)", float),
-        ("exp(sin(abs(a)))", float),
-        ("sqrt(floor(a))", float),
-        ("ceil(arctanh(a))", float),
-        ("(a + b) - (c * d)", int),
-        ("~a", int),
-        ("(a > b) and (c > d)", int),
-        ("(a > b) or (c > d)", int),
-        ("not (a > b)", int),
-        ("a + 1", int),
-        ("a + 1.0", float),
-        ("-a + 1", int),
-        ("+a + 1", int),
-        ("e = a + 1", int),
-        (
-            """
-            e = log(cos(a)) + 1.0
-            f = abs(c) - exp(d)
-            """,
-            float,
-        ),
-        ("a_b_are_equal = (a == b)", int),
-        ("a > b", str),
-        ("a < '1'", str),
-        ('a == "1"', str),
-    ],
-)
-@pytest.mark.parametrize("nrows", [0, 10])
-def test_dataframe_eval(nrows, expr, dtype):
-    arr = np.ones(nrows)
-    df_eval = cudf.DataFrame({"a": arr, "b": arr, "c": arr, "d": arr})
-    df_eval = df_eval.astype(dtype)
-    with _hide_ufunc_warnings(expr):
-        expect = df_eval.to_pandas().eval(expr)
-    got = df_eval.eval(expr)
-    # In the specific case where the evaluated expression is a unary function
-    # of a single column with no nesting, pandas will retain the name. This
-    # level of compatibility is out of scope for now.
-    assert_eq(expect, got, check_names=False)
-
-    # Test inplace
-    if re.search("[^=><]=[^=]", expr) is not None:
-        pdf_eval = df_eval.to_pandas()
-        with _hide_ufunc_warnings(expr):
-            pdf_eval.eval(expr, inplace=True)
-        df_eval.eval(expr, inplace=True)
-        assert_eq(pdf_eval, df_eval)
-
-
-@pytest.mark.parametrize(
-    "expr",
-    [
-        """
-        e = a + b
-        a == b
-        """,
-        "a_b_are_equal = (a == b) = c",
-    ],
-)
-def test_dataframe_eval_errors(expr):
-    df = cudf.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]})
-    with pytest.raises(ValueError):
-        df.eval(expr)
-
-
-def test_dataframe_eval_misc():
-    df = cudf.DataFrame({"a": [1, 2, 3, None, 5]})
-    got = df.eval("isnull(a)")
-    assert_eq(got, cudf.Series.isnull(df["a"]), check_names=False)
-
-    df.eval("c = isnull(1)", inplace=True)
-    assert_eq(df["c"], cudf.Series([False] * len(df), name="c"))
-
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
-        {
-            "first_name": ["John", "Anne", "John", "Beth"],
-            "middle_name": ["Smith", None, None, "Louise"],
-        },
-    ],
-)
-@pytest.mark.parametrize("sort", [True, False])
-@pytest.mark.parametrize("ascending", [True, False])
-@pytest.mark.parametrize("normalize", [True, False])
-@pytest.mark.parametrize("dropna", [True, False])
-@pytest.mark.parametrize("use_subset", [True, False])
-def test_value_counts(
-    data,
-    sort,
-    ascending,
-    normalize,
-    dropna,
-    use_subset,
-):
-    subset = [next(iter(data.keys()))]
-    gdf = cudf.DataFrame(data)
-    pdf = gdf.to_pandas()
-
-    got = gdf.value_counts(
-        subset=subset if (use_subset) else None,
-        sort=sort,
-        ascending=ascending,
-        normalize=normalize,
-        dropna=dropna,
-    )
-    expected = pdf.value_counts(
-        subset=subset if (use_subset) else None,
-        sort=sort,
-        ascending=ascending,
-        normalize=normalize,
-        dropna=dropna,
-    )
-
-    if not dropna:
-        # Convert the Pandas series to a cuDF one due to difference
-        # in the handling of NaNs between the two (<NA> in cuDF and
-        # NaN in Pandas) when dropna=False.
- assert_eq(got.sort_index(), cudf.from_pandas(expected).sort_index()) - else: - assert_eq(got.sort_index(), expected.sort_index()) - - -def test_value_counts_no_subset(): - gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [1, 1, 0]}) - with pytest.raises(KeyError): - gdf.value_counts(subset=["not_a_column_name"]) - - -def test_multiindex_wildcard_selection_all(): - midx = cudf.MultiIndex.from_tuples( - [(c1, c2) for c1 in "abc" for c2 in "ab"] - ) - df = cudf.DataFrame({f"{i}": [i] for i in range(6)}) - df.columns = midx - expect = df.to_pandas().loc[:, (slice(None), "b")] - got = df.loc[:, (slice(None), "b")] - assert_eq(expect, got) - - -@pytest_xfail(reason="Not yet properly supported.") -def test_multiindex_wildcard_selection_partial(): - midx = cudf.MultiIndex.from_tuples( - [(c1, c2) for c1 in "abc" for c2 in "ab"] - ) - df = cudf.DataFrame({f"{i}": [i] for i in range(6)}) - df.columns = midx - expect = df.to_pandas().loc[:, (slice("a", "b"), "b")] - got = df.loc[:, (slice("a", "b"), "b")] - assert_eq(expect, got) - - -@pytest_xfail(reason="Not yet properly supported.") -def test_multiindex_wildcard_selection_three_level_all(): - midx = cudf.MultiIndex.from_tuples( - [(c1, c2, c3) for c1 in "abcd" for c2 in "abc" for c3 in "ab"] - ) - df = cudf.DataFrame({f"{i}": [i] for i in range(24)}) - df.columns = midx - - expect = df.to_pandas().loc[:, (slice("a", "c"), slice("a", "b"), "b")] - got = df.loc[:, (slice(None), "b")] - assert_eq(expect, got) - - -def test_dataframe_assign_scalar_to_empty_series(): - expected = pd.DataFrame({"a": []}) - actual = cudf.DataFrame({"a": []}) - expected.a = 0 - actual.a = 0 - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - {0: [1, 2, 3], 2: [10, 11, 23]}, - {("a", "b"): [1, 2, 3], ("2",): [10, 11, 23]}, - ], -) -def test_non_string_column_name_to_arrow(data): - df = cudf.DataFrame(data) - - expected = df.to_arrow() - actual = pa.Table.from_pandas(df.to_pandas()) - - assert expected.equals(actual) - - -def test_complex_types_from_arrow(): - expected = pa.Table.from_arrays( - [ - pa.array([1, 2, 3]), - pa.array([10, 20, 30]), - pa.array([{"a": 9}, {"b": 10}, {"c": 11}]), - pa.array([[{"a": 1}], [{"b": 2}], [{"c": 3}]]), - pa.array([10, 11, 12]).cast(pa.decimal128(21, 2)), - pa.array([{"a": 9}, {"b": 10, "c": {"g": 43}}, {"c": {"a": 10}}]), - ], - names=["a", "b", "c", "d", "e", "f"], - ) - - df = cudf.DataFrame.from_arrow(expected) - actual = df.to_arrow() - - assert expected.equals(actual) - - -@pytest.mark.parametrize( - "data", - [ - { - "brand": ["Yum Yum", "Yum Yum", "Indomie", "Indomie", "Indomie"], - "style": ["cup", "cup", "cup", "pack", "pack"], - "rating": [4, 4, 3.5, 15, 5], - }, - { - "brand": ["Indomie", "Yum Yum", "Indomie", "Indomie", "Indomie"], - "style": ["cup", "cup", "cup", "cup", "pack"], - "rating": [4, 4, 3.5, 4, 5], - }, - ], -) -@pytest.mark.parametrize( - "subset", [None, ["brand"], ["rating"], ["style", "rating"]] -) -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_dataframe_duplicated(data, subset, keep): - gdf = cudf.DataFrame(data) - pdf = gdf.to_pandas() - - expected = pdf.duplicated(subset=subset, keep=keep) - actual = gdf.duplicated(subset=subset, keep=keep) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - {"col": [{"a": 1.1}, {"a": 2.1}, {"a": 10.0}, {"a": 11.2323}, None]}, - {"a": [[{"b": 567}], None] * 10}, - {"a": [decimal.Decimal(10), decimal.Decimal(20), None]}, - ], -) -def test_dataframe_values_complex_types(data): - gdf = cudf.DataFrame(data) 
- with pytest.raises(NotImplementedError): - gdf.values - - -def test_dataframe_from_arrow_slice(): - table = pa.Table.from_pandas( - pd.DataFrame.from_dict( - {"a": ["aa", "bb", "cc"] * 3, "b": [1, 2, 3] * 3} - ) - ) - table_slice = table.slice(3, 7) - - expected = table_slice.to_pandas() - actual = cudf.DataFrame.from_arrow(table_slice) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,index", - [ - ({"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}, None), - ( - { - "a": [1, 2, 3], - "b": ["x", "y", "z"], - }, - [10, 11], - ), - ( - { - "a": [1, 2, 3], - "b": ["x", "y", "z"], - }, - [10, 11], - ), - ([[10, 11], [12, 13]], ["a", "b", "c"]), - ], -) -def test_dataframe_init_length_error(data, index): - assert_exceptions_equal( - lfunc=pd.DataFrame, - rfunc=cudf.DataFrame, - lfunc_args_and_kwargs=( - [], - {"data": data, "index": index}, - ), - rfunc_args_and_kwargs=( - [], - {"data": data, "index": index}, - ), - ) - - -def test_dataframe_binop_with_mixed_date_types(): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - rng.random(size=(2, 2)), - columns=pd.Index(["2000-01-03", "2000-01-04"], dtype="datetime64[ns]"), - ) - ser = pd.Series(rng.random(size=3), index=[0, 1, 2]) - gdf = cudf.from_pandas(df) - gser = cudf.from_pandas(ser) - expected = df - ser - got = gdf - gser - assert_eq(expected, got) - - -def test_dataframe_binop_with_mixed_string_types(): - rng = np.random.default_rng(seed=0) - df1 = pd.DataFrame(rng.random(size=(3, 3)), columns=pd.Index([0, 1, 2])) - df2 = pd.DataFrame( - rng.random(size=(6, 6)), - columns=pd.Index([0, 1, 2, "VhDoHxRaqt", "X0NNHBIPfA", "5FbhPtS0D1"]), - ) - gdf1 = cudf.from_pandas(df1) - gdf2 = cudf.from_pandas(df2) - - expected = df2 + df1 - got = gdf2 + gdf1 - - assert_eq(expected, got) - - -def test_dataframe_binop_and_where(): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame(rng.random(size=(2, 2)), columns=pd.Index([True, False])) - gdf = cudf.from_pandas(df) - - expected = df > 1 - got = gdf > 1 - - assert_eq(expected, got) - - expected = df[df > 1] - got = gdf[gdf > 1] - - assert_eq(expected, got) - - -def test_dataframe_binop_with_datetime_index(): - rng = np.random.default_rng(seed=0) - df = pd.DataFrame( - rng.random(size=(2, 2)), - columns=pd.Index(["2000-01-03", "2000-01-04"], dtype="datetime64[ns]"), - ) - ser = pd.Series( - rng.random(2), - index=pd.Index( - [ - "2000-01-04", - "2000-01-03", - ], - dtype="datetime64[ns]", - ), - ) - gdf = cudf.from_pandas(df) - gser = cudf.from_pandas(ser) - expected = df - ser - got = gdf - gser - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "dtype", ["datetime64[ns]", "timedelta64[ns]", "int64", "float32"] -) -def test_dataframe_mixed_dtype_error(dtype): - pdf = pd.Series([1, 2, 3], dtype=dtype).to_frame().astype(object) - with pytest.raises(TypeError): - cudf.from_pandas(pdf) - - -@pytest.mark.parametrize( - "index_data,name", - [([10, 13], "a"), ([30, 40, 20], "b"), (["ef"], "c"), ([2, 3], "Z")], -) -def test_dataframe_reindex_with_index_names(index_data, name): - gdf = cudf.DataFrame( - { - "a": [10, 12, 13], - "b": [20, 30, 40], - "c": cudf.Series(["ab", "cd", "ef"], dtype="category"), - } - ) - if name in gdf.columns: - gdf = gdf.set_index(name) - pdf = gdf.to_pandas() - - gidx = cudf.Index(index_data, name=name) - actual = gdf.reindex(gidx) - expected = pdf.reindex(gidx.to_pandas()) - - assert_eq(actual, expected) - - actual = gdf.reindex(index_data) - expected = pdf.reindex(index_data) - - assert_eq(actual, expected) - - -def 
test_dataframe_reduction_error(): - gdf = cudf.DataFrame( - { - "a": cudf.Series([1, 2, 3], dtype="float"), - "d": cudf.Series([10, 20, 30], dtype="timedelta64[ns]"), - } - ) - - with pytest.raises(TypeError): - gdf.sum() - - -def test_dataframe_from_generator(): - pdf = pd.DataFrame((i for i in range(5))) - gdf = cudf.DataFrame((i for i in range(5))) - assert_eq(pdf, gdf) - - -@pytest.mark.parametrize("name", ["a", 0, None, np.nan, cudf.NA]) -@pytest.mark.parametrize("contains", ["a", 0, None, np.nan, cudf.NA]) -@pytest.mark.parametrize("other_names", [[], ["b", "c"], [1, 2]]) -def test_dataframe_contains(name, contains, other_names): - column_names = [name, *other_names] - gdf = cudf.DataFrame({c: [0] for c in column_names}) - pdf = pd.DataFrame({c: [0] for c in column_names}) - - assert_eq(gdf, pdf) - - if contains is cudf.NA or name is cudf.NA: - expectation = contains is cudf.NA and name is cudf.NA - assert (contains in pdf) == expectation - assert (contains in gdf) == expectation - elif gdf.columns.dtype.kind == "f": - # In some cases, the columns are converted to an Index[float] based on - # the other column names. That casts name values from None to np.nan. - expectation = contains is np.nan and (name is None or name is np.nan) - assert (contains in pdf) == expectation - assert (contains in gdf) == expectation - else: - expectation = contains == name or ( - contains is np.nan and name is np.nan - ) - assert (contains in pdf) == expectation - assert (contains in gdf) == expectation - - assert (contains in pdf) == (contains in gdf) - - -def test_dataframe_series_dot(): - pser = pd.Series(range(2)) - gser = cudf.from_pandas(pser) - - expected = pser @ pser - actual = gser @ gser - - assert_eq(expected, actual) - - pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("ab")) - gdf = cudf.from_pandas(pdf) - - expected = pser @ pdf - actual = gser @ gdf - - assert_eq(expected, actual) - - assert_exceptions_equal( - lfunc=pdf.dot, - rfunc=gdf.dot, - lfunc_args_and_kwargs=([pser], {}), - rfunc_args_and_kwargs=([gser], {}), - ) - - assert_exceptions_equal( - lfunc=pdf.dot, - rfunc=gdf.dot, - lfunc_args_and_kwargs=([pdf], {}), - rfunc_args_and_kwargs=([gdf], {}), - ) - - pser = pd.Series(range(2), index=["a", "k"]) - gser = cudf.from_pandas(pser) - - pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("ab"), index=["a", "k"]) - gdf = cudf.from_pandas(pdf) - - expected = pser @ pdf - actual = gser @ gdf - - assert_eq(expected, actual) - - actual = gdf @ [2, 3] - expected = pdf @ [2, 3] - - assert_eq(expected, actual) - - actual = pser @ [12, 13] - expected = gser @ [12, 13] - - assert_eq(expected, actual) - - -def test_dataframe_reindex_keep_colname(): - gdf = cudf.DataFrame([1], columns=cudf.Index([1], name="foo")) - result = gdf.reindex(index=[0, 1]) - expected = cudf.DataFrame( - [1, None], columns=cudf.Index([1], name="foo"), index=[0, 1] - ) - assert_eq(result, expected) - - -def test_dataframe_duplicate_index_reindex(): - gdf = cudf.DataFrame({"a": [0, 1, 2, 3]}, index=[0, 0, 1, 1]) - pdf = gdf.to_pandas() - - assert_exceptions_equal( - gdf.reindex, - pdf.reindex, - lfunc_args_and_kwargs=([10, 11, 12, 13], {}), - rfunc_args_and_kwargs=([10, 11, 12, 13], {}), - ) - - -@pytest.mark.parametrize( - "expected", - [ - pd.RangeIndex(1, 2, name="a"), - pd.Index([1], dtype=np.int8, name="a"), - pd.MultiIndex.from_arrays([[1]], names=["a"]), - ], -) -@pytest.mark.parametrize("binop", [lambda df: df == df, lambda df: df - 1]) -def test_dataframe_binop_preserves_column_metadata(expected, binop): - df = 
cudf.DataFrame([1], columns=expected) - result = binop(df).columns - pd.testing.assert_index_equal(result, expected, exact=True) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - {"1": 2}, - [1], - decimal.Decimal("1.0"), - ], -) -def test_dataframe_to_pandas_arrow_type_nullable_raises(scalar): - pa_array = pa.array([scalar, None]) - df = cudf.DataFrame({"a": pa_array}) - with pytest.raises(ValueError): - df.to_pandas(nullable=True, arrow_type=True) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - {"1": 2}, - [1], - decimal.Decimal("1.0"), - ], -) -def test_dataframe_to_pandas_arrow_type(scalar): - pa_array = pa.array([scalar, None]) - df = cudf.DataFrame({"a": pa_array}) - result = df.to_pandas(arrow_type=True) - expected = pd.DataFrame({"a": pd.arrays.ArrowExtensionArray(pa_array)}) - pd.testing.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("dtype1", ["int16", "float32"]) -@pytest.mark.parametrize("dtype2", ["int16", "float32"]) -def test_dataframe_loc_int_float(dtype1, dtype2): - df = cudf.DataFrame( - {"a": [10, 11, 12, 13, 14]}, - index=cudf.Index([1, 2, 3, 4, 5], dtype=dtype1), - ) - pdf = df.to_pandas() - - gidx = cudf.Index([2, 3, 4], dtype=dtype2) - pidx = gidx.to_pandas() - - actual = df.loc[gidx] - expected = pdf.loc[pidx] - - assert_eq(actual, expected, check_index_type=True, check_dtype=True) - - -@pytest.mark.parametrize("names", [["abc", "def"], [1, 2], ["abc", 10]]) -def test_dataframe_multiindex_column_names(names): - arrays = [["A", "A", "B", "B"], ["one", "two", "one", "two"]] - tuples = list(zip(*arrays, strict=True)) - index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) - - pdf = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=index) - df = cudf.from_pandas(pdf) - - assert_eq(df, pdf) - assert_eq(df.columns.names, pdf.columns.names) - pdf.columns.names = names - df.columns.names = names - assert_eq(df, pdf) - assert_eq(df.columns.names, pdf.columns.names) - - -def test_roundtrip_dataframe_plc_table(na_data): - pdf = na_data - expect = cudf.DataFrame.from_pandas(pdf) - actual = cudf.DataFrame.from_pylibcudf(*expect.to_pylibcudf()) - assert_eq(expect, actual) - - -def test_dataframe_midx_columns_loc(): - idx_1 = ["Hi", "Lo"] - idx_2 = ["I", "II", "III"] - idx = cudf.MultiIndex.from_product([idx_1, idx_2]) - - data_rand = ( - np.random.default_rng(seed=0) - .uniform(0, 1, 3 * len(idx)) - .reshape(3, -1) - ) - df = cudf.DataFrame(data_rand, index=["A", "B", "C"], columns=idx) - pdf = df.to_pandas() - - assert_eq(df.shape, pdf.shape) - - expected = pdf.loc[["A", "B"]] - actual = df.loc[["A", "B"]] - - assert_eq(expected, actual) - assert_eq(df, pdf) - - -def test_rename_reset_label_dtype(): - data = {1: [2]} - col_mapping = {1: "a"} - result = cudf.DataFrame(data).rename(columns=col_mapping) - expected = pd.DataFrame(data).rename(columns=col_mapping) - assert_eq(result, expected) - - -def test_insert_reset_label_dtype(): - result = cudf.DataFrame({1: [2]}) - expected = pd.DataFrame({1: [2]}) - result.insert(1, "a", [2]) - expected.insert(1, "a", [2]) - assert_eq(result, expected) - - -def test_setitem_reset_label_dtype(): - result = cudf.DataFrame({1: [2]}) - expected = pd.DataFrame({1: [2]}) - result["a"] = [2] - expected["a"] = [2] - assert_eq(result, expected) - - -def test_dataframe_midx_cols_getitem(): - df = cudf.DataFrame( - { - "a": ["a", "b", "c"], - "b": ["b", "", ""], - 
"c": [10, 11, 12], - } - ) - df.columns = df.set_index(["a", "b"]).index - pdf = df.to_pandas() - - expected = df["c"] - actual = pdf["c"] - assert_eq(expected, actual) - df = cudf.DataFrame( - [[1, 0], [0, 1]], - columns=[ - ["foo", "foo"], - ["location", "location"], - ["x", "y"], - ], - ) - df = df.assign(bools=cudf.Series([True, False], dtype="bool")) - assert_eq(df["bools"], df.to_pandas()["bools"]) From fe33fdbe2f454b53ebb2ac2d9b8d05a97d6f10d6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 3 Sep 2025 20:06:12 -0500 Subject: [PATCH 252/366] use 'nvidia-ml-py' package for 'pynvml' module (#19862) Contributes to https://github.com/rapidsai/build-infra/issues/293 The `pynvml` *package* (https://github.com/gpuopenanalytics/pynvml) provides 2 things: * the `pynvml` **Python module**, transitively via a dependency on the `nvidia-ml-py` package * the `pynvml_utils` Python module This project doesn't need the `pynvml_utils` Python module, so this PR proposes dropping the dependency on the `pynvml` package in favor of `nvidia-ml-py`. ## Notes for Reviewers # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19862 --- conda/environments/all_cuda-129_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-129_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-130_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-130_arch-x86_64.yaml | 2 +- conda/recipes/dask-cudf/recipe.yaml | 4 +++- dependencies.yaml | 4 +++- python/dask_cudf/pyproject.toml | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index a4a81ebaa66..03505a99545 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -60,6 +60,7 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py +- nvidia-ml-py>=12.560.30 - nvtx>=0.2.1 - openpyxl - packaging @@ -70,7 +71,6 @@ dependencies: - pre-commit - pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 -- pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 0fa1e5f3ddc..63ec249d456 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -61,6 +61,7 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py +- nvidia-ml-py>=12.560.30 - nvtx>=0.2.1 - openpyxl - packaging @@ -71,7 +72,6 @@ dependencies: - pre-commit - pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 -- pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index c64e9000a08..454a4053fdf 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -60,6 +60,7 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py +- nvidia-ml-py>=12.560.30 - nvtx>=0.2.1 - openpyxl - packaging @@ -70,7 +71,6 @@ dependencies: - pre-commit - pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 -- pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 7ae24161e60..580fc9e4b1b 100644 --- 
a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -61,6 +61,7 @@ dependencies: - numpy>=1.23,<3.0a0 - numpydoc - nvidia-ml-py +- nvidia-ml-py>=12.560.30 - nvtx>=0.2.1 - openpyxl - packaging @@ -71,7 +72,6 @@ dependencies: - pre-commit - pyarrow>=15.0.0 - pydata-sphinx-theme>=0.15.4 -- pynvml>=12.0.0,<13.0.0a0 - pytest - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/recipes/dask-cudf/recipe.yaml b/conda/recipes/dask-cudf/recipe.yaml index fc9e20f4192..077195dfb97 100644 --- a/conda/recipes/dask-cudf/recipe.yaml +++ b/conda/recipes/dask-cudf/recipe.yaml @@ -34,7 +34,9 @@ requirements: run: - python - cudf =${{ version }} - - pynvml >=12.0.0,<13.0.0a0 + # 'nvidia-ml-py' provides the 'pynvml' module, since v12.560.30 + # ref: https://github.com/conda-forge/nvidia-ml-py-feedstock/pull/24 + - nvidia-ml-py>=12.560.30 - rapids-dask-dependency =${{ minor_version }} - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} diff --git a/dependencies.yaml b/dependencies.yaml index 67e627aa718..fdb33201aa3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -734,7 +734,9 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - pynvml>=12.0.0,<13.0.0a0 + # 'nvidia-ml-py' provides the 'pynvml' module, since v12.560.30 + # ref: https://github.com/conda-forge/nvidia-ml-py-feedstock/pull/24 + - nvidia-ml-py>=12.560.30 - rapids-dask-dependency==25.10.*,>=0.0.0a0 run_custreamz: common: diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 3b5f7a1d2e7..ef0a7d9376f 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -23,8 +23,8 @@ dependencies = [ "cupy-cuda13x>=13.6.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", + "nvidia-ml-py>=12.560.30", "pandas>=2.0,<2.4.0dev0", - "pynvml>=12.0.0,<13.0.0a0", "rapids-dask-dependency==25.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
 classifiers = [

From 86509db03cb17e95c0011c910db16e3bf29e6d87 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 Sep 2025 18:10:55 -0700
Subject: [PATCH 253/366] Move test_categorical/dask/serialize.py to new cuDF classic test directory structure (#19877)

Towards https://github.com/rapidsai/cudf/issues/9999
Towards https://github.com/rapidsai/cudf/issues/15723

Also consolidates `test_version.py` into `test_api.py`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19877
---
 python/cudf/cudf/tests/dask/__init__.py       |   0
 .../test_dataframe_utils.py}                  |   0
 .../cudf/tests/{ => dask}/test_serialize.py   |  35 +-
 .../tests/dataframe/methods/test_assign.py    |  47 +
 .../cudf/tests/dataframe/methods/test_copy.py |  24 +-
 .../tests/dataframe/methods/test_set_index.py |  15 +
 .../dataframe/methods/test_sort_index.py      |  14 +
 .../tests/dtypes/test_categoricaldtype.py     |  29 +
 python/cudf/cudf/tests/groupby/test_fillna.py |  21 +
 .../categoricalindex/methods/__init__.py      |   0
 .../methods/test_add_categories.py            |  15 +
 .../methods/test_as_ordered.py                |  18 +
 .../methods/test_remove_categories.py         |  15 +
 .../methods/test_reorder_categories.py        |  17 +
 .../methods/test_set_categories.py            |  17 +
 .../categoricalindex/test_attributes.py       |  23 +-
 .../tests/indexes/index/test_constructor.py   |  10 +
 .../cudf/tests/series/accessors/test_cat.py   | 349 +++++++
 .../tests/series/indexing/test_getitem.py     |  27 +
 .../tests/series/indexing/test_setitem.py     |  12 +
 .../cudf/tests/series/methods/test_astype.py  |  73 ++
 .../cudf/tests/series/methods/test_nunique.py |  21 +-
 .../tests/series/methods/test_reductions.py   |  19 +
 .../cudf/tests/series/methods/test_unique.py  |  23 +-
 python/cudf/cudf/tests/series/test_binops.py  |  26 +
 .../cudf/tests/series/test_constructors.py    |  78 ++
 python/cudf/cudf/tests/test_api.py            |   9 +
 python/cudf/cudf/tests/test_categorical.py    | 895 ------------------
 python/cudf/cudf/tests/test_version.py        |  12 -
 29 files changed, 916 insertions(+), 928 deletions(-)
 create mode 100644 python/cudf/cudf/tests/dask/__init__.py
 rename python/cudf/cudf/tests/{test_dask.py => dask/test_dataframe_utils.py} (100%)
 rename python/cudf/cudf/tests/{ => dask}/test_serialize.py (94%)
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/__init__.py
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_add_categories.py
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_as_ordered.py
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_remove_categories.py
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_reorder_categories.py
 create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_set_categories.py
 create mode 100644 python/cudf/cudf/tests/series/accessors/test_cat.py
 delete mode 100644 python/cudf/cudf/tests/test_categorical.py
 delete mode 100644 python/cudf/cudf/tests/test_version.py

diff --git a/python/cudf/cudf/tests/dask/__init__.py b/python/cudf/cudf/tests/dask/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/tests/test_dask.py b/python/cudf/cudf/tests/dask/test_dataframe_utils.py
similarity index 100%
rename from python/cudf/cudf/tests/test_dask.py
rename to python/cudf/cudf/tests/dask/test_dataframe_utils.py
diff --git a/python/cudf/cudf/tests/test_serialize.py
b/python/cudf/cudf/tests/dask/test_serialize.py similarity index 94% rename from python/cudf/cudf/tests/test_serialize.py rename to python/cudf/cudf/tests/dask/test_serialize.py index 59609158728..132ac6d5006 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/dask/test_serialize.py @@ -6,12 +6,12 @@ import msgpack import numpy as np import pandas as pd +import pyarrow as pa import pytest from packaging import version import cudf -from cudf.core.column import as_column -from cudf.testing import _utils as utils, assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( @@ -230,14 +230,7 @@ def test_serialize_multi_index(): def test_serialize_masked_series(): - nelem = 50 - rng = np.random.default_rng(seed=0) - data = rng.random(nelem) - mask = utils.random_bitmask(nelem) - bitmask = utils.expand_bits_to_bytes(mask)[:nelem] - null_count = utils.count_zero(bitmask) - assert null_count >= 0 - sr = cudf.Series._from_column(as_column(data).set_mask(mask)) + sr = cudf.Series(pa.array([1, None, 2])) outsr = cudf.Series.deserialize(*sr.serialize()) assert_eq(sr, outsr) @@ -470,3 +463,25 @@ def test_serialize_decimal_columns(data): ) recreated = df.__class__.deserialize(*df.serialize()) assert_eq(recreated, df) + + +@pytest.mark.parametrize( + "data", + [ + {"a": pd.Series(["a", "b", "c", "a", "c", "b"]).astype("category")}, + { + "a": pd.Series(["a", "a", "b", "b"]).astype("category"), + "b": pd.Series(["b", "b", "c", "c"]).astype("category"), + "c": pd.Series(["c", "c", "a", "a"]).astype("category"), + }, + { + "a": pd.Series(["a", None, "b", "b"]).astype("category"), + "b": pd.Series(["b", "b", None, "c"]).astype("category"), + "c": pd.Series(["c", "c", "a", None]).astype("category"), + }, + ], +) +def test_serialize_categorical_columns(data): + df = cudf.DataFrame(data) + recreated = df.__class__.deserialize(*df.serialize()) + assert_eq(recreated, df) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_assign.py b/python/cudf/cudf/tests/dataframe/methods/test_assign.py index 9cbd740a4ef..02159a31e63 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_assign.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_assign.py @@ -34,3 +34,50 @@ def test_assign_callable(mapping): expect = df.assign(**mapping) actual = cdf.assign(**mapping) assert_eq(expect, actual) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4], + ["a", "1", "2", "1", "a"], + pd.Series(["a", "1", "22", "1", "aa"]), + pd.Series(["a", "1", "22", "1", "aa"], dtype="category"), + pd.Series([1, 2, 3, 4], dtype="int64"), + pd.Series([1, 2.3, 3, 4], dtype="float"), + [None, 1, None, 2, None], + ["a"], + ], +) +@pytest.mark.parametrize( + "categories", + [ + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["a", "b", "c"], + ["22", "b", "c"], + ["a"], + ], +) +def test_categorical_assignment(data, categories): + cat_dtype = pd.CategoricalDtype(categories) + pd_df = pd.DataFrame({"a": np.ones(len(data))}) + cd_df = cudf.from_pandas(pd_df) + + pd_cat_series = pd.Series(data, dtype=cat_dtype) + # assign categorical series + pd_df.assign(cat_col=pd_cat_series) + cd_df.assign(cat_col=pd_cat_series) + assert_eq(pd_df, cd_df) + + # assign categorical array + # needed for dask_cudf support for including file name + # as a categorical column + # see issue: https://github.com/rapidsai/cudf/issues/2269 + pd_df = pd.DataFrame({"a": np.ones(len(data))}) + cd_df = cudf.from_pandas(pd_df) + + pd_categorical = pd.Categorical(data, dtype=cat_dtype) + pd_df.assign(cat_col=pd_categorical) + 
cd_df.assign(cat_col=pd_categorical) + assert_eq(pd_df, cd_df) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_copy.py b/python/cudf/cudf/tests/dataframe/methods/test_copy.py index 7910451db57..787e2a7f0af 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_copy.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_copy.py @@ -6,7 +6,7 @@ import pandas as pd import pytest -from cudf.core.dataframe import DataFrame +import cudf from cudf.testing import assert_eq, assert_neq """ @@ -43,7 +43,7 @@ def test_dataframe_deep_copy(copy_fn): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) copy_pdf = copy_fn(pdf) copy_gdf = copy_fn(gdf) copy_pdf["b"] = [0, 0, 0] @@ -67,7 +67,7 @@ def test_cudf_dataframe_copy(copy_fn, ncols, all_supported_types_as_str): for i in range(ncols) } ) - df = DataFrame.from_pandas(pdf) + df = cudf.DataFrame.from_pandas(pdf) copy_df = copy_fn(df) assert_eq(df, copy_df) @@ -85,7 +85,7 @@ def test_cudf_dataframe_copy_then_insert( for i in range(ncols) } ) - df = DataFrame.from_pandas(pdf) + df = cudf.DataFrame.from_pandas(pdf) copy_df = copy_fn(df) copy_pdf = copy_fn(pdf) copy_df["aa"] = pd.Series(rng.integers(0, 1000, 20)).astype( @@ -102,7 +102,7 @@ def test_deep_copy_write_in_place(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=True) sr = gdf["b"] @@ -117,7 +117,7 @@ def test_shallow_copy_write_in_place(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=False) sr = gdf["a"] @@ -133,10 +133,20 @@ def test_dataframe_copy_shallow(): pdf = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"] ) - gdf = DataFrame.from_pandas(pdf) + gdf = cudf.DataFrame.from_pandas(pdf) copy_pdf = pdf.copy(deep=False) copy_gdf = gdf.copy(deep=False) copy_pdf["b"] = [0, 0, 0] copy_gdf["b"] = [0, 0, 0] assert_eq(pdf["b"], copy_pdf["b"]) assert_eq(gdf["b"], copy_gdf["b"]) + + +def test_categorical_dataframe_slice_copy(): + pdf = pd.DataFrame({"g": pd.Series(["a", "b", "z"], dtype="category")}) + gdf = cudf.from_pandas(pdf) + + exp = pdf[1:].copy() + gdf = gdf[1:].copy() + + assert_eq(exp, gdf) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_set_index.py b/python/cudf/cudf/tests/dataframe/methods/test_set_index.py index e1a99df4119..b9727941e0f 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_set_index.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_set_index.py @@ -103,3 +103,18 @@ def test_set_index_multi(drop): df.set_index(["b", "d", "e"], drop=drop), gdf.set_index(["b", "d", "e"], drop=drop), ) + + +def test_df_cat_set_index(): + df = cudf.DataFrame( + { + "a": pd.Categorical(list("aababcabbc"), categories=list("abc")), + "b": np.arange(10), + } + ) + got = df.set_index("a") + + pddf = df.to_pandas() + expect = pddf.set_index("a") + + assert_eq(got, expect) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py index 905c4622ade..8e58755eff9 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py @@ -145,3 +145,17 @@ def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names(): gdf = 
cudf.DataFrame([[1, 2, 3]], columns=["b", "a", "c"]) result = gdf.sort_index(axis=1, ignore_index=True) assert result._data.names == tuple(result._data.keys()) + + +def test_df_cat_sort_index(): + df = cudf.DataFrame( + { + "a": pd.Categorical(list("aababcabbc"), categories=list("abc")), + "b": np.arange(10), + } + ) + + got = df.set_index("a").sort_index() + expect = df.to_pandas().set_index("a").sort_index() + + assert_eq(got, expect) diff --git a/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py b/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py index 93311fd3c02..3205f54dea8 100644 --- a/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py +++ b/python/cudf/cudf/tests/dtypes/test_categoricaldtype.py @@ -4,6 +4,7 @@ import pytest import cudf +from cudf.testing import assert_eq @pytest.mark.parametrize( @@ -34,3 +35,31 @@ def test_cdf_to_pandas(data, categorical_ordered): categories=data, ordered=categorical_ordered ).to_pandas() ) + + +@pytest.mark.parametrize( + "categories", + [ + [], + [1, 2, 3], + pd.Series(["a", "c", "b"], dtype="category"), + pd.Series([1, 2, 3, 4, -100], dtype="category"), + ], +) +def test_categorical_dtype(categories, categorical_ordered): + expected = pd.CategoricalDtype( + categories=categories, ordered=categorical_ordered + ) + got = cudf.CategoricalDtype( + categories=categories, ordered=categorical_ordered + ) + assert_eq(expected, got) + + expected = pd.CategoricalDtype(categories=categories) + got = cudf.CategoricalDtype(categories=categories) + assert_eq(expected, got) + + +def test_categorical_dtype_ordered_not_settable(): + with pytest.raises(AttributeError): + cudf.CategoricalDtype().ordered = False diff --git a/python/cudf/cudf/tests/groupby/test_fillna.py b/python/cudf/cudf/tests/groupby/test_fillna.py index c8f45818357..7a3b85b66be 100644 --- a/python/cudf/cudf/tests/groupby/test_fillna.py +++ b/python/cudf/cudf/tests/groupby/test_fillna.py @@ -10,6 +10,7 @@ PANDAS_VERSION, ) from cudf.testing import assert_groupby_results_equal +from cudf.testing._utils import assert_exceptions_equal from cudf.testing.dataset_generator import rand_dataframe @@ -189,3 +190,23 @@ def test_groupby_fillna_method(method): assert_groupby_results_equal( expect[value_cols], got[value_cols], sort=False ) + + +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Does not warn on older versions of pandas", +) +def test_cat_groupby_fillna(): + ps = pd.Series(["a", "b", "c"], dtype="category") + gs = cudf.from_pandas(ps) + + with pytest.warns(FutureWarning): + pg = ps.groupby(ps) + gg = gs.groupby(gs) + + assert_exceptions_equal( + lfunc=pg.fillna, + rfunc=gg.fillna, + lfunc_args_and_kwargs=(("d",), {}), + rfunc_args_and_kwargs=(("d",), {}), + ) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/__init__.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_add_categories.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_add_categories.py new file mode 100644 index 00000000000..09ae5580001 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_add_categories.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_index_add_categories(): + pd_ci = pd.CategoricalIndex([1, 2, 3]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.add_categories([4]) + result = cudf_ci.add_categories([4]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_as_ordered.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_as_ordered.py new file mode 100644 index 00000000000..1a327555913 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_as_ordered.py @@ -0,0 +1,18 @@ +# Copyright (c) 2018-2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("method", ["as_ordered", "as_unordered"]) +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_as_ordered(method, ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = getattr(pd_ci, method)() + result = getattr(cudf_ci, method)() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_remove_categories.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_remove_categories.py new file mode 100644 index 00000000000..7457368cebf --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_remove_categories.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_index_remove_categories(): + pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 2, 3, 4]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.remove_categories([4]) + result = cudf_ci.remove_categories([4]) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_reorder_categories.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_reorder_categories.py new file mode 100644 index 00000000000..b1c31be1232 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_reorder_categories.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_reorder_categories(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 3, 2, 4]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) + result = cudf_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_set_categories.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_set_categories.py new file mode 100644 index 00000000000..4616e666e71 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_set_categories.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_set_categories(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3]) + cudf_ci = cudf.from_pandas(pd_ci) + + expected = pd_ci.set_categories([1, 2, 3, 4], ordered=ordered) + result = cudf_ci.set_categories([1, 2, 3, 4], ordered=ordered) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py b/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py index cfdf26877b6..d563bc332f6 100644 --- a/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/categoricalindex/test_attributes.py @@ -3,7 +3,8 @@ import pandas as pd import pytest -from cudf.core.index import CategoricalIndex +import cudf +from cudf.testing import assert_eq @pytest.mark.parametrize( @@ -12,9 +13,27 @@ def test_categorical_index_is_unique_monotonic(testlist): # Assuming unordered categorical data cannot be "monotonic" raw_cat = pd.Categorical(testlist, ordered=True) - index = CategoricalIndex(raw_cat) + index = cudf.CategoricalIndex(raw_cat) index_pd = pd.CategoricalIndex(raw_cat) assert index.is_unique == index_pd.is_unique assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing + + +@pytest.mark.parametrize( + "data", [["$ 1", "$ 2", "hello"], ["($) 1", "( 2", "hello", "^1$"]] +) +@pytest.mark.parametrize("value", ["$ 1", "hello", "$", "^1$"]) +def test_categorical_string_index_contains(data, value): + idx = cudf.CategoricalIndex(data) + pidx = idx.to_pandas() + + assert_eq(value in idx, value in pidx) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_index_ordered(ordered): + pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) + cudf_ci = cudf.from_pandas(pd_ci) + assert pd_ci.ordered == cudf_ci.ordered diff --git a/python/cudf/cudf/tests/indexes/index/test_constructor.py b/python/cudf/cudf/tests/indexes/index/test_constructor.py index 6c8abdec247..a46c4fec49e 100644 --- a/python/cudf/cudf/tests/indexes/index/test_constructor.py +++ b/python/cudf/cudf/tests/indexes/index/test_constructor.py @@ -47,3 +47,13 @@ def test_infer_timedelta_index(data, timedelta_types_as_str): pdi = gdi.to_pandas() assert_eq(pdi, gdi) + + +def test_categorical_index_with_dtype(): + dtype = cudf.CategoricalDtype(categories=["a", "z", "c"]) + gi = cudf.Index(["z", "c", "a"], dtype=dtype) + pi = pd.Index(["z", "c", "a"], dtype=dtype.to_pandas()) + + assert_eq(gi, pi) + assert_eq(gi.dtype, pi.dtype) + assert_eq(gi.dtype.categories, pi.dtype.categories) diff --git a/python/cudf/cudf/tests/series/accessors/test_cat.py b/python/cudf/cudf/tests/series/accessors/test_cat.py new file mode 100644 index 00000000000..50f01173e5a --- /dev/null +++ b/python/cudf/cudf/tests/series/accessors/test_cat.py @@ -0,0 +1,349 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+from textwrap import dedent
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
+
+
+def test_categorical_basic():
+    cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
+    cudf_cat = cudf.Index(cat)
+    assert_eq(cat.codes, cudf_cat.codes.to_numpy())
+
+    pdsr = pd.Series(cat, index=["p", "q", "r", "s", "t"])
+    sr = cudf.Series(cat, index=["p", "q", "r", "s", "t"])
+    assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False)
+
+    # Test attributes
+    assert_eq(pdsr.cat.categories, sr.cat.categories)
+    assert pdsr.cat.ordered == sr.cat.ordered
+
+    np.testing.assert_array_equal(
+        pdsr.cat.codes.values, sr.cat.codes.to_numpy()
+    )
+
+    assert str(sr) == str(pdsr)
+
+
+def test_categorical_integer():
+    cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"])
+    pdsr = pd.Series(cat)
+    sr = cudf.Series(cat)
+    np.testing.assert_array_equal(
+        cat.codes, sr.cat.codes.astype(cat.codes.dtype).fillna(-1).to_numpy()
+    )
+    assert sr.null_count == 2
+
+    np.testing.assert_array_equal(
+        pdsr.cat.codes.values,
+        sr.cat.codes.astype(pdsr.cat.codes.dtype).fillna(-1).to_numpy(),
+    )
+
+    expect_str = dedent(
+        """\
+        0       a
+        1    <NA>
+        2    <NA>
+        3       c
+        4       a
+        dtype: category
+        Categories (3, object): ['a', 'b', 'c']"""
+    )
+    assert str(sr) == expect_str
+
+
+def test_categorical_element_indexing():
+    """
+    Element indexing to a cat column must give the underlying object
+    not the numerical index.
+    """
+    cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
+    pdsr = pd.Series(cat)
+    sr = cudf.Series(cat)
+    assert_eq(pdsr, sr)
+    assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False)
+
+
+def test_categorical_empty():
+    cat = pd.Categorical([])
+    pdsr = pd.Series(cat)
+    sr = cudf.Series(cat)
+    np.testing.assert_array_equal(cat.codes, sr.cat.codes.to_numpy())
+
+    # Test attributes
+    assert_eq(pdsr.cat.categories, sr.cat.categories)
+    assert pdsr.cat.ordered == sr.cat.ordered
+
+    np.testing.assert_array_equal(
+        pdsr.cat.codes.values, sr.cat.codes.to_numpy()
+    )
+
+
+def test_categorical_set_categories():
+    cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
+    psr = pd.Series(cat)
+    sr = cudf.Series.from_pandas(cat)
+
+    # adding category
+    expect = psr.cat.set_categories(["a", "b", "c", "d"])
+    got = sr.cat.set_categories(["a", "b", "c", "d"])
+    assert_eq(expect, got)
+
+    # removing category
+    expect = psr.cat.set_categories(["a", "b"])
+    got = sr.cat.set_categories(["a", "b"])
+    assert_eq(expect, got)
+
+
+def test_categorical_set_categories_preserves_order():
+    series = pd.Series([1, 0, 0, 0, 2]).astype("category")
+    # reassigning categories should preserve element ordering
+    assert_eq(
+        series.cat.set_categories([1, 2]),
+        cudf.Series(series).cat.set_categories([1, 2]),
+    )
+
+
+def test_categorical_as_ordered():
+    categories = list("abc")
+    codes = [0, 0, 1, 0, 1, 2, 0, 1, 1, 2]
+    pd_str_cat = pd.Categorical.from_codes(codes, categories=categories)
+    pd_sr = pd.Series(pd_str_cat.set_ordered(False))
+    cd_sr = cudf.Series(pd_str_cat.set_ordered(False))
+
+    assert cd_sr.cat.ordered is False
+    assert cd_sr.cat.ordered == pd_sr.cat.ordered
+
+    pd_sr_1 = pd_sr.cat.as_ordered()
+    cd_sr_1 = cd_sr.cat.as_ordered()
+
+    assert cd_sr_1.cat.ordered is True
+    assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered
+    assert str(cd_sr_1) == str(pd_sr_1)
+
+
+def test_categorical_as_unordered():
+    categories = list("abc")
+    codes = [0, 0, 1, 0, 1,
2, 0, 1, 1, 2] + pd_str_cat = pd.Categorical.from_codes(codes, categories=categories) + pd_sr = pd.Series(pd_str_cat.set_ordered(True)) + cd_sr = cudf.Series(pd_str_cat.set_ordered(True)) + + assert cd_sr.cat.ordered is True + assert cd_sr.cat.ordered == pd_sr.cat.ordered + + pd_sr_1 = pd_sr.cat.as_unordered() + cd_sr_1 = cd_sr.cat.as_unordered() + + assert cd_sr_1.cat.ordered is False + assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered + assert str(cd_sr_1) == str(pd_sr_1) + + +@pytest.mark.parametrize("from_ordered", [True, False]) +@pytest.mark.parametrize("to_ordered", [True, False]) +def test_categorical_reorder_categories(from_ordered, to_ordered): + categories = list("abc") + codes = [0, 0, 1, 0, 1, 2, 0, 1, 1, 2] + pd_str_cat = pd.Categorical.from_codes(codes, categories=categories) + pd_sr = pd.Series(pd_str_cat.set_ordered(from_ordered)) + cd_sr = cudf.Series(pd_str_cat.set_ordered(from_ordered)) + + assert_eq(pd_sr, cd_sr) + + assert str(pd_sr) == str(cd_sr) + + pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) + cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) + + assert_eq(pd_sr_1, cd_sr_1) + + assert str(cd_sr_1) == str(pd_sr_1) + + +def test_categorical_add_categories(): + categories = list("abc") + codes = [0, 0, 1, 0, 1, 2, 0, 1, 1, 2] + pd_str_cat = pd.Categorical.from_codes(codes, categories=categories) + pd_sr = pd.Series(pd_str_cat) + cd_sr = cudf.Series(pd_str_cat) + + assert_eq(pd_sr, cd_sr) + + assert str(pd_sr) == str(cd_sr) + + pd_sr_1 = pd_sr.cat.add_categories(["d"]) + cd_sr_1 = cd_sr.cat.add_categories(["d"]) + + assert "d" in pd_sr_1.cat.categories.to_list() + assert "d" in cd_sr_1.cat.categories.to_pandas().to_list() + + assert_eq(pd_sr_1, cd_sr_1) + + +def test_categorical_remove_categories(): + categories = list("abc") + codes = [0, 0, 1, 0, 1, 2, 0, 1, 1, 2] + pd_str_cat = pd.Categorical.from_codes(codes, categories=categories) + pd_sr = pd.Series(pd_str_cat) + cd_sr = cudf.Series(pd_str_cat) + + assert_eq(pd_sr, cd_sr) + + assert str(pd_sr) == str(cd_sr) + + pd_sr_1 = pd_sr.cat.remove_categories(["a"]) + cd_sr_1 = cd_sr.cat.remove_categories(["a"]) + + assert "a" not in pd_sr_1.cat.categories.to_list() + assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list() + + assert_eq(pd_sr_1, cd_sr_1) + + # test using ordered operators + assert_exceptions_equal( + lfunc=cd_sr.to_pandas().cat.remove_categories, + rfunc=cd_sr.cat.remove_categories, + lfunc_args_and_kwargs=([["a", "d"]], {}), + rfunc_args_and_kwargs=([["a", "d"]], {}), + ) + + +@pytest.mark.filterwarnings("ignore:Can't safely cast column:UserWarning") +@pytest.mark.parametrize( + "data", + [ + pd.Series([1, 2, 3, 89]), + pd.Series(["a", "b", "c", "c", "b", "a", "b", "b"]), + pd.Series(["aa", "b", "c", "c", "bb", "bb", "a", "b", "b"]), + pd.Series([1, 2, 3, 89, None, np.nan, np.nan], dtype="float64"), + pd.Series([1, 2, 3, 89], dtype="float64"), + pd.Series([1, 2.5, 3.001, 89], dtype="float64"), + pd.Series([None, None, None]), + pd.Series([], dtype="float64"), + ], +) +@pytest.mark.parametrize( + "new_categories", + [ + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["a", "b", "c"], + [], + pd.Series(["a", "b", "c"]), + pd.Series(["a", "b", "c"], dtype="category"), + pd.Series([-100, 10, 11, 0, 1, 2], dtype="category"), + ], +) +def test_categorical_set_categories_categoricals(data, new_categories): + pd_data = data.astype("category") + gd_data = cudf.from_pandas(pd_data) + + expected = pd_data.cat.set_categories(new_categories=new_categories) + actual = 
gd_data.cat.set_categories(new_categories=new_categories)
+
+    assert_eq(expected, actual)
+
+    expected = pd_data.cat.set_categories(
+        new_categories=pd.Series(new_categories, dtype="category")
+    )
+    actual = gd_data.cat.set_categories(
+        new_categories=cudf.Series(new_categories, dtype="category")
+    )
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.filterwarnings("ignore:Can't safely cast column:UserWarning")
+@pytest.mark.parametrize(
+    "data,add",
+    [
+        ([1, 2, 3], [100, 11, 12]),
+        ([1, 2, 3], [0.01, 9.7, 15.0]),
+        ([0.0, 6.7, 10.0], [100, 11, 12]),
+        ([0.0, 6.7, 10.0], [0.01, 9.7, 15.0]),
+        (["a", "bd", "ef"], ["asdfsdf", "bddf", "eff"]),
+        ([1, 2, 3], []),
+        ([0.0, 6.7, 10.0], []),
+        (["a", "bd", "ef"], []),
+    ],
+)
+def test_add_categories(data, add):
+    pds = pd.Series(data, dtype="category")
+    gds = cudf.Series(data, dtype="category")
+
+    expected = pds.cat.add_categories(add)
+    actual = gds.cat.add_categories(add)
+
+    assert_eq(
+        expected.cat.codes, actual.cat.codes.astype(expected.cat.codes.dtype)
+    )
+
+    # Need to type-cast pandas object to str due to mixed-type
+    # support in "object"
+    assert_eq(
+        expected.cat.categories.astype("str")
+        if (expected.cat.categories.dtype == "object")
+        else expected.cat.categories,
+        actual.cat.categories,
+    )
+
+
+@pytest.mark.parametrize(
+    "data,add",
+    [
+        ([1, 2, 3], [1, 3, 11]),
+        ([0.0, 6.7, 10.0], [1, 2, 0.0]),
+        (["a", "bd", "ef"], ["a", "bd", "a"]),
+    ],
+)
+def test_add_categories_error(data, add):
+    pds = pd.Series(data, dtype="category")
+    gds = cudf.Series(data, dtype="category")
+
+    assert_exceptions_equal(
+        pds.cat.add_categories,
+        gds.cat.add_categories,
+        ([add],),
+        ([add],),
+    )
+
+
+def test_add_categories_mixed_error():
+    gds = cudf.Series(["a", "bd", "ef"], dtype="category")
+
+    with pytest.raises(TypeError):
+        gds.cat.add_categories([1, 2, 3])
+
+    gds = cudf.Series([1, 2, 3], dtype="category")
+
+    with pytest.raises(TypeError):
+        gds.cat.add_categories(["a", "bd", "ef"])
+
+
+def test_categorical_allow_nan():
+    gs = cudf.Series([1, 2, np.nan, 10, np.nan, None], nan_as_null=False)
+    gs = gs.astype("category")
+    expected_codes = cudf.Series([0, 1, 3, 2, 3, None], dtype="uint8")
+    assert_eq(expected_codes, gs.cat.codes)
+
+    expected_categories = cudf.Index([1.0, 2.0, 10.0, np.nan], dtype="float64")
+    assert_eq(expected_categories, gs.cat.categories)
+
+    actual_ps = gs.to_pandas()
+    expected_ps = pd.Series(
+        [1.0, 2.0, np.nan, 10.0, np.nan, np.nan], dtype="category"
+    )
+    assert_eq(actual_ps, expected_ps)
+
+
+def test_cat_iterate_error():
+    s = cudf.Series([1, 2, 3], dtype="category")
+    with pytest.raises(TypeError):
+        iter(s.cat)
diff --git a/python/cudf/cudf/tests/series/indexing/test_getitem.py b/python/cudf/cudf/tests/series/indexing/test_getitem.py
index 37e0c10618f..2909980bb0e 100644
--- a/python/cudf/cudf/tests/series/indexing/test_getitem.py
+++ b/python/cudf/cudf/tests/series/indexing/test_getitem.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 
+import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
@@ -238,3 +239,29 @@ def test_string_slice_with_mask():
 
     assert_eq(actual._column.null_count, expected._column.null_count)
     assert_eq(actual, expected)
+
+
+def test_categorical_masking():
+    """
+    Test common operation for getting all rows that match a certain
+    category.
+ """ + cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) + pdsr = pd.Series(cat) + sr = cudf.Series(cat) + + # check scalar comparison + expect_matches = pdsr == "a" + got_matches = sr == "a" + + np.testing.assert_array_equal( + expect_matches.values, got_matches.to_numpy() + ) + + # mask series + expect_masked = pdsr[expect_matches] + got_masked = sr[got_matches] + + assert len(expect_masked) == len(got_masked) + assert got_masked.null_count == 0 + assert_eq(got_masked, expect_masked) diff --git a/python/cudf/cudf/tests/series/indexing/test_setitem.py b/python/cudf/cudf/tests/series/indexing/test_setitem.py index 2468477bdb7..5024adbe73f 100644 --- a/python/cudf/cudf/tests/series/indexing/test_setitem.py +++ b/python/cudf/cudf/tests/series/indexing/test_setitem.py @@ -850,3 +850,15 @@ def test_series_setitem_decimal(data, dtype, item, to, expect): expect = cudf.Series([decimal.Decimal(x) for x in expect], dtype=dtype) data[to] = item assert_eq(data, expect) + + +def test_categorical_setitem_with_nan(): + gs = cudf.Series( + [1, 2, np.nan, 10, np.nan, None], nan_as_null=False + ).astype("category") + gs[[1, 3]] = np.nan + + expected_series = cudf.Series( + [1, np.nan, np.nan, np.nan, np.nan, None], nan_as_null=False + ).astype(gs.dtype) + assert_eq(gs, expected_series) diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py b/python/cudf/cudf/tests/series/methods/test_astype.py index 8d51d6226bc..daf5388e272 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -1246,3 +1246,76 @@ def test_typecast_from_decimal(precision, scale, signed_integer_types_as_str): assert_eq(got, expected) assert_eq(got.dtype, expected.dtype) + + +@pytest.mark.parametrize( + "data", + [ + pd.Series([1, 2, 3, 89]), + pd.Series([1, 2, 3, 89, 3, 1, 89], dtype="category"), + pd.Series(["1", "2", "3", "4", "5"], dtype="category"), + pd.Series(["1.0", "2.5", "3.001", "9"], dtype="category"), + pd.Series(["1", "2", "3", None, "4", "5"], dtype="category"), + pd.Series(["1.0", "2.5", "3.001", None, "9"], dtype="category"), + pd.Series(["a", "b", "c", "c", "b", "a", "b", "b"]), + pd.Series(["aa", "b", "c", "c", "bb", "bb", "a", "b", "b"]), + pd.Series([1, 2, 3, 89, None, np.nan, np.nan], dtype="float64"), + pd.Series([1, 2, 3, 89], dtype="float64"), + pd.Series([1, 2.5, 3.001, 89], dtype="float64"), + pd.Series([None, None, None]), + pd.Series([], dtype="float64"), + ], +) +@pytest.mark.parametrize( + "categories", + [ + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["aa", "bb", "c"], + ["a", "bb", "c"], + ["a", "b", "c"], + ["1", "2", "3", "4"], + ["1.0", "2.5", "3.001", "9"], + [], + ], +) +def test_categorical_typecast(data, categories): + pd_data = data + gd_data = cudf.from_pandas(data) + cat_type = pd.CategoricalDtype(categories) + + assert_eq(pd_data.astype(cat_type), gd_data.astype(cat_type)) + + +@pytest.mark.parametrize( + ("values", "expected"), + [ + ([1], np.uint8), + ([1, None], np.uint8), + (np.arange(np.iinfo(np.int8).max), np.uint8), + (np.append(np.arange(np.iinfo(np.int8).max), [None]), np.uint8), + (np.arange(np.iinfo(np.int16).max), np.uint16), + (np.append(np.arange(np.iinfo(np.int16).max), [None]), np.uint16), + (np.arange(np.iinfo(np.uint8).max), np.uint8), + (np.append(np.arange(np.iinfo(np.uint8).max), [None]), np.uint8), + (np.arange(np.iinfo(np.uint16).max), np.uint16), + (np.append(np.arange(np.iinfo(np.uint16).max), [None]), np.uint16), + ], +) +def test_astype_dtype(values, expected): + 
data = cudf.Series(values) + got = data.astype("category").cat.codes.dtype + np.testing.assert_equal(got, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_empty_series_category_cast(ordered): + dtype = cudf.CategoricalDtype(ordered=ordered) + ps = pd.Series([], dtype="str") + gs = cudf.from_pandas(ps) + + expected = ps.astype(dtype.to_pandas()) + actual = gs.astype(dtype) + + assert_eq(expected, actual) + assert_eq(expected.dtype.ordered, actual.dtype.ordered) diff --git a/python/cudf/cudf/tests/series/methods/test_nunique.py b/python/cudf/cudf/tests/series/methods/test_nunique.py index c645fa401f4..3cdc8435f79 100644 --- a/python/cudf/cudf/tests/series/methods/test_nunique.py +++ b/python/cudf/cudf/tests/series/methods/test_nunique.py @@ -1,5 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. - +import string import numpy as np import pandas as pd @@ -50,3 +50,22 @@ def test_datetime_nunique(data, nulls): expected = psr.nunique() got = gsr.nunique() assert_eq(got, expected) + + +def test_categorical_nunique(): + nelem = 20 + rng = np.random.default_rng(seed=0) + pd_cat = pd.Categorical( + pd.Series( + rng.choice(list(string.ascii_letters + string.digits), nelem), + dtype="category", + ) + ) + + gser = cudf.Series(pd_cat) + gdf_unique_count = gser.nunique() + + pser = pd.Series(pd_cat) + pdf_unique = pser.unique() + + assert gdf_unique_count == len(pdf_unique) diff --git a/python/cudf/cudf/tests/series/methods/test_reductions.py b/python/cudf/cudf/tests/series/methods/test_reductions.py index 3b636476651..001563b34db 100644 --- a/python/cudf/cudf/tests/series/methods/test_reductions.py +++ b/python/cudf/cudf/tests/series/methods/test_reductions.py @@ -1039,3 +1039,22 @@ def test_datetime_reductions(data, reduction_methods, datetime_types_as_str): assert True else: assert_eq(expected, actual) + + +@pytest.mark.parametrize("op", ["min", "max"]) +def test_categorical_maxima(op): + ser = cudf.Series( + ["a", "d", "c", "z", "g"], + dtype=cudf.CategoricalDtype(["z", "c", "g", "d", "a"], ordered=False), + ) + assert not ser.cat.ordered + + # Cannot get extrema of unordered Categorical column + with pytest.raises(TypeError, match="Categorical is not ordered"): + getattr(ser, op)() + + # Max/min should work after converting to "ordered" + ser_pd = ser.to_pandas() + result = getattr(ser.cat.as_ordered(), op)() + result_pd = getattr(ser_pd.cat.as_ordered(), op)() + assert_eq(result, result_pd) diff --git a/python/cudf/cudf/tests/series/methods/test_unique.py b/python/cudf/cudf/tests/series/methods/test_unique.py index b87f1872157..bc9fe01cf2b 100644 --- a/python/cudf/cudf/tests/series/methods/test_unique.py +++ b/python/cudf/cudf/tests/series/methods/test_unique.py @@ -1,5 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
- +import string import numpy as np import pandas as pd @@ -96,3 +96,24 @@ def test_string_unique(item): # cudf returns a cudf.Series gres = gs.unique() assert_eq(pres, gres) + + +def test_categorical_unique(): + num_elements = 20 + rng = np.random.default_rng(seed=12) + pd_cat = pd.Categorical( + pd.Series( + rng.choice( + list(string.ascii_letters + string.digits), num_elements + ), + dtype="category", + ) + ) + + gser = cudf.Series(pd_cat) + gdf_unique_sorted = np.sort(gser.unique().to_pandas()) + + pser = pd.Series(pd_cat) + pdf_unique_sorted = np.sort(pser.unique()) + + np.testing.assert_array_equal(pdf_unique_sorted, gdf_unique_sorted) diff --git a/python/cudf/cudf/tests/series/test_binops.py b/python/cudf/cudf/tests/series/test_binops.py index d24dab0a012..222b9373dc9 100644 --- a/python/cudf/cudf/tests/series/test_binops.py +++ b/python/cudf/cudf/tests/series/test_binops.py @@ -856,3 +856,29 @@ def test_categorical_binary_add(): lfunc_args_and_kwargs=([pdsr, pdsr],), rfunc_args_and_kwargs=([sr, sr],), ) + + +def test_cat_series_binop_error(): + data_a = pd.Categorical(list("aababcabbc"), categories=list("abc")) + data_b = np.arange(len(data_a)) + + pd_ser_a = pd.Series(data_a) + pd_ser_b = pd.Series(data_b) + gdf_ser_a = cudf.Series(data_a) + gdf_ser_b = cudf.Series(data_b) + + # lhs is categorical + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([pd_ser_a, pd_ser_b],), + rfunc_args_and_kwargs=([gdf_ser_a, gdf_ser_b],), + ) + + # lhs is numerical + assert_exceptions_equal( + lfunc=operator.add, + rfunc=operator.add, + lfunc_args_and_kwargs=([pd_ser_b, pd_ser_a],), + rfunc_args_and_kwargs=([gdf_ser_b, gdf_ser_a],), + ) diff --git a/python/cudf/cudf/tests/series/test_constructors.py b/python/cudf/cudf/tests/series/test_constructors.py index 6ad6a1a41cb..1074006be2f 100644 --- a/python/cudf/cudf/tests/series/test_constructors.py +++ b/python/cudf/cudf/tests/series/test_constructors.py @@ -1423,3 +1423,81 @@ def test_from_pandas_obj_tz_aware_unsupported(klass): pandas_obj = getattr(pd, klass)(tz_aware_data) with pytest.raises(NotImplementedError): cudf.from_pandas(pandas_obj) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4], + ["a", "1", "2", "1", "a"], + pd.Series(["a", "1", "22", "1", "aa"]), + pd.Series(["a", "1", "22", "1", "aa"], dtype="category"), + pd.Series([1, 2, 3, -4], dtype="int64"), + pd.Series([1, 2, 3, 4], dtype="uint64"), + pd.Series([1, 2.3, 3, 4], dtype="float"), + np.asarray([0, 2, 1]), + [None, 1, None, 2, None], + [], + ], +) +@pytest.mark.parametrize( + "categories", + [ + ["aa", "bb", "cc"], + [2, 4, 10, 100], + ["a", "b", "c"], + ["22", "b", "c"], + [], + ], +) +def test_categorical_creation(data, categories): + dtype = pd.CategoricalDtype(categories) + expected = pd.Series(data, dtype=dtype) + got = cudf.Series(data, dtype=dtype) + assert_eq(expected, got) + + got = cudf.Series(data, dtype=cudf.from_pandas(dtype)) + assert_eq(expected, got) + + expected = pd.Series(data, dtype="category") + got = cudf.Series(data, dtype="category") + assert_eq(expected, got) + + +@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]]) +def test_series_construction_with_nulls_as_category( + input_obj, all_supported_types_as_str +): + if all_supported_types_as_str == "category": + pytest.skip(f"No {all_supported_types_as_str} scalar.") + if all_supported_types_as_str.startswith( + "datetime" + ) or all_supported_types_as_str.startswith("timedelta"): + pytest.skip("Test intended for numeric and string scalars.") + dtype = 
cudf.dtype(all_supported_types_as_str) + input_obj = [ + dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj + ] + + expect = pd.Series(input_obj, dtype="category") + got = cudf.Series(input_obj, dtype="category") + + assert_eq(expect, got) + + +@pytest.mark.parametrize("scalar", [1, "a", None, 10.2]) +def test_cat_from_scalar(scalar): + ps = pd.Series(scalar, dtype="category") + gs = cudf.Series(scalar, dtype="category") + + assert_eq(ps, gs) + + +def test_categorical_interval_pandas_roundtrip(): + expected = cudf.Series(cudf.interval_range(0, 5)).astype("category") + result = cudf.Series.from_pandas(expected.to_pandas()) + assert_eq(result, expected) + + expected = pd.Series(pd.interval_range(0, 5)).astype("category") + result = cudf.Series.from_pandas(expected).to_pandas() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_api.py b/python/cudf/cudf/tests/test_api.py index a7d4b6f7cd3..44a5cc90da2 100644 --- a/python/cudf/cudf/tests/test_api.py +++ b/python/cudf/cudf/tests/test_api.py @@ -7,3 +7,12 @@ def test_toplevel_imports_matches_all_modules(): all_objects = set(cudf.__all__) extras = dir_objects - all_objects assert not extras, f"{extras} not included in cudf.__all__" + + +def test_version_constants_are_populated(): + # __git_commit__ will only be non-empty in a built distribution + assert isinstance(cudf.__git_commit__, str) + + # __version__ should always be non-empty + assert isinstance(cudf.__version__, str) + assert len(cudf.__version__) > 0 diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py deleted file mode 100644 index bfa21b35bbd..00000000000 --- a/python/cudf/cudf/tests/test_categorical.py +++ /dev/null @@ -1,895 +0,0 @@ -# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
- -import operator -import string -import warnings -from contextlib import contextmanager -from textwrap import dedent - -import numpy as np -import pandas as pd -import pytest - -import cudf -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing import assert_eq -from cudf.testing._utils import NUMERIC_TYPES, assert_exceptions_equal - - -@contextmanager -def _hide_cudf_safe_casting_warning(): - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "Can't safely cast column", - category=UserWarning, - ) - yield - - -@pytest.fixture -def pd_str_cat(): - categories = list("abc") - codes = [0, 0, 1, 0, 1, 2, 0, 1, 1, 2] - return pd.Categorical.from_codes(codes, categories=categories) - - -def test_categorical_basic(): - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - cudf_cat = cudf.Index(cat) - assert_eq(cat.codes, cudf_cat.codes.to_numpy()) - - pdsr = pd.Series(cat, index=["p", "q", "r", "s", "t"]) - sr = cudf.Series(cat, index=["p", "q", "r", "s", "t"]) - assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False) - - # Test attributes - assert_eq(pdsr.cat.categories, sr.cat.categories) - assert pdsr.cat.ordered == sr.cat.ordered - - np.testing.assert_array_equal( - pdsr.cat.codes.values, sr.cat.codes.to_numpy() - ) - - assert str(sr) == str(pdsr) - - -def test_categorical_integer(): - cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - np.testing.assert_array_equal( - cat.codes, sr.cat.codes.astype(cat.codes.dtype).fillna(-1).to_numpy() - ) - assert sr.null_count == 2 - - np.testing.assert_array_equal( - pdsr.cat.codes.values, - sr.cat.codes.astype(pdsr.cat.codes.dtype).fillna(-1).to_numpy(), - ) - - expect_str = dedent( - """\ - 0 a - 1 - 2 - 3 c - 4 a - dtype: category - Categories (3, object): ['a', 'b', 'c']""" - ) - assert str(sr) == expect_str - - -def test_categorical_compare_unordered(): - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - - sr = cudf.Series(cat) - - # test equal - out = sr == sr - assert out.dtype == np.bool_ - assert type(out[0]) is np.bool_ - assert np.all(out.to_numpy()) - assert np.all(pdsr == pdsr) - - # test inequality - out = sr != sr - assert not np.any(out.to_numpy()) - assert not np.any(pdsr != pdsr) - - assert not pdsr.cat.ordered - assert not sr.cat.ordered - - # test using ordered operators - assert_exceptions_equal( - lfunc=operator.lt, - rfunc=operator.lt, - lfunc_args_and_kwargs=([pdsr, pdsr],), - rfunc_args_and_kwargs=([sr, sr],), - ) - - -def test_categorical_element_indexing(): - """ - Element indexing to a cat column must give the underlying object - not the numerical index. - """ - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - assert_eq(pdsr, sr) - assert_eq(pdsr.cat.codes, sr.cat.codes, check_dtype=False) - - -def test_categorical_masking(): - """ - Test common operation for getting a all rows that matches a certain - category. 
- """ - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - - # check scalar comparison - expect_matches = pdsr == "a" - got_matches = sr == "a" - - np.testing.assert_array_equal( - expect_matches.values, got_matches.to_numpy() - ) - - # mask series - expect_masked = pdsr[expect_matches] - got_masked = sr[got_matches] - - assert len(expect_masked) == len(got_masked) - assert got_masked.null_count == 0 - assert_eq(got_masked, expect_masked) - - -def test_df_cat_set_index(): - df = cudf.DataFrame() - df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) - df["b"] = np.arange(len(df)) - got = df.set_index("a") - - pddf = df.to_pandas() - expect = pddf.set_index("a") - - assert_eq(got, expect) - - -def test_df_cat_sort_index(): - df = cudf.DataFrame() - df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) - df["b"] = np.arange(len(df)) - - got = df.set_index("a").sort_index() - expect = df.to_pandas().set_index("a").sort_index() - - assert_eq(got, expect) - - -def test_cat_series_binop_error(): - df = cudf.DataFrame() - df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc")) - df["b"] = np.arange(len(df)) - - pdf = df.to_pandas() - - # lhs is categorical - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([pdf["a"], pdf["b"]],), - rfunc_args_and_kwargs=([df["a"], df["b"]],), - ) - - # lhs is numerical - assert_exceptions_equal( - lfunc=operator.add, - rfunc=operator.add, - lfunc_args_and_kwargs=([pdf["b"], pdf["a"]],), - rfunc_args_and_kwargs=([df["b"], df["a"]],), - ) - - -def test_categorical_unique(): - num_elements = 20 - rng = np.random.default_rng(seed=12) - pd_cat = pd.Categorical( - pd.Series( - rng.choice( - list(string.ascii_letters + string.digits), num_elements - ), - dtype="category", - ) - ) - - # gdf - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series.from_pandas(pd_cat) - gdf_unique_sorted = np.sort(gdf["a"].unique().to_pandas()) - - # pandas - pdf = pd.DataFrame() - pdf["a"] = pd_cat - pdf_unique_sorted = np.sort(pdf["a"].unique()) - - # verify - np.testing.assert_array_equal(pdf_unique_sorted, gdf_unique_sorted) - - -def test_categorical_unique_count(): - nelem = 20 - rng = np.random.default_rng(seed=0) - pd_cat = pd.Categorical( - pd.Series( - rng.choice(list(string.ascii_letters + string.digits), nelem), - dtype="category", - ) - ) - - # gdf - gdf = cudf.DataFrame() - gdf["a"] = cudf.Series.from_pandas(pd_cat) - gdf_unique_count = gdf["a"].nunique() - - # pandas - pdf = pd.DataFrame() - pdf["a"] = pd_cat - pdf_unique = pdf["a"].unique() - - # verify - assert gdf_unique_count == len(pdf_unique) - - -def test_categorical_empty(): - cat = pd.Categorical([]) - pdsr = pd.Series(cat) - sr = cudf.Series(cat) - np.testing.assert_array_equal(cat.codes, sr.cat.codes.to_numpy()) - - # Test attributes - assert_eq(pdsr.cat.categories, sr.cat.categories) - assert pdsr.cat.ordered == sr.cat.ordered - - np.testing.assert_array_equal( - pdsr.cat.codes.values, sr.cat.codes.to_numpy() - ) - - -def test_categorical_set_categories(): - cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"]) - psr = pd.Series(cat) - sr = cudf.Series.from_pandas(cat) - - # adding category - expect = psr.cat.set_categories(["a", "b", "c", "d"]) - got = sr.cat.set_categories(["a", "b", "c", "d"]) - assert_eq(expect, got) - - # removing category - expect = psr.cat.set_categories(["a", "b"]) - got = sr.cat.set_categories(["a", "b"]) - 
assert_eq(expect, got) - - -def test_categorical_set_categories_preserves_order(): - series = pd.Series([1, 0, 0, 0, 2]).astype("category") - # reassigning categories should preserve element ordering - assert_eq( - series.cat.set_categories([1, 2]), - cudf.Series(series).cat.set_categories([1, 2]), - ) - - -def test_categorical_as_ordered(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.set_ordered(False)) - cd_sr = cudf.Series(pd_str_cat.set_ordered(False)) - - assert cd_sr.cat.ordered is False - assert cd_sr.cat.ordered == pd_sr.cat.ordered - - pd_sr_1 = pd_sr.cat.as_ordered() - cd_sr_1 = cd_sr.cat.as_ordered() - - assert cd_sr_1.cat.ordered is True - assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered - assert str(cd_sr_1) == str(pd_sr_1) - - -def test_categorical_as_unordered(pd_str_cat): - pd_sr = pd.Series(pd_str_cat.set_ordered(True)) - cd_sr = cudf.Series(pd_str_cat.set_ordered(True)) - - assert cd_sr.cat.ordered is True - assert cd_sr.cat.ordered == pd_sr.cat.ordered - - pd_sr_1 = pd_sr.cat.as_unordered() - cd_sr_1 = cd_sr.cat.as_unordered() - - assert cd_sr_1.cat.ordered is False - assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered - assert str(cd_sr_1) == str(pd_sr_1) - - -@pytest.mark.parametrize("from_ordered", [True, False]) -@pytest.mark.parametrize("to_ordered", [True, False]) -def test_categorical_reorder_categories(pd_str_cat, from_ordered, to_ordered): - pd_sr = pd.Series(pd_str_cat.set_ordered(from_ordered)) - cd_sr = cudf.Series(pd_str_cat.set_ordered(from_ordered)) - - assert_eq(pd_sr, cd_sr) - - assert str(pd_sr) == str(cd_sr) - - pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) - cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), ordered=to_ordered) - - assert_eq(pd_sr_1, cd_sr_1) - - assert str(cd_sr_1) == str(pd_sr_1) - - -def test_categorical_add_categories(pd_str_cat): - pd_sr = pd.Series(pd_str_cat) - cd_sr = cudf.Series(pd_str_cat) - - assert_eq(pd_sr, cd_sr) - - assert str(pd_sr) == str(cd_sr) - - pd_sr_1 = pd_sr.cat.add_categories(["d"]) - cd_sr_1 = cd_sr.cat.add_categories(["d"]) - - assert "d" in pd_sr_1.cat.categories.to_list() - assert "d" in cd_sr_1.cat.categories.to_pandas().to_list() - - assert_eq(pd_sr_1, cd_sr_1) - - -def test_categorical_remove_categories(pd_str_cat): - pd_sr = pd.Series(pd_str_cat) - cd_sr = cudf.Series(pd_str_cat) - - assert_eq(pd_sr, cd_sr) - - assert str(pd_sr) == str(cd_sr) - - pd_sr_1 = pd_sr.cat.remove_categories(["a"]) - cd_sr_1 = cd_sr.cat.remove_categories(["a"]) - - assert "a" not in pd_sr_1.cat.categories.to_list() - assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list() - - assert_eq(pd_sr_1, cd_sr_1) - - # test using ordered operators - assert_exceptions_equal( - lfunc=cd_sr.to_pandas().cat.remove_categories, - rfunc=cd_sr.cat.remove_categories, - lfunc_args_and_kwargs=([["a", "d"]], {}), - rfunc_args_and_kwargs=([["a", "d"]], {}), - ) - - -def test_categorical_dataframe_slice_copy(): - pdf = pd.DataFrame({"g": pd.Series(["a", "b", "z"], dtype="category")}) - gdf = cudf.from_pandas(pdf) - - exp = pdf[1:].copy() - gdf = gdf[1:].copy() - - assert_eq(exp, gdf) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([1, 2, 3, 89]), - pd.Series([1, 2, 3, 89, 3, 1, 89], dtype="category"), - pd.Series(["1", "2", "3", "4", "5"], dtype="category"), - pd.Series(["1.0", "2.5", "3.001", "9"], dtype="category"), - pd.Series(["1", "2", "3", None, "4", "5"], dtype="category"), - pd.Series(["1.0", "2.5", "3.001", None, "9"], dtype="category"), - pd.Series(["a", "b", "c", "c", "b", "a", "b", "b"]), - 
pd.Series(["aa", "b", "c", "c", "bb", "bb", "a", "b", "b"]), - pd.Series([1, 2, 3, 89, None, np.nan, np.nan], dtype="float64"), - pd.Series([1, 2, 3, 89], dtype="float64"), - pd.Series([1, 2.5, 3.001, 89], dtype="float64"), - pd.Series([None, None, None]), - pd.Series([], dtype="float64"), - ], -) -@pytest.mark.parametrize( - "categories", - [ - ["aa", "bb", "cc"], - [2, 4, 10, 100], - ["aa", "bb", "c"], - ["a", "bb", "c"], - ["a", "b", "c"], - ["1", "2", "3", "4"], - ["1.0", "2.5", "3.001", "9"], - [], - ], -) -def test_categorical_typecast(data, categories): - pd_data = data - gd_data = cudf.from_pandas(data) - cat_type = pd.CategoricalDtype(categories) - - assert_eq(pd_data.astype(cat_type), gd_data.astype(cat_type)) - - -@pytest.mark.parametrize( - "data", - [ - pd.Series([1, 2, 3, 89]), - pd.Series(["a", "b", "c", "c", "b", "a", "b", "b"]), - pd.Series(["aa", "b", "c", "c", "bb", "bb", "a", "b", "b"]), - pd.Series([1, 2, 3, 89, None, np.nan, np.nan], dtype="float64"), - pd.Series([1, 2, 3, 89], dtype="float64"), - pd.Series([1, 2.5, 3.001, 89], dtype="float64"), - pd.Series([None, None, None]), - pd.Series([], dtype="float64"), - ], -) -@pytest.mark.parametrize( - "new_categories", - [ - ["aa", "bb", "cc"], - [2, 4, 10, 100], - ["aa", "bb", "c"], - ["a", "bb", "c"], - ["a", "b", "c"], - [], - pd.Series(["a", "b", "c"]), - pd.Series(["a", "b", "c"], dtype="category"), - pd.Series([-100, 10, 11, 0, 1, 2], dtype="category"), - ], -) -def test_categorical_set_categories_categoricals(data, new_categories): - pd_data = data.astype("category") - gd_data = cudf.from_pandas(pd_data) - - expected = pd_data.cat.set_categories(new_categories=new_categories) - with _hide_cudf_safe_casting_warning(): - actual = gd_data.cat.set_categories(new_categories=new_categories) - - assert_eq(expected, actual) - - expected = pd_data.cat.set_categories( - new_categories=pd.Series(new_categories, dtype="category") - ) - with _hide_cudf_safe_casting_warning(): - actual = gd_data.cat.set_categories( - new_categories=cudf.Series(new_categories, dtype="category") - ) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4], - ["a", "1", "2", "1", "a"], - pd.Series(["a", "1", "22", "1", "aa"]), - pd.Series(["a", "1", "22", "1", "aa"], dtype="category"), - pd.Series([1, 2, 3, -4], dtype="int64"), - pd.Series([1, 2, 3, 4], dtype="uint64"), - pd.Series([1, 2.3, 3, 4], dtype="float"), - np.asarray([0, 2, 1]), - [None, 1, None, 2, None], - [], - ], -) -@pytest.mark.parametrize( - "categories", - [ - ["aa", "bb", "cc"], - [2, 4, 10, 100], - ["aa", "bb", "c"], - ["a", "bb", "c"], - ["a", "b", "c"], - ["22", "b", "c"], - [], - ], -) -def test_categorical_creation(data, categories): - dtype = pd.CategoricalDtype(categories) - expected = pd.Series(data, dtype=dtype) - got = cudf.Series(data, dtype=dtype) - assert_eq(expected, got) - - got = cudf.Series(data, dtype=cudf.from_pandas(dtype)) - assert_eq(expected, got) - - expected = pd.Series(data, dtype="category") - got = cudf.Series(data, dtype="category") - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "categories", - [ - [], - [1, 2, 3], - pd.Series(["a", "c", "b"], dtype="category"), - pd.Series([1, 2, 3, 4, -100], dtype="category"), - ], -) -@pytest.mark.parametrize("ordered", [True, False]) -def test_categorical_dtype(categories, ordered): - expected = pd.CategoricalDtype(categories=categories, ordered=ordered) - got = cudf.CategoricalDtype(categories=categories, ordered=ordered) - assert_eq(expected, got) - - expected = 
pd.CategoricalDtype(categories=categories) - got = cudf.CategoricalDtype(categories=categories) - assert_eq(expected, got) - - -@pytest.mark.parametrize( - ("values", "expected"), - [ - ([1], np.uint8), - ([1, None], np.uint8), - (np.arange(np.iinfo(np.int8).max), np.uint8), - (np.append(np.arange(np.iinfo(np.int8).max), [None]), np.uint8), - (np.arange(np.iinfo(np.int16).max), np.uint16), - (np.append(np.arange(np.iinfo(np.int16).max), [None]), np.uint16), - (np.arange(np.iinfo(np.uint8).max), np.uint8), - (np.append(np.arange(np.iinfo(np.uint8).max), [None]), np.uint8), - (np.arange(np.iinfo(np.uint16).max), np.uint16), - (np.append(np.arange(np.iinfo(np.uint16).max), [None]), np.uint16), - ], -) -def test_astype_dtype(values, expected): - data = cudf.Series(values) - got = data.astype("category").cat.codes.dtype - np.testing.assert_equal(got, expected) - - -@pytest.mark.parametrize( - "data,add", - [ - ([1, 2, 3], [100, 11, 12]), - ([1, 2, 3], [0.01, 9.7, 15.0]), - ([0.0, 6.7, 10.0], [100, 11, 12]), - ([0.0, 6.7, 10.0], [0.01, 9.7, 15.0]), - (["a", "bd", "ef"], ["asdfsdf", "bddf", "eff"]), - ([1, 2, 3], []), - ([0.0, 6.7, 10.0], []), - (["a", "bd", "ef"], []), - ], -) -def test_add_categories(data, add): - pds = pd.Series(data, dtype="category") - gds = cudf.Series(data, dtype="category") - - expected = pds.cat.add_categories(add) - with _hide_cudf_safe_casting_warning(): - actual = gds.cat.add_categories(add) - - assert_eq( - expected.cat.codes, actual.cat.codes.astype(expected.cat.codes.dtype) - ) - - # Need to type-cast pandas object to str due to mixed-type - # support in "object" - assert_eq( - expected.cat.categories.astype("str") - if (expected.cat.categories.dtype == "object") - else expected.cat.categories, - actual.cat.categories, - ) - - -@pytest.mark.parametrize( - "data,add", - [ - ([1, 2, 3], [1, 3, 11]), - ([0.0, 6.7, 10.0], [1, 2, 0.0]), - (["a", "bd", "ef"], ["a", "bd", "a"]), - ], -) -def test_add_categories_error(data, add): - pds = pd.Series(data, dtype="category") - gds = cudf.Series(data, dtype="category") - - assert_exceptions_equal( - pds.cat.add_categories, - gds.cat.add_categories, - ([add],), - ([add],), - ) - - -def test_add_categories_mixed_error(): - gds = cudf.Series(["a", "bd", "ef"], dtype="category") - - with pytest.raises(TypeError): - gds.cat.add_categories([1, 2, 3]) - - gds = cudf.Series([1, 2, 3], dtype="category") - - with pytest.raises(TypeError): - gds.cat.add_categories(["a", "bd", "ef"]) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4], - ["a", "1", "2", "1", "a"], - pd.Series(["a", "1", "22", "1", "aa"]), - pd.Series(["a", "1", "22", "1", "aa"], dtype="category"), - pd.Series([1, 2, 3, 4], dtype="int64"), - pd.Series([1, 2.3, 3, 4], dtype="float"), - [None, 1, None, 2, None], - ["a"], - ], -) -@pytest.mark.parametrize( - "categories", - [ - ["aa", "bb", "cc"], - [2, 4, 10, 100], - ["aa", "bb", "c"], - ["a", "bb", "c"], - ["a", "b", "c"], - ["22", "b", "c"], - ["a"], - ], -) -def test_categorical_assignment(data, categories): - cat_dtype = pd.CategoricalDtype(categories) - pd_df = pd.DataFrame() - pd_df["a"] = np.ones(len(data)) - cd_df = cudf.from_pandas(pd_df) - - pd_cat_series = pd.Series(data, dtype=cat_dtype) - # assign categorical series - pd_df.assign(cat_col=pd_cat_series) - cd_df.assign(cat_col=pd_cat_series) - assert_eq(pd_df, cd_df) - - # assign categorical array - # needed for dask_cudf support for including file name - # as a categorical column - # see issue: https://github.com/rapidsai/cudf/issues/2269 - pd_df = 
pd.DataFrame() - pd_df["a"] = np.ones(len(data)) - cd_df = cudf.from_pandas(pd_df) - - pd_categorical = pd.Categorical(data, dtype=cat_dtype) - pd_df.assign(cat_col=pd_categorical) - cd_df.assign(cat_col=pd_categorical) - assert_eq(pd_df, cd_df) - - -def test_categorical_allow_nan(): - gs = cudf.Series([1, 2, np.nan, 10, np.nan, None], nan_as_null=False) - gs = gs.astype("category") - expected_codes = cudf.Series([0, 1, 3, 2, 3, None], dtype="uint8") - assert_eq(expected_codes, gs.cat.codes) - - expected_categories = cudf.Index([1.0, 2.0, 10.0, np.nan], dtype="float64") - assert_eq(expected_categories, gs.cat.categories) - - actual_ps = gs.to_pandas() - expected_ps = pd.Series( - [1.0, 2.0, np.nan, 10.0, np.nan, np.nan], dtype="category" - ) - assert_eq(actual_ps, expected_ps) - - -def test_categorical_setitem_with_nan(): - gs = cudf.Series( - [1, 2, np.nan, 10, np.nan, None], nan_as_null=False - ).astype("category") - gs[[1, 3]] = np.nan - - expected_series = cudf.Series( - [1, np.nan, np.nan, np.nan, np.nan, None], nan_as_null=False - ).astype(gs.dtype) - assert_eq(gs, expected_series) - - -@pytest.mark.parametrize("dtype", [*list(NUMERIC_TYPES), "object"]) -@pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]]) -def test_series_construction_with_nulls(input_obj, dtype): - dtype = cudf.dtype(dtype) - input_obj = [ - dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj - ] - - expect = pd.Series(input_obj, dtype="category") - got = cudf.Series(input_obj, dtype="category").to_pandas() - - assert_eq(expect, got) - - -@pytest.mark.parametrize( - "data", - [ - {"a": pd.Series(["a", "b", "c", "a", "c", "b"]).astype("category")}, - { - "a": pd.Series(["a", "a", "b", "b"]).astype("category"), - "b": pd.Series(["b", "b", "c", "c"]).astype("category"), - "c": pd.Series(["c", "c", "a", "a"]).astype("category"), - }, - { - "a": pd.Series(["a", None, "b", "b"]).astype("category"), - "b": pd.Series(["b", "b", None, "c"]).astype("category"), - "c": pd.Series(["c", "c", "a", None]).astype("category"), - }, - ], -) -def test_serialize_categorical_columns(data): - df = cudf.DataFrame(data) - recreated = df.__class__.deserialize(*df.serialize()) - assert_eq(recreated, df) - - -@pytest.mark.parametrize( - "data", [["$ 1", "$ 2", "hello"], ["($) 1", "( 2", "hello", "^1$"]] -) -@pytest.mark.parametrize("value", ["$ 1", "hello", "$", "^1$"]) -def test_categorical_string_index_contains(data, value): - idx = cudf.CategoricalIndex(data) - pidx = idx.to_pandas() - - assert_eq(value in idx, value in pidx) - - -def test_categorical_index_with_dtype(): - dtype = cudf.CategoricalDtype(categories=["a", "z", "c"]) - gi = cudf.Index(["z", "c", "a"], dtype=dtype) - pi = pd.Index(["z", "c", "a"], dtype=dtype.to_pandas()) - - assert_eq(gi, pi) - assert_eq(gi.dtype, pi.dtype) - assert_eq(gi.dtype.categories, pi.dtype.categories) - - -def test_cat_iterate_error(): - s = cudf.Series([1, 2, 3], dtype="category") - with pytest.raises(TypeError): - iter(s.cat) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_empty_series_category_cast(ordered): - dtype = cudf.CategoricalDtype(ordered=ordered) - ps = pd.Series([], dtype="str") - gs = cudf.from_pandas(ps) - - expected = ps.astype(dtype.to_pandas()) - actual = gs.astype(dtype) - - assert_eq(expected, actual) - assert_eq(expected.dtype.ordered, actual.dtype.ordered) - - -def test_categorical_dtype_ordered_not_settable(): - with pytest.raises(AttributeError): - cudf.CategoricalDtype().ordered = False - - -@pytest.mark.parametrize("scalar", [1, "a", 
None, 10.2]) -def test_cat_from_scalar(scalar): - ps = pd.Series(scalar, dtype="category") - gs = cudf.Series(scalar, dtype="category") - - assert_eq(ps, gs) - - -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Does not warn on older versions of pandas", -) -def test_cat_groupby_fillna(): - ps = pd.Series(["a", "b", "c"], dtype="category") - gs = cudf.from_pandas(ps) - - with pytest.warns(FutureWarning): - pg = ps.groupby(ps) - gg = gs.groupby(gs) - - assert_exceptions_equal( - lfunc=pg.fillna, - rfunc=gg.fillna, - lfunc_args_and_kwargs=(("d",), {}), - rfunc_args_and_kwargs=(("d",), {}), - ) - - -@pytest.mark.parametrize("op", ["min", "max"]) -def test_categorical_maxima(op): - ser = cudf.Series( - ["a", "d", "c", "z", "g"], - dtype=cudf.CategoricalDtype(["z", "c", "g", "d", "a"], ordered=False), - ) - assert not ser.cat.ordered - - # Cannot get extrema of unordered Categorical column - with pytest.raises(TypeError, match="Categorical is not ordered"): - getattr(ser, op)() - - # Max/min should work after converting to "ordered" - ser_pd = ser.to_pandas() - result = getattr(ser.cat.as_ordered(), op)() - result_pd = getattr(ser_pd.cat.as_ordered(), op)() - assert_eq(result, result_pd) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_index_ordered(ordered): - pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) - cudf_ci = cudf.from_pandas(pd_ci) - assert pd_ci.ordered == cudf_ci.ordered - - -@pytest.mark.parametrize("method", ["as_ordered", "as_unordered"]) -@pytest.mark.parametrize("ordered", [True, False]) -def test_index_as_ordered(method, ordered): - pd_ci = pd.CategoricalIndex([1, 2, 3], ordered=ordered) - cudf_ci = cudf.from_pandas(pd_ci) - - expected = getattr(pd_ci, method)() - result = getattr(cudf_ci, method)() - assert_eq(result, expected) - - -def test_index_add_categories(): - pd_ci = pd.CategoricalIndex([1, 2, 3]) - cudf_ci = cudf.from_pandas(pd_ci) - - expected = pd_ci.add_categories([4]) - result = cudf_ci.add_categories([4]) - assert_eq(result, expected) - - -def test_index_remove_categories(): - pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 2, 3, 4]) - cudf_ci = cudf.from_pandas(pd_ci) - - expected = pd_ci.remove_categories([4]) - result = cudf_ci.remove_categories([4]) - assert_eq(result, expected) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_index_reorder_categories(ordered): - pd_ci = pd.CategoricalIndex([1, 2, 3], categories=[1, 3, 2, 4]) - cudf_ci = cudf.from_pandas(pd_ci) - - expected = pd_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) - result = cudf_ci.reorder_categories([1, 2, 3, 4], ordered=ordered) - assert_eq(result, expected) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_index_set_categories(ordered): - pd_ci = pd.CategoricalIndex([1, 2, 3]) - cudf_ci = cudf.from_pandas(pd_ci) - - expected = pd_ci.set_categories([1, 2, 3, 4], ordered=ordered) - result = cudf_ci.set_categories([1, 2, 3, 4], ordered=ordered) - assert_eq(result, expected) - - -def test_categorical_interval_pandas_roundtrip(): - expected = cudf.Series(cudf.interval_range(0, 5)).astype("category") - result = cudf.Series.from_pandas(expected.to_pandas()) - assert_eq(result, expected) - - expected = pd.Series(pd.interval_range(0, 5)).astype("category") - result = cudf.Series.from_pandas(expected).to_pandas() - assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/test_version.py b/python/cudf/cudf/tests/test_version.py deleted file mode 100644 index 8c10cc20a9a..00000000000 --- 
a/python/cudf/cudf/tests/test_version.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -import cudf - - -def test_version_constants_are_populated(): - # __git_commit__ will only be non-empty in a built distribution - assert isinstance(cudf.__git_commit__, str) - - # __version__ should always be non-empty - assert isinstance(cudf.__version__, str) - assert len(cudf.__version__) > 0 From 54d0d15b6955c4c5f7b684dab055d92e45a14b5f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Sep 2025 19:45:15 -0700 Subject: [PATCH 254/366] Move (most of) test_index.py to new cudf classic directory structure (#19696) Towards https://github.com/rapidsai/cudf/issues/9999 Towards https://github.com/rapidsai/cudf/issues/15723 The remaining tests are pending other tests being moved first, so that the needed sub-directories/files are established Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/19696 --- python/cudf/cudf/tests/conftest.py | 20 + .../dataframe/methods/test_sort_index.py | 25 +- .../dataframe/methods/test_sort_values.py | 10 + .../cudf/tests/dataframe/methods/test_take.py | 47 + .../cudf/tests/groupby/test_reductions.py | 122 +- .../categoricalindex/methods/test_equals.py | 71 + .../categoricalindex/test_constructors.py | 55 + .../datetimeindex/indexing/__init__.py | 0 .../datetimeindex/indexing/test_getitem.py | 16 + .../methods/test_ceil_floor_round.py | 20 + .../datetimeindex/methods/test_repeat.py | 19 + .../tests/indexes/index/indexing/__init__.py | 0 .../indexes/index/indexing/test_getitem.py | 33 + .../indexes/index/indexing/test_setitem.py | 16 + .../indexes/index/methods/test_any_all.py | 12 + .../indexes/index/methods/test_append.py | 323 ++ .../indexes/index/methods/test_argsort.py | 22 + .../indexes/index/methods/test_astype.py | 18 + .../tests/indexes/index/methods/test_copy.py | 50 + .../indexes/index/methods/test_difference.py | 109 + .../index/methods/test_drop_duplicates.py | 23 + .../indexes/index/methods/test_dropna.py | 5 + .../indexes/index/methods/test_duplicated.py | 26 + .../indexes/index/methods/test_equals.py | 122 + .../indexes/index/methods/test_fillna.py | 38 + .../index/methods/test_find_label_range.py | 27 + .../indexes/index/methods/test_get_indexer.py | 158 + .../indexes/index/methods/test_get_loc.py | 76 + .../index/methods/test_intersection.py | 89 + .../tests/indexes/index/methods/test_isin.py | 85 + .../index/methods/test_isna_notnull.py | 15 + .../indexes/index/methods/test_reductions.py | 23 + .../indexes/index/methods/test_rename.py | 68 + .../indexes/index/methods/test_set_names.py | 48 + .../indexes/index/methods/test_sort_values.py | 47 + .../indexes/index/methods/test_to_arrow.py | 29 + .../indexes/index/methods/test_to_frame.py | 24 + .../indexes/index/methods/test_to_pandas.py | 77 + .../indexes/index/methods/test_to_series.py | 22 + .../indexes/index/methods/test_tolist.py | 22 + .../tests/indexes/index/methods/test_union.py | 72 + .../tests/indexes/index/methods/test_where.py | 190 + .../tests/indexes/index/test_attributes.py | 150 + .../tests/indexes/index/test_constructor.py | 274 ++ .../multiindex/indexing/test_getitem.py | 47 + .../indexes/multiindex/methods/test_append.py | 52 + .../multiindex/methods/test_get_indexer.py | 128 + .../multiindex/methods/test_get_loc.py | 141 + .../indexes/multiindex/methods/test_isin.py | 85 + .../multiindex/methods/test_to_arrow.py | 27 +
.../indexes/multiindex/test_constructors.py | 18 + .../indexes/rangeindex/indexing/__init__.py | 0 .../rangeindex/indexing/test_getitem.py | 85 + .../rangeindex/methods/test_any_all.py | 12 + .../indexes/rangeindex/methods/test_append.py | 15 + .../indexes/rangeindex/methods/test_dropna.py | 12 + .../rangeindex/methods/test_factorize.py | 15 + .../methods/test_find_label_range.py | 23 + .../rangeindex/methods/test_get_indexer.py | 39 + .../rangeindex/methods/test_get_loc.py | 32 + .../rangeindex/methods/test_intersection.py | 19 + .../indexes/rangeindex/methods/test_join.py | 19 + .../rangeindex/methods/test_nunique.py | 33 + .../indexes/rangeindex/methods/test_rename.py | 15 + .../indexes/rangeindex/methods/test_repeat.py | 16 + .../rangeindex/methods/test_searchsorted.py | 12 + .../indexes/rangeindex/methods/test_take.py | 16 + .../indexes/rangeindex/methods/test_union.py | 17 + .../indexes/rangeindex/methods/test_unique.py | 17 + .../indexes/rangeindex/methods/test_where.py | 18 + .../indexes/rangeindex/test_attributes.py | 32 + .../tests/indexes/rangeindex/test_binops.py | 33 + .../indexes/rangeindex/test_constructors.py | 40 +- .../cudf/tests/input_output/test_pickling.py | 30 + .../cudf/tests/series/methods/test_take.py | 37 + python/cudf/cudf/tests/test_index.py | 3110 +---------------- python/cudf/cudf/tests/test_indexing.py | 82 - python/cudf/cudf/tests/test_multiindex.py | 269 +- 78 files changed, 3798 insertions(+), 3446 deletions(-) create mode 100644 python/cudf/cudf/tests/dataframe/methods/test_take.py create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/methods/test_equals.py create mode 100644 python/cudf/cudf/tests/indexes/categoricalindex/test_constructors.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/indexing/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_ceil_floor_round.py create mode 100644 python/cudf/cudf/tests/indexes/datetimeindex/methods/test_repeat.py create mode 100644 python/cudf/cudf/tests/indexes/index/indexing/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/index/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/indexes/index/indexing/test_setitem.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_any_all.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_append.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_argsort.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_astype.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_copy.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_difference.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_drop_duplicates.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_duplicated.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_equals.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_fillna.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_find_label_range.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_get_indexer.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_get_loc.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_intersection.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_isin.py create mode 
100644 python/cudf/cudf/tests/indexes/index/methods/test_isna_notnull.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_reductions.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_rename.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_set_names.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_sort_values.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_to_arrow.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_to_frame.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_to_pandas.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_to_series.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_tolist.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_union.py create mode 100644 python/cudf/cudf/tests/indexes/index/methods/test_where.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_append.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_get_indexer.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_get_loc.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_isin.py create mode 100644 python/cudf/cudf/tests/indexes/multiindex/methods/test_to_arrow.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/indexing/__init__.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/indexing/test_getitem.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_any_all.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_append.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_dropna.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_factorize.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_find_label_range.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_indexer.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_loc.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_intersection.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_join.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_repeat.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_searchsorted.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_take.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_union.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_unique.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/methods/test_where.py create mode 100644 python/cudf/cudf/tests/indexes/rangeindex/test_binops.py create mode 100644 python/cudf/cudf/tests/series/methods/test_take.py diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 663e4dbd517..5d017f9e8ca 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -298,6 +298,14 @@ def signed_integer_types_as_str(request): return request.param +@pytest.fixture(params=unsigned_integer_types) +def unsigned_integer_types_as_str(request): + """ + - 
"uint8", "uint16", "uint32", "uint64" + """ + return request.param + + @pytest.fixture(params=signed_integer_types + unsigned_integer_types) def integer_types_as_str(request): """ @@ -486,6 +494,18 @@ def numpy_ufunc(request): return request.param +@pytest.fixture(params=[True, False]) +def copy(request): + """Param for `copy` argument""" + return request.param + + +@pytest.fixture(params=[True, False]) +def deep(request): + """Param for `deep` argument""" + return request.param + + @pytest.fixture(params=[True, False]) def dropna(request): """Param for `dropna` argument""" diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py index 8e58755eff9..73e7bd184d8 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_index.py @@ -1,5 +1,5 @@ # Copyright (c) 2025, NVIDIA CORPORATION. - +import itertools import numpy as np import pandas as pd @@ -147,6 +147,29 @@ def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names(): assert result._data.names == tuple(result._data.keys()) +@pytest.mark.parametrize( + "levels", + itertools.chain.from_iterable( + itertools.permutations(range(3), n) for n in range(1, 4) + ), + ids=str, +) +def test_multiindex_sort_index_partial(levels): + df = pd.DataFrame( + { + "a": [3, 3, 3, 1, 1, 1, 2, 2], + "b": [4, 2, 7, -1, 11, -2, 7, 7], + "c": [4, 4, 2, 3, 3, 3, 1, 1], + "val": [1, 2, 3, 4, 5, 6, 7, 8], + } + ).set_index(["a", "b", "c"]) + cdf = cudf.from_pandas(df) + + expect = df.sort_index(level=levels, sort_remaining=True) + got = cdf.sort_index(level=levels, sort_remaining=True) + assert_eq(expect, got) + + def test_df_cat_sort_index(): df = cudf.DataFrame( { diff --git a/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py index 2a7b53d94cb..dedc30feeb2 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_sort_values.py @@ -218,3 +218,13 @@ def test_sort_values_datetime(): s_gdf = gdf.sort_values(by="date") assert_eq(s_df, s_gdf) + + +def test_dataframe_loc_duplicate_index_scalar(): + pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=[1, 2, 1, 4, 2]) + gdf = cudf.DataFrame.from_pandas(pdf) + + pdf_sorted = pdf.sort_values(by=list(pdf.columns), axis=0) + gdf_sorted = gdf.sort_values(by=list(gdf.columns), axis=0) + + assert_eq(pdf_sorted, gdf_sorted) diff --git a/python/cudf/cudf/tests/dataframe/methods/test_take.py b/python/cudf/cudf/tests/dataframe/methods/test_take.py new file mode 100644 index 00000000000..35d6f78487a --- /dev/null +++ b/python/cudf/cudf/tests/dataframe/methods/test_take.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("ntake", [0, 1, 123, 122, 200]) +def test_dataframe_take(ntake): + rng = np.random.default_rng(seed=0) + nelem = 123 + df = cudf.DataFrame( + { + "ii": rng.integers(0, 20, nelem), + "ff": rng.random(nelem), + } + ) + + take_indices = rng.integers(0, len(df), ntake) + + actual = df.take(take_indices) + expected = df.to_pandas().take(take_indices) + + assert actual.ii.null_count == 0 + assert actual.ff.null_count == 0 + assert_eq(actual, expected) + + +@pytest.mark.parametrize("ntake", [1, 2, 8, 9]) +def test_dataframe_take_with_multiindex(ntake): + rng = np.random.default_rng(seed=0) + df = cudf.DataFrame( + {"ii": rng.integers(0, 20, 9), "ff": rng.random(9)}, + index=cudf.MultiIndex( + levels=[["lama", "cow", "falcon"], ["speed", "weight", "length"]], + codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], + ), + ) + + take_indices = rng.integers(0, len(df), ntake) + + actual = df.take(take_indices) + expected = df.to_pandas().take(take_indices) + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/groupby/test_reductions.py b/python/cudf/cudf/tests/groupby/test_reductions.py index f4d248f79c4..19ad845a435 100644 --- a/python/cudf/cudf/tests/groupby/test_reductions.py +++ b/python/cudf/cudf/tests/groupby/test_reductions.py @@ -9,7 +9,7 @@ PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION, ) -from cudf.testing import assert_eq, assert_groupby_results_equal +from cudf.testing import assert_eq, assert_groupby_results_equal, assert_neq from cudf.testing._utils import assert_exceptions_equal @@ -987,6 +987,126 @@ def test_group_by_reduce_numeric_only(by, data, groupby_reduction_methods): assert_eq(expected, result) +def test_multiindex_multiple_groupby(): + rng = np.random.default_rng(seed=0) + pdf = pd.DataFrame( + { + "a": [4, 17, 4, 9, 5], + "b": [1, 4, 4, 3, 2], + "x": rng.normal(size=5), + } + ) + gdf = cudf.DataFrame.from_pandas(pdf) + pdg = pdf.groupby(["a", "b"], sort=True).sum() + gdg = gdf.groupby(["a", "b"], sort=True).sum() + assert_eq(pdg, gdg) + pdg = pdf.groupby(["a", "b"], sort=True).x.sum() + gdg = gdf.groupby(["a", "b"], sort=True).x.sum() + assert_eq(pdg, gdg) + + +def test_multiindex_equality(): + # mi made from groupby + # mi made manually to be identical + # are they equal? + gdf = cudf.DataFrame( + {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} + ) + mi1 = gdf.groupby(["x", "y"], sort=True).mean().index + mi2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_eq(mi1, mi2) + + # mi made from two groupbys, are they equal? + mi2 = gdf.groupby(["x", "y"], sort=True).max().index + assert_eq(mi1, mi2) + + # mi made manually twice are they equal? + mi1 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + mi2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_eq(mi1, mi2) + + # mi made from different groupbys are they not equal? + mi1 = gdf.groupby(["x", "y"]).mean().index + mi2 = gdf.groupby(["x", "z"]).mean().index + assert_neq(mi1, mi2) + + # mi made from different manuals are they not equal? 
+ mi1 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + mi2 = cudf.MultiIndex( + levels=[[0, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_neq(mi1, mi2) + + +def test_multiindex_equals(): + # mi made from groupby + # mi made manually to be identical + # are they equal? + gdf = cudf.DataFrame( + {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} + ) + mi1 = gdf.groupby(["x", "y"], sort=True).mean().index + mi2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_eq(mi1.equals(mi2), True) + + # mi made from two groupbys, are they equal? + mi2 = gdf.groupby(["x", "y"], sort=True).max().index + assert_eq(mi1.equals(mi2), True) + + # mi made manually twice are they equal? + mi1 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + mi2 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_eq(mi1.equals(mi2), True) + + # mi made from different groupbys are they not equal? + mi1 = gdf.groupby(["x", "y"], sort=True).mean().index + mi2 = gdf.groupby(["x", "z"], sort=True).mean().index + assert_eq(mi1.equals(mi2), False) + + # mi made from different manuals are they not equal? + mi1 = cudf.MultiIndex( + levels=[[1, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + mi2 = cudf.MultiIndex( + levels=[[0, 3, 4, 5], [1, 2, 5]], + codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], + names=["x", "y"], + ) + assert_eq(mi1.equals(mi2), False) + + @pytest.mark.parametrize( "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] ) diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_equals.py b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_equals.py new file mode 100644 index 00000000000..4077e8611fe --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/methods/test_equals.py @@ -0,0 +1,71 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4, 5, 6], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + ["a"], + ["b", "c", "d"], + [1], + [2, 3, 4], + [], + [10.0], + [1100.112, 2323.2322, 2323.2322], + ["abcd", "defgh", "werty", "poiu"], + ], +) +@pytest.mark.parametrize( + "other", + [ + [1, 2, 3, 4, 5, 6], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + ["a"], + ["b", "c", "d"], + [1], + [2, 3, 4], + [], + [10.0], + [1100.112, 2323.2322, 2323.2322], + ["abcd", "defgh", "werty", "poiu"], + ], +) +def test_index_categories_equal(data, other): + pd_data = pd.Index(data).astype("category") + pd_other = pd.Index(other) + + gd_data = cudf.Index(data).astype("category") + gd_other = cudf.Index(other) + + expected = pd_data.equals(pd_other) + actual = gd_data.equals(gd_other) + assert_eq(expected, actual) + + expected = pd_other.equals(pd_data) + actual = gd_other.equals(gd_data) + assert_eq(expected, actual) + + +def test_index_equals_categories(): + lhs = cudf.CategoricalIndex( + ["a", "b", "c", "b", "a"], categories=["a", "b", "c"] + ) + rhs = cudf.CategoricalIndex( + ["a", "b", "c", "b", "a"], categories=["a", "b", "c", "_"] + ) + + got = lhs.equals(rhs) + expect = lhs.to_pandas().equals(rhs.to_pandas()) + + assert got == expect diff --git a/python/cudf/cudf/tests/indexes/categoricalindex/test_constructors.py b/python/cudf/cudf/tests/indexes/categoricalindex/test_constructors.py new file mode 100644 index 00000000000..e6124831550 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/categoricalindex/test_constructors.py @@ -0,0 +1,55 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) +@pytest.mark.parametrize("categories", [[1, 2], None]) +@pytest.mark.parametrize( + "dtype", + [ + pd.CategoricalDtype([1, 2, 3], ordered=True), + pd.CategoricalDtype([1, 2, 3], ordered=False), + None, + ], +) +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("name", [1, "a", None]) +def test_categorical_index_basic(data, categories, dtype, ordered, name): + # can't have both dtype and categories/ordered + if dtype is not None: + categories = None + ordered = None + pindex = pd.CategoricalIndex( + data=data, + categories=categories, + dtype=dtype, + ordered=ordered, + name=name, + ) + gindex = cudf.CategoricalIndex( + data=data, + categories=categories, + dtype=dtype, + ordered=ordered, + name=name, + ) + + assert_eq(pindex, gindex) + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("name", [None, "test"]) +def test_categoricalindex_from_codes(ordered, name): + codes = [0, 1, 2, 3, 4] + categories = ["a", "b", "c", "d", "e"] + result = cudf.CategoricalIndex.from_codes(codes, categories, ordered, name) + expected = pd.CategoricalIndex( + pd.Categorical.from_codes(codes, categories, ordered=ordered), + name=name, + ) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/indexing/__init__.py b/python/cudf/cudf/tests/indexes/datetimeindex/indexing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/indexing/test_getitem.py b/python/cudf/cudf/tests/indexes/datetimeindex/indexing/test_getitem.py new file mode 100644 index 00000000000..dc40612f6e6 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/indexing/test_getitem.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_index_getitem_time_duration(temporal_types_as_str): + gidx = cudf.Index([1, 2, 3, 4, None], dtype=temporal_types_as_str) + pidx = gidx.to_pandas() + with cudf.option_context("mode.pandas_compatible", True): + for i in range(len(gidx)): + if i == 4: + assert gidx[i] is pidx[i] + else: + assert_eq(gidx[i], pidx[i]) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_ceil_floor_round.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_ceil_floor_round.py new file mode 100644 index 00000000000..b21e315502c --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_ceil_floor_round.py @@ -0,0 +1,20 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] +) +@pytest.mark.parametrize("method", ["ceil", "floor", "round"]) +def test_index_datetime_ceil(resolution, method): + cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) + pidx = cuidx.to_pandas() + + expected = getattr(pidx, method)(resolution) + result = getattr(cuidx, method)(resolution) + + assert_eq(expected, result) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_repeat.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_repeat.py new file mode 100644 index 00000000000..a2b68a3584f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_repeat.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ + +import cudf +from cudf.testing import assert_eq + + +def test_index_datetime_repeat(): + gidx = cudf.date_range("2021-01-01", periods=3, freq="D") + pidx = gidx.to_pandas() + + actual = gidx.repeat(5) + expected = pidx.repeat(5) + + assert_eq(actual, expected) + + actual = gidx.to_frame().repeat(5) + + assert_eq(actual.index, expected) diff --git a/python/cudf/cudf/tests/indexes/index/indexing/__init__.py b/python/cudf/cudf/tests/indexes/index/indexing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/index/indexing/test_getitem.py b/python/cudf/cudf/tests/indexes/index/indexing/test_getitem.py new file mode 100644 index 00000000000..70fab79932a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/indexing/test_getitem.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "index_values", + [range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]], +) +@pytest.mark.parametrize("i_type", [int, np.int8, np.int32, np.int64]) +def test_scalar_getitem(index_values, i_type): + i = i_type(1) + index = cudf.Index(index_values) + + assert not isinstance(index[i], cudf.Index) + assert index[i] == index_values[i] + assert_eq(index, index.to_pandas()) + + +@pytest.mark.parametrize("idx", [0, np.int64(0)]) +def test_index_getitem_from_int(idx): + result = cudf.Index([1, 2])[idx] + assert result == 1 + + +@pytest.mark.parametrize("idx", [1.5, True, "foo"]) +def test_index_getitem_from_nonint_raises(idx): + with pytest.raises(ValueError): + cudf.Index([1, 2])[idx] diff --git a/python/cudf/cudf/tests/indexes/index/indexing/test_setitem.py b/python/cudf/cudf/tests/indexes/index/indexing/test_setitem.py new file mode 100644 index 00000000000..91fa5f2a33c --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/indexing/test_setitem.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf + + +def test_index_immutable(): + start, stop = 10, 34 + rg = cudf.RangeIndex(start, stop) + with pytest.raises(TypeError): + rg[1] = 5 + gi = cudf.Index(np.arange(start, stop)) + with pytest.raises(TypeError): + gi[1] = 5 diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_any_all.py b/python/cudf/cudf/tests/indexes/index/methods/test_any_all.py new file mode 100644 index 00000000000..f20c292c845 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_any_all.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_index_any(): + gidx = cudf.Index([1, 2, 3]) + pidx = gidx.to_pandas() + + assert_eq(pidx.any(), gidx.any()) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_append.py b/python/cudf/cudf/tests/indexes/index/methods/test_append.py new file mode 100644 index 00000000000..dc5a2a26a80 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_append.py @@ -0,0 +1,323 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
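+# Covers Index.append vs pandas across matching, mixed, and empty dtypes, list-of-Index inputs, and expected TypeErrors.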
+ +import re + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.core.index import Index +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, + expect_warning_if, +) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4, 5, 6], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + ["a"], + ["b", "c", "d"], + [1], + [2, 3, 4], + [], + [10.0], + [1100.112, 2323.2322, 2323.2322], + ["abcd", "defgh", "werty", "poiu"], + ], +) +@pytest.mark.parametrize( + "other", + [ + [1, 2, 3, 4, 5, 6], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + ["a"], + ["b", "c", "d"], + [1], + [2, 3, 4], + [], + [10.0], + [1100.112, 2323.2322, 2323.2322], + ["abcd", "defgh", "werty", "poiu"], + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Does not warn on older versions of pandas", +) +def test_index_append(data, other): + pd_data = pd.Index(data) + pd_other = pd.Index(other) + + gd_data = cudf.Index(data) + gd_other = cudf.Index(other) + + if cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other): + gd_data = gd_data.astype("str") + gd_other = gd_other.astype("str") + + with expect_warning_if( + (len(data) == 0 or len(other) == 0) and pd_data.dtype != pd_other.dtype + ): + expected = pd_data.append(pd_other) + with expect_warning_if( + (len(data) == 0 or len(other) == 0) and gd_data.dtype != gd_other.dtype + ): + actual = gd_data.append(gd_other) + if len(data) == 0 and len(other) == 0: + # Pandas default dtype to "object" for empty list + # cudf default dtype to "float" for empty list + assert_eq(expected, actual.astype("str")) + elif actual.dtype == "object": + assert_eq(expected.astype("str"), actual) + else: + assert_eq(expected, actual) + + +def test_index_empty_append_name_conflict(): + empty = cudf.Index([], name="foo") + non_empty = cudf.Index([1], name="bar") + expected = cudf.Index([1]) + + with pytest.warns(FutureWarning): + result = non_empty.append(empty) + assert_eq(result, expected) + + with pytest.warns(FutureWarning): + result = empty.append(non_empty) + assert_eq(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4, 5, 6], + [10, 20, 30, 40, 50, 60], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + [1], + [2, 3, 4], + [10.0], + [1100.112, 2323.2322, 2323.2322], + ], +) +@pytest.mark.parametrize( + "other", + [ + ["1", "2", "3", "4", "5", "6"], + ["a"], + ["b", "c", "d"], + ["abcd", "defgh", "werty", "poiu"], + ], +) +def test_index_append_error(data, other): + gd_data = Index(data) + gd_other = Index(other) + + got_dtype = ( + gd_other.dtype + if gd_data.dtype == np.dtype("object") + else gd_data.dtype + ) + with pytest.raises( + TypeError, + match=re.escape( + f"cudf does not support appending an Index of " + f"dtype `{np.dtype('object')}` with an Index " + f"of dtype `{got_dtype}`, please type-cast " + f"either one of them to same dtypes." + ), + ): + gd_data.append(gd_other) + + with pytest.raises( + TypeError, + match=re.escape( + f"cudf does not support appending an Index of " + f"dtype `{np.dtype('object')}` with an Index " + f"of dtype `{got_dtype}`, please type-cast " + f"either one of them to same dtypes." 
+ ), + ): + gd_other.append(gd_data) + + sr = gd_other.to_series() + + assert_exceptions_equal( + lfunc=gd_data.to_pandas().append, + rfunc=gd_data.append, + lfunc_args_and_kwargs=([[sr.to_pandas()]],), + rfunc_args_and_kwargs=([[sr]],), + ) + + +@pytest.mark.parametrize( + "data,other", + [ + ( + pd.Index([1, 2, 3, 4, 5, 6]), + [ + pd.Index([1, 2, 3, 4, 5, 6]), + pd.Index([1, 2, 3, 4, 5, 6, 10]), + pd.Index([]), + ], + ), + ( + pd.Index([]), + [ + pd.Index([1, 2, 3, 4, 5, 6]), + pd.Index([1, 2, 3, 4, 5, 6, 10]), + pd.Index([1, 4, 5, 6]), + ], + ), + ( + pd.Index([10, 20, 30, 40, 50, 60]), + [ + pd.Index([10, 20, 30, 40, 50, 60]), + pd.Index([10, 20, 30]), + pd.Index([40, 50, 60]), + pd.Index([10, 60]), + pd.Index([60]), + ], + ), + ( + pd.Index([]), + [ + pd.Index([10, 20, 30, 40, 50, 60]), + pd.Index([10, 20, 30]), + pd.Index([40, 50, 60]), + pd.Index([10, 60]), + pd.Index([60]), + ], + ), + ( + pd.Index(["1", "2", "3", "4", "5", "6"]), + [ + pd.Index(["1", "2", "3", "4", "5", "6"]), + pd.Index(["1", "2", "3"]), + pd.Index(["6"]), + pd.Index(["1", "6"]), + ], + ), + ( + pd.Index([]), + [ + pd.Index(["1", "2", "3", "4", "5", "6"]), + pd.Index(["1", "2", "3"]), + pd.Index(["6"]), + pd.Index(["1", "6"]), + ], + ), + ( + pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), + [ + pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), + pd.Index([1.0, 6.0]), + pd.Index([]), + pd.Index([6.0]), + ], + ), + ( + pd.Index([]), + [ + pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), + pd.Index([1.0, 6.0]), + pd.Index([1.0, 2.0, 6.0]), + pd.Index([6.0]), + ], + ), + ( + pd.Index(["a"]), + [ + pd.Index(["a"]), + pd.Index(["a", "b", "c"]), + pd.Index(["c"]), + pd.Index(["d"]), + pd.Index(["ae", "hello", "world"]), + ], + ), + ( + pd.Index([]), + [ + pd.Index(["a"]), + pd.Index(["a", "b", "c"]), + pd.Index(["c"]), + pd.Index(["d"]), + pd.Index(["ae", "hello", "world"]), + pd.Index([]), + ], + ), + ], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Does not warn on older versions of pandas", +) +def test_index_append_list(data, other): + pd_data = data + pd_other = other + + gd_data = cudf.from_pandas(data) + gd_other = [cudf.from_pandas(i) for i in other] + + with expect_warning_if( + (len(data) == 0 or any(len(d) == 0 for d in other)) + and (any(d.dtype != data.dtype for d in other)) + ): + expected = pd_data.append(pd_other) + with expect_warning_if( + (len(data) == 0 or any(len(d) == 0 for d in other)) + and (any(d.dtype != data.dtype for d in other)) + ): + actual = gd_data.append(gd_other) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "index", + [ + range(np.random.default_rng(seed=0).integers(0, 100)), + range(0, 10, -2), + range(0, -10, 2), + range(0, -10, -2), + range(0, 1), + [1, 2, 3, 1, None, None], + [None, None, 3.2, 1, None, None], + [None, "a", "3.2", "z", None, None], + pd.Series(["a", "b", None], dtype="category"), + np.array([1, 2, 3, None], dtype="datetime64[s]"), + ], +) +@pytest.mark.parametrize( + "func", + [ + "to_series", + "isna", + "notna", + "append", + ], +) +def test_index_methods(index, func): + gidx = cudf.Index(index) + pidx = gidx.to_pandas() + + if func == "append": + expected = pidx.append(other=pidx) + actual = gidx.append(other=gidx) + else: + expected = getattr(pidx, func)() + actual = getattr(gidx, func)() + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_argsort.py b/python/cudf/cudf/tests/indexes/index/methods/test_argsort.py new file mode 100644 index 00000000000..aca74c8885b --- /dev/null 
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_argsort.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1, 10, 2, 100, -10],
+        ["z", "x", "a", "c", "b"],
+        [-10.2, 100.1, -100.2, 0.0, 0.23],
+    ],
+)
+def test_index_argsort(data):
+    pdi = pd.Index(data)
+    gdi = cudf.from_pandas(pdi)
+
+    assert_eq(pdi.argsort(), gdi.argsort())
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_astype.py b/python/cudf/cudf/tests/indexes/index/methods/test_astype.py
new file mode 100644
index 00000000000..05ec3e792f9
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_astype.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import pandas as pd
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_index_astype(all_supported_types_as_str, copy):
+    pdi = pd.Index([1, 2, 3])
+    gdi = cudf.from_pandas(pdi)
+
+    actual = gdi.astype(dtype=all_supported_types_as_str, copy=copy)
+    expected = pdi.astype(dtype=all_supported_types_as_str, copy=copy)
+
+    assert_eq(expected, actual)
+    assert_eq(pdi, gdi)
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_copy.py b/python/cudf/cudf/tests/indexes/index/methods/test_copy.py
new file mode 100644
index 00000000000..3429adea5d1
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_copy.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    assert_column_memory_eq,
+    assert_column_memory_ne,
+)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        range(1, 5),
+        [1, 2, 3, 4],
+        pd.DatetimeIndex(["2001", "2002", "2003"]),
+        ["a", "b", "c"],
+        pd.CategoricalIndex(["a", "b", "c"]),
+    ],
+)
+@pytest.mark.parametrize("copy_on_write", [True, False])
+def test_index_copy(data, deep, copy_on_write):
+    name = "x"
+    cidx = cudf.Index(data)
+    pidx = cidx.to_pandas()
+
+    pidx_copy = pidx.copy(name=name, deep=deep)
+    cidx_copy = cidx.copy(name=name, deep=deep)
+
+    assert_eq(pidx_copy, cidx_copy)
+
+    with cudf.option_context("copy_on_write", copy_on_write):
+        if not isinstance(cidx, cudf.RangeIndex):
+            if (
+                isinstance(cidx._column, cudf.core.column.StringColumn)
+                or not deep
+                or (copy_on_write and not deep)
+            ):
+                # StringColumn is immutable; hence, deep copies of an
+                # Index with string dtype will share the same StringColumn.
+
+                # When `copy_on_write` is turned on, Index objects will
+                # have a unique column object, but they all point to the
+                # same data pointers.
+                assert_column_memory_eq(cidx._column, cidx_copy._column)
+            else:
+                assert_column_memory_ne(cidx._column, cidx_copy._column)
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_difference.py b/python/cudf/cudf/tests/indexes/index/methods/test_difference.py
new file mode 100644
index 00000000000..18a08cb63b2
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_difference.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
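+# Covers Index.difference vs pandas across dtypes, sort options, and index names, plus invalid-input and invalid-sort errors.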
+import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_GE_220, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import ( + assert_exceptions_equal, +) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 4, 5, 6], + [4, 5, 6, 10, 20, 30], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + ["5", "6", "2", "a", "b", "c"], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + [1.0, 5.0, 6.0, 0.0, 1.3], + ["ab", "cd", "ef"], + pd.Series(["1", "2", "a", "3", None], dtype="category"), + range(0, 10), + [], + [1, 1, 2, 2], + ], +) +@pytest.mark.parametrize( + "other", + [ + [1, 2, 3, 4, 5, 6], + [4, 5, 6, 10, 20, 30], + [10, 20, 30, 40, 50, 60], + ["1", "2", "3", "4", "5", "6"], + ["5", "6", "2", "a", "b", "c"], + ["ab", "ef", None], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + [1.0, 5.0, 6.0, 0.0, 1.3], + range(2, 4), + pd.Series(["1", "a", "3", None], dtype="category"), + [], + [2], + ], +) +@pytest.mark.parametrize("sort", [None, False, True]) +@pytest.mark.parametrize( + "name_data,name_other", + [("abc", "c"), (None, "abc"), ("abc", pd.NA), ("abc", "abc")], +) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_index_difference(data, other, sort, name_data, name_other): + pd_data = pd.Index(data, name=name_data) + pd_other = pd.Index(other, name=name_other) + if ( + not PANDAS_GE_220 + and isinstance(pd_data.dtype, pd.CategoricalDtype) + and not isinstance(pd_other.dtype, pd.CategoricalDtype) + and pd_other.isnull().any() + ): + pytest.skip(reason="https://github.com/pandas-dev/pandas/issues/57318") + + if ( + not PANDAS_GE_220 + and len(pd_other) == 0 + and len(pd_data) != len(pd_data.unique()) + ): + pytest.skip(reason="Bug fixed in pandas-2.2+") + + gd_data = cudf.from_pandas(pd_data) + gd_other = cudf.from_pandas(pd_other) + + expected = pd_data.difference(pd_other, sort=sort) + actual = gd_data.difference(gd_other, sort=sort) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("other", ["a", 1, None]) +def test_index_difference_invalid_inputs(other): + pdi = pd.Index([1, 2, 3]) + gdi = cudf.Index([1, 2, 3]) + + assert_exceptions_equal( + pdi.difference, + gdi.difference, + ([other], {}), + ([other], {}), + ) + + +def test_index_difference_sort_error(): + pdi = pd.Index([1, 2, 3]) + gdi = cudf.Index([1, 2, 3]) + + assert_exceptions_equal( + pdi.difference, + gdi.difference, + ([pdi], {"sort": "A"}), + ([gdi], {"sort": "A"}), + ) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_drop_duplicates.py b/python/cudf/cudf/tests/indexes/index/methods/test_drop_duplicates.py new file mode 100644 index 00000000000..f569fcde38a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_drop_duplicates.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], [], [1], [1, 2, 3]]) +def test_index_drop_duplicates(data, all_supported_types_as_str, request): + request.applymarker( + pytest.mark.xfail( + len(data) > 0 + and all_supported_types_as_str + in {"timedelta64[us]", "timedelta64[ms]", "timedelta64[s]"}, + reason=f"wrong result for {all_supported_types_as_str}", + ) + ) + pdi = pd.Index(data, dtype=all_supported_types_as_str) + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + + assert_eq(pdi.drop_duplicates(), gdi.drop_duplicates()) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py b/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py index b572e9e156d..01cfb9e7aa2 100644 --- a/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py +++ b/python/cudf/cudf/tests/indexes/index/methods/test_dropna.py @@ -7,6 +7,11 @@ from cudf.testing import assert_eq +def test_dropna_bad_how(): + with pytest.raises(ValueError): + cudf.Index([1]).dropna(how="foo") + + @pytest.mark.parametrize( "data, dtype", [ diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_duplicated.py b/python/cudf/cudf/tests/indexes/index/methods/test_duplicated.py new file mode 100644 index 00000000000..dce4c1ec5c0 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_duplicated.py @@ -0,0 +1,26 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 1, 1, 3, 2, 3], + [np.nan, 10, 15, 16, np.nan, 10, 16], + range(0, 10), + ["ab", "zx", None, "pq", "ab", None, "zx", None], + ], +) +@pytest.mark.parametrize("keep", ["first", "last", False]) +def test_index_duplicated(data, keep): + gs = cudf.Index(data) + ps = gs.to_pandas() + + expected = ps.duplicated(keep=keep) + actual = gs.duplicated(keep=keep) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_equals.py b/python/cudf/cudf/tests/indexes/index/methods/test_equals.py new file mode 100644 index 00000000000..91e943311e9 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_equals.py @@ -0,0 +1,122 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
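+# Covers Index.equals vs pandas, including RangeIndex comparisons and non-Index right-hand sides.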
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_index_comparison():
+    start, stop = 10, 34
+    rg = cudf.RangeIndex(start, stop)
+    gi = cudf.Index(np.arange(start, stop))
+    assert rg.equals(gi)
+    assert gi.equals(rg)
+    assert not rg[:-1].equals(gi)
+    assert rg[:-1].equals(gi[:-1])
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1, 2, 3, 4, 5, 6],
+        [10, 20, 30, 40, 50, 60],
+        ["1", "2", "3", "4", "5", "6"],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
+        ["a"],
+        ["b", "c", "d"],
+        [1],
+        [2, 3, 4],
+        [],
+        [10.0],
+        [1100.112, 2323.2322, 2323.2322],
+        ["abcd", "defgh", "werty", "poiu"],
+    ],
+)
+@pytest.mark.parametrize(
+    "other",
+    [
+        [1, 2, 3, 4, 5, 6],
+        [10, 20, 30, 40, 50, 60],
+        ["1", "2", "3", "4", "5", "6"],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
+        ["a"],
+        [],
+        ["b", "c", "d"],
+        [1],
+        [2, 3, 4],
+        [10.0],
+        [1100.112, 2323.2322, 2323.2322],
+        ["abcd", "defgh", "werty", "poiu"],
+    ],
+)
+def test_index_equals(data, other):
+    pd_data = pd.Index(data)
+    pd_other = pd.Index(other)
+
+    gd_data = cudf.Index(data)
+    gd_other = cudf.Index(other)
+
+    expected = pd_data.equals(pd_other)
+    actual = gd_data.equals(gd_other)
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1, 2, 3, 4, 5, 6],
+        [10, 20, 30, 40, 50, 60],
+        ["1", "2", "3", "4", "5", "6"],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
+        ["a"],
+        ["b", "c", "d"],
+        [1],
+        [2, 3, 4],
+        [],
+        [10.0],
+        [1100.112, 2323.2322, 2323.2322],
+        ["abcd", "defgh", "werty", "poiu"],
+    ],
+)
+@pytest.mark.parametrize(
+    "other",
+    [
+        [1, 2, 3, 4, 5, 6],
+        [10, 20, 30, 40, 50, 60],
+        ["1", "2", "3", "4", "5", "6"],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
+        ["a"],
+        ["b", "c", "d"],
+        [1],
+        [2, 3, 4],
+        [],
+        [10.0],
+        [1100.112, 2323.2322, 2323.2322],
+        ["abcd", "defgh", "werty", "poiu"],
+    ],
+)
+def test_index_equal_misc(data, other):
+    pd_data = pd.Index(data)
+    pd_other = other
+
+    gd_data = cudf.Index(data)
+    gd_other = other
+
+    expected = pd_data.equals(pd_other)
+    actual = gd_data.equals(gd_other)
+    assert_eq(expected, actual)
+
+    expected = pd_data.equals(np.array(pd_other))
+    actual = gd_data.equals(np.array(gd_other))
+    assert_eq(expected, actual)
+
+    expected = pd_data.equals(pd.Series(pd_other))
+    actual = gd_data.equals(cudf.Series(gd_other))
+    assert_eq(expected, actual)
+
+    expected = pd_data.astype("category").equals(pd_other)
+    actual = gd_data.astype("category").equals(gd_other)
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_fillna.py b/python/cudf/cudf/tests/indexes/index/methods/test_fillna.py
new file mode 100644
index 00000000000..ab061abc73f
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_fillna.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data,fill_value", + [ + ([1, 2, 3, 1, None, None], 1), + ([None, None, 3.2, 1, None, None], 10.0), + ([None, "a", "3.2", "z", None, None], "helloworld"), + (pd.Series(["a", "b", None], dtype="category"), "b"), + (pd.Series([None, None, 1.0], dtype="category"), 1.0), + ( + np.array([1, 2, 3, None], dtype="datetime64[s]"), + np.datetime64("2005-02-25"), + ), + ( + np.array( + [None, None, 122, 3242234, None, 6237846], + dtype="datetime64[ms]", + ), + np.datetime64("2005-02-25"), + ), + ], +) +def test_index_fillna(data, fill_value): + pdi = pd.Index(data) + gdi = cudf.Index(data) + + assert_eq( + pdi.fillna(fill_value), gdi.fillna(fill_value), exact=False + ) # Int64 v/s Float64 diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_find_label_range.py b/python/cudf/cudf/tests/indexes/index/methods/test_find_label_range.py new file mode 100644 index 00000000000..21bc684026e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_find_label_range.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pytest + +import cudf + + +def test_index_find_label_range_index(): + # Monotonic Index + idx = cudf.Index(np.asarray([4, 5, 6, 10])) + assert idx.find_label_range(slice(4, 6)) == slice(0, 3, 1) + assert idx.find_label_range(slice(5, 10)) == slice(1, 4, 1) + assert idx.find_label_range(slice(0, 6)) == slice(0, 3, 1) + assert idx.find_label_range(slice(4, 11)) == slice(0, 4, 1) + + # Non-monotonic Index + idx_nm = cudf.Index(np.asarray([5, 4, 6, 10])) + assert idx_nm.find_label_range(slice(4, 6)) == slice(1, 3, 1) + assert idx_nm.find_label_range(slice(5, 10)) == slice(0, 4, 1) + # Last value not found + with pytest.raises(KeyError, match="not in index"): + idx_nm.find_label_range(slice(0, 6)) + # Last value not found + with pytest.raises(KeyError, match="not in index"): + idx_nm.find_label_range(slice(4, 11)) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_get_indexer.py b/python/cudf/cudf/tests/indexes/index/methods/test_get_indexer.py new file mode 100644 index 00000000000..7b1ac5dcf43 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_get_indexer.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
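+# Covers Index.get_indexer vs pandas for unique and duplicate numeric and string indexes under each fill method.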
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
+
+
+@pytest.mark.parametrize(
+    "data", [[1, 3, 6], [6, 1, 3]], ids=["monotonic", "non-monotonic"]
+)
+@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"])
+def test_get_indexer_single_unique_numeric(data, method):
+    key = list(range(0, 8))
+    pi = pd.Index(data)
+    gi = cudf.from_pandas(pi)
+
+    if (
+        # `method` only applicable to monotonic index
+        not pi.is_monotonic_increasing and method is not None
+    ):
+        assert_exceptions_equal(
+            lfunc=pi.get_indexer,
+            rfunc=gi.get_indexer,
+            lfunc_args_and_kwargs=([], {"key": key, "method": method}),
+            rfunc_args_and_kwargs=([], {"key": key, "method": method}),
+        )
+    else:
+        expected = pi.get_indexer(key, method=method)
+        got = gi.get_indexer(key, method=method)
+
+        assert_eq(expected, got)
+
+        with cudf.option_context("mode.pandas_compatible", True):
+            got = gi.get_indexer(key, method=method)
+            assert_eq(expected, got, check_dtype=True)
+
+
+@pytest.mark.parametrize(
+    "idx",
+    [
+        [-1, 2, 3, 6],
+        [6, 1, 3, 4],
+    ],
+    ids=["monotonic", "non-monotonic"],
+)
+@pytest.mark.parametrize("key", [[0, 3, 1], [6, 7]])
+@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"])
+@pytest.mark.parametrize("tolerance", [None, 1, 2])
+def test_get_indexer_single_duplicate_numeric(idx, key, method, tolerance):
+    pi = pd.Index(idx)
+    gi = cudf.from_pandas(pi)
+
+    if not pi.is_monotonic_increasing and method is not None:
+        assert_exceptions_equal(
+            lfunc=pi.get_indexer,
+            rfunc=gi.get_indexer,
+            lfunc_args_and_kwargs=([], {"key": key, "method": method}),
+            rfunc_args_and_kwargs=([], {"key": key, "method": method}),
+        )
+    else:
+        expected = pi.get_indexer(
+            key, method=method, tolerance=None if method is None else tolerance
+        )
+        got = gi.get_indexer(
+            key, method=method, tolerance=None if method is None else tolerance
+        )
+
+        assert_eq(expected, got)
+
+
+@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]])
+@pytest.mark.parametrize("key", [["a", "f", "n", "z"], ["p", "p", "b"]])
+@pytest.mark.parametrize("method", [None, "ffill", "bfill"])
+def test_get_indexer_single_unique_string(idx, key, method):
+    pi = pd.Index(idx)
+    gi = cudf.from_pandas(pi)
+
+    if not pi.is_monotonic_increasing and method is not None:
+        assert_exceptions_equal(
+            lfunc=pi.get_indexer,
+            rfunc=gi.get_indexer,
+            lfunc_args_and_kwargs=([], {"key": key, "method": method}),
+            rfunc_args_and_kwargs=([], {"key": key, "method": method}),
+        )
+    else:
+        expected = pi.get_indexer(key, method=method)
+        got = gi.get_indexer(key, method=method)
+
+        assert_eq(expected, got)
+
+
+@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["a", "f", "m", "q"]])
+@pytest.mark.parametrize("key", [["a"], ["f", "n", "z"]])
+@pytest.mark.parametrize("method", [None, "ffill", "bfill"])
+def test_get_indexer_single_duplicate_string(idx, key, method):
+    pi = pd.Index(idx)
+    gi = cudf.from_pandas(pi)
+
+    if (
+        # `method` only applicable to monotonic index
+        (not pi.is_monotonic_increasing and method is not None)
+        or not pi.is_unique
+    ):
+        assert_exceptions_equal(
+            lfunc=pi.get_indexer,
+            rfunc=gi.get_indexer,
+            lfunc_args_and_kwargs=([], {"key": key, "method": method}),
+            rfunc_args_and_kwargs=([], {"key": key, "method": method}),
+        )
+    else:
+        expected = pi.get_indexer(key, method=method)
+        got = gi.get_indexer(key, method=method)
+
+        assert_eq(expected, got)
+
+        with cudf.option_context("mode.pandas_compatible",
True): + got = gi.get_indexer(key, method=method) + + assert_eq(expected, got, check_dtype=True) + + +@pytest.mark.parametrize( + "idx1", + [ + lambda: cudf.Index(["a", "b", "c"]), + lambda: cudf.RangeIndex(0, 10), + lambda: cudf.Index([1, 2, 3], dtype="category"), + lambda: cudf.Index(["a", "b", "c", "d"], dtype="category"), + lambda: cudf.MultiIndex.from_tuples( + [ + ("a", "a", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("b", "c", "a"), + ] + ), + ], +) +@pytest.mark.parametrize( + "idx2", + [ + lambda: cudf.Index(["a", "b", "c"]), + lambda: cudf.RangeIndex(0, 10), + lambda: cudf.Index([1, 2, 3], dtype="category"), + lambda: cudf.Index(["a", "b", "c", "d"], dtype="category"), + ], +) +def test_get_indexer_invalid(idx1, idx2): + idx1 = idx1() + idx2 = idx2() + assert_eq( + idx1.get_indexer(idx2), idx1.to_pandas().get_indexer(idx2.to_pandas()) + ) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_get_loc.py b/python/cudf/cudf/tests/indexes/index/methods/test_get_loc.py new file mode 100644 index 00000000000..96277cec167 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_get_loc.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "idx", + [ + [1, 3, 3, 6], + [6, 1, 3, 3], + [4, 3, 2, 1, 0], + ], + ids=["monotonic increasing", "non-monotonic", "monotonic decreasing"], +) +@pytest.mark.parametrize("key", [0, 3, 6, 7, 4]) +def test_get_loc_duplicate_numeric(idx, key): + pi = pd.Index(idx) + gi = cudf.from_pandas(pi) + + if key not in pi: + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = pi.get_loc(key) + got = gi.get_loc(key) + + assert_eq(expected, got) + + +@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]]) +@pytest.mark.parametrize("key", ["a", "f", "n", "z"]) +def test_get_loc_single_unique_string(idx, key): + pi = pd.Index(idx) + gi = cudf.from_pandas(pi) + + if key not in pi: + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = pi.get_loc(key) + got = gi.get_loc(key) + + assert_eq(expected, got) + + +@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["m", "f", "m", "q"]]) +@pytest.mark.parametrize("key", ["a", "f", "n", "z"]) +def test_get_loc_single_duplicate_string(idx, key): + pi = pd.Index(idx) + gi = cudf.from_pandas(pi) + + if key not in pi: + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = pi.get_loc(key) + got = gi.get_loc(key) + + assert_eq(expected, got) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_intersection.py b/python/cudf/cudf/tests/indexes/index/methods/test_intersection.py new file mode 100644 index 00000000000..c142044a348 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_intersection.py @@ -0,0 +1,89 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
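+# Covers Index.intersection vs pandas, including pandas-compatible mode and the object-dtype cast error.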
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "idx1, idx2",
+    [
+        (pd.RangeIndex(0, 10), pd.RangeIndex(3, 7)),
+        (pd.RangeIndex(0, 10), pd.RangeIndex(-10, 20)),
+        (pd.RangeIndex(0, 10, name="a"), pd.RangeIndex(90, 100, name="b")),
+        (pd.Index([0, 1, 2, 30], name=pd.NA), pd.Index([30, 0, 90, 100])),
+        (pd.Index([0, 1, 2, 30], name="a"), [90, 100]),
+        (pd.Index([0, 1, 2, 30]), pd.Index([0, 10, 1.0, 11])),
+        (
+            pd.Index(["a", "b", "c", "d", "c"]),
+            pd.Index(["a", "c", "z"], name="abc"),
+        ),
+        (
+            pd.Index(["a", "b", "c", "d", "c"]),
+            pd.Index(["a", "b", "c", "d", "c"]),
+        ),
+        (pd.Index([True, False, True, True]), pd.Index([10, 11, 12, 0, 1, 2])),
+        (pd.Index([True, False, True, True]), pd.Index([True, True])),
+        (pd.RangeIndex(0, 10, name="a"), pd.Index([5, 6, 7], name="b")),
+        (pd.Index(["a", "b", "c"], dtype="category"), pd.Index(["a", "b"])),
+        (pd.Index([0, 1, 2], dtype="category"), pd.RangeIndex(0, 10)),
+        (pd.Index(["a", "b", "c"], name="abc"), []),
+        (pd.Index([], name="abc"), pd.RangeIndex(0, 4)),
+        (pd.Index([1, 2, 3]), pd.Index([1, 2], dtype="category")),
+        (pd.Index([]), pd.Index([1, 2], dtype="category")),
+    ],
+)
+@pytest.mark.parametrize("sort", [None, False, True])
+@pytest.mark.parametrize("pandas_compatible", [True, False])
+def test_intersection_index(idx1, idx2, sort, pandas_compatible):
+    expected = idx1.intersection(idx2, sort=sort)
+
+    with cudf.option_context("mode.pandas_compatible", pandas_compatible):
+        idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1
+        idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2
+
+        actual = idx1.intersection(idx2, sort=sort)
+
+    # TODO: Resolve the bool vs ints mixed issue
+    # once pandas has a direction on this issue
+    # https://github.com/pandas-dev/pandas/issues/44000
+    # Inexact comparison is only needed when exactly one side is boolean.
+    assert_eq(
+        expected,
+        actual,
+        exact=False
+        if (idx1.dtype.kind == "b" and idx2.dtype.kind != "b")
+        or (idx1.dtype.kind != "b" and idx2.dtype.kind == "b")
+        else True,
+    )
+
+
+@pytest.mark.parametrize(
+    "idx1, idx2",
+    [
+        (pd.Index(["a", "b", "c"], dtype="category"), pd.Index([1, 2, 3])),
+    ],
+)
+@pytest.mark.parametrize("sort", [None, False, True])
+@pytest.mark.parametrize("pandas_compatible", [True, False])
+def test_intersection_index_error(idx1, idx2, sort, pandas_compatible):
+    expected = idx1.intersection(idx2, sort=sort)
+
+    with cudf.option_context("mode.pandas_compatible", pandas_compatible):
+        idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1
+        idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2
+
+        if pandas_compatible:
+            with pytest.raises(
+                ValueError,
+                match="Cannot convert numerical column to string column when dtype is an object dtype in pandas compatibility mode.",
+            ):
+                idx1.intersection(idx2, sort=sort)
+        else:
+            actual = idx1.intersection(idx2, sort=sort)
+
+            assert_eq(
+                expected,
+                actual,
+            )
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_isin.py b/python/cudf/cudf/tests/indexes/index/methods/test_isin.py
new file mode 100644
index 00000000000..37377a50b91
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_isin.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+ +import re + +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import PANDAS_GE_220 +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if + + +@pytest.mark.parametrize( + "index", + [ + pd.Index([]), + pd.Index(["a", "b", "c", "d", "e"]), + pd.Index([0, None, 9]), + pd.date_range("2019-01-01", periods=3), + ], +) +@pytest.mark.parametrize( + "values", + [ + [], + ["this", "is"], + [0, 19, 13], + ["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02 10:00:00"], + ], +) +def test_isin_index(index, values): + pidx = index + gidx = cudf.Index.from_pandas(pidx) + + is_dt_str = ( + next(iter(values), None) == "2019-01-01 04:00:00" + and len(pidx) + and pidx.dtype.kind == "M" + ) + with expect_warning_if(is_dt_str): + got = gidx.isin(values) + with expect_warning_if(PANDAS_GE_220 and is_dt_str): + expected = pidx.isin(values) + + assert_eq(got, expected) + + +@pytest.mark.parametrize( + "idx, values", + [ + (range(100, 1000, 10), [200, 600, 800]), + ([None, "a", "3.2", "z", None, None], ["a", "z"]), + (pd.Series(["a", "b", None], dtype="category"), [10, None]), + ], +) +def test_index_isin_values(idx, values): + gidx = cudf.Index(idx) + pidx = gidx.to_pandas() + + actual = gidx.isin(values) + expected = pidx.isin(values) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "idx, scalar", + [ + (range(0, -10, -2), -4), + ([None, "a", "3.2", "z", None, None], "x"), + (pd.Series(["a", "b", None], dtype="category"), 10), + ], +) +def test_index_isin_scalar_values(idx, scalar): + gidx = cudf.Index(idx) + + with pytest.raises( + TypeError, + match=re.escape( + f"only list-like objects are allowed to be passed " + f"to isin(), you passed a {type(scalar).__name__}" + ), + ): + gidx.isin(scalar) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_isna_notnull.py b/python/cudf/cudf/tests/indexes/index/methods/test_isna_notnull.py new file mode 100644 index 00000000000..33bfb957390 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_isna_notnull.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_index_isna_notna(): + idx = [1, None, 3, None, 5] + pidx = pd.Index(idx, name="idx") + gidx = cudf.Index(idx, name="idx") + assert_eq(gidx.isna(), pidx.isna()) + assert_eq(gidx.notna(), pidx.notna()) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_reductions.py b/python/cudf/cudf/tests/indexes/index/methods/test_reductions.py new file mode 100644 index 00000000000..eaf5e48cc01 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_reductions.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import numpy as np +import pytest + +import cudf + + +@pytest.mark.parametrize( + "func", + [ + lambda x: x.min(), + lambda x: x.max(), + lambda x: x.any(), + lambda x: x.all(), + ], +) +def test_reductions(func): + x = np.asarray([4, 5, 6, 10]) + idx = cudf.Index(np.asarray([4, 5, 6, 10])) + + assert func(x) == func(idx) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_rename.py b/python/cudf/cudf/tests/indexes/index/methods/test_rename.py new file mode 100644 index 00000000000..b0db84e20cd --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_rename.py @@ -0,0 +1,68 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    SERIES_OR_INDEX_NAMES,
+)
+
+
+@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
+def test_index_rename(initial_name, name):
+    pds = pd.Index([1, 2, 3], name=initial_name)
+    gds = cudf.Index(pds)
+
+    assert_eq(pds, gds)
+
+    expect = pds.rename(name)
+    got = gds.rename(name)
+
+    assert_eq(expect, got)
+
+    # From here on, test recursive creation and whether the name
+    # is handled correctly during recursive creation.
+    pds = pd.Index(expect)
+    gds = cudf.Index(got)
+
+    assert_eq(pds, gds)
+
+    pds = pd.Index(pds, name="abc")
+    gds = cudf.Index(gds, name="abc")
+    assert_eq(pds, gds)
+
+
+def test_index_rename_inplace():
+    pds = pd.Index([1, 2, 3], name="asdf")
+    gds = cudf.Index(pds)
+
+    # inplace=False should yield a shallow copy
+    gds_renamed_deep = gds.rename("new_name", inplace=False)
+
+    assert gds_renamed_deep._column.data_ptr == gds._column.data_ptr
+
+    # inplace=True returns None
+    expected_ptr = gds._column.data_ptr
+    gds.rename("new_name", inplace=True)
+
+    assert expected_ptr == gds._column.data_ptr
+
+
+def test_index_rename_preserves_arg():
+    idx1 = cudf.Index([1, 2, 3], name="orig_name")
+
+    # this should be an entirely new object
+    idx2 = idx1.rename("new_name", inplace=False)
+
+    assert idx2.name == "new_name"
+    assert idx1.name == "orig_name"
+
+    # a new object but referencing the same data
+    idx3 = cudf.Index(idx1, name="last_name")
+
+    assert idx3.name == "last_name"
+    assert idx1.name == "orig_name"
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_set_names.py b/python/cudf/cudf/tests/indexes/index/methods/test_set_names.py
new file mode 100644
index 00000000000..11b0c371ed9
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_set_names.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
+
+
+@pytest.mark.parametrize(
+    "idx",
+    [
+        pd.Index([1, 2, 3]),
+        pd.Index(["abc", "def", "ghi"]),
+        pd.RangeIndex(0, 10, 1),
+        pd.Index([0.324, 0.234, 1.3], name="abc"),
+    ],
+)
+@pytest.mark.parametrize("names", [None, "a", "new name", ["another name"]])
+def test_index_set_names(idx, names, inplace):
+    if inplace:
+        pi = idx.copy()
+    else:
+        pi = idx
+    gi = cudf.from_pandas(idx)
+
+    expected = pi.set_names(names=names, inplace=inplace)
+    actual = gi.set_names(names=names, inplace=inplace)
+
+    if inplace:
+        expected, actual = pi, gi
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("level", [1, [0], "abc"])
+@pytest.mark.parametrize("names", [None, "a"])
+def test_index_set_names_error(level, names):
+    pi = pd.Index([1, 2, 3], name="abc")
+    gi = cudf.from_pandas(pi)
+
+    assert_exceptions_equal(
+        lfunc=pi.set_names,
+        rfunc=gi.set_names,
+        lfunc_args_and_kwargs=([], {"names": names, "level": level}),
+        rfunc_args_and_kwargs=([], {"names": names, "level": level}),
+    )
diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_sort_values.py b/python/cudf/cudf/tests/indexes/index/methods/test_sort_values.py
new file mode 100644
index 00000000000..edc03010540
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/index/methods/test_sort_values.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + pd.Index([1, 10, 2, 100, -10], name="abc"), + pd.Index(["z", "x", "a", "c", "b"]), + pd.Index(["z", "x", "a", "c", "b"], dtype="category"), + pd.Index( + [-10.2, 100.1, -100.2, 0.0, 0.23], name="this is a float index" + ), + pd.Index([102, 1001, 1002, 0.0, 23], dtype="datetime64[ns]"), + pd.Index([13240.2, 1001, 100.2, 0.0, 23], dtype="datetime64[ns]"), + pd.RangeIndex(0, 10, 1), + pd.RangeIndex(0, -100, -2), + pd.Index([-10.2, 100.1, -100.2, 0.0, 23], dtype="timedelta64[ns]"), + ], +) +@pytest.mark.parametrize("return_indexer", [True, False]) +def test_index_sort_values(data, ascending, return_indexer): + pdi = data + gdi = cudf.from_pandas(pdi) + + expected = pdi.sort_values( + ascending=ascending, return_indexer=return_indexer + ) + actual = gdi.sort_values( + ascending=ascending, return_indexer=return_indexer + ) + + if return_indexer: + expected_indexer = expected[1] + actual_indexer = actual[1] + + assert_eq(expected_indexer, actual_indexer) + + expected = expected[0] + actual = actual[0] + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_to_arrow.py b/python/cudf/cudf/tests/indexes/index/methods/test_to_arrow.py new file mode 100644 index 00000000000..ee6b97cae6b --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_to_arrow.py @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3, 1, None, None], + [None, None, 3.2, 1, None, None], + [None, "a", "3.2", "z", None, None], + pd.Series(["a", "b", None], dtype="category"), + np.array([1, 2, 3, None], dtype="datetime64[s]"), + ], +) +def test_index_to_arrow(data): + pdi = pd.Index(data) + gdi = cudf.Index(data) + + expected_arrow_array = pa.Array.from_pandas(pdi) + got_arrow_array = gdi.to_arrow() + + assert_eq(expected_arrow_array, got_arrow_array) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_to_frame.py b/python/cudf/cudf/tests/indexes/index/methods/test_to_frame.py new file mode 100644 index 00000000000..223786b0f86 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_to_frame.py @@ -0,0 +1,24 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.api.extensions import no_default +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", [[1, 2, 3], ["ab", "cd", "e", None], range(0, 10)] +) +@pytest.mark.parametrize("data_name", [None, 1, "abc"]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("name", [None, no_default, 1, "abc"]) +def test_index_to_frame(data, data_name, index, name): + pidx = pd.Index(data, name=data_name) + gidx = cudf.from_pandas(pidx) + + expected = pidx.to_frame(index=index, name=name) + actual = gidx.to_frame(index=index, name=name) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_to_pandas.py b/python/cudf/cudf/tests/indexes/index/methods/test_to_pandas.py new file mode 100644 index 00000000000..c505222f67a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_to_pandas.py @@ -0,0 +1,77 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import datetime + +import numpy as np +import pandas as pd +import pyarrow as pa +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data,expected_dtype", + [ + ([10, 11, 12], pd.Int64Dtype()), + ([0.1, 10.2, 12.3], pd.Float64Dtype()), + (["abc", None, "def"], pd.StringDtype()), + ], +) +def test_index_to_pandas_nullable(data, expected_dtype): + gi = cudf.Index(data) + pi = gi.to_pandas(nullable=True) + expected = pd.Index(data, dtype=expected_dtype) + + assert_eq(pi, expected) + + +@pytest.mark.parametrize( + "data", + [ + range(1), + np.array([1, 2], dtype="datetime64[ns]"), + np.array([1, 2], dtype="timedelta64[ns]"), + ], +) +def test_index_to_pandas_nullable_notimplemented(data): + idx = cudf.Index(data) + with pytest.raises(NotImplementedError): + idx.to_pandas(nullable=True) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + pd.Interval(1, 2), + ], +) +def test_index_to_pandas_arrow_type_nullable_raises(scalar): + data = [scalar, None] + idx = cudf.Index(data) + with pytest.raises(ValueError): + idx.to_pandas(nullable=True, arrow_type=True) + + +@pytest.mark.parametrize( + "scalar", + [ + 1, + 1.0, + "a", + datetime.datetime(2020, 1, 1), + datetime.timedelta(1), + ], +) +def test_index_to_pandas_arrow_type(scalar): + pa_array = pa.array([scalar, None]) + idx = cudf.Index(pa_array) + result = idx.to_pandas(arrow_type=True) + expected = pd.Index(pd.arrays.ArrowExtensionArray(pa_array)) + pd.testing.assert_index_equal(result, expected) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_to_series.py b/python/cudf/cudf/tests/indexes/index/methods/test_to_series.py new file mode 100644 index 00000000000..9c63a5d5fcd --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_to_series.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + [1, 10, 2, 100, -10], + ["z", "x", "a", "c", "b"], + [-10.2, 100.1, -100.2, 0.0, 0.23], + ], +) +def test_index_to_series(data): + pdi = pd.Index(data) + gdi = cudf.from_pandas(pdi) + + assert_eq(pdi.to_series(), gdi.to_series()) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_tolist.py b/python/cudf/cudf/tests/indexes/index/methods/test_tolist.py new file mode 100644 index 00000000000..6b0454c647b --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_tolist.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import re + +import pytest + +import cudf + + +@pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], []]) +def test_index_tolist(data, all_supported_types_as_str): + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + + with pytest.raises( + TypeError, + match=re.escape( + r"cuDF does not support conversion to host memory " + r"via the `tolist()` method. Consider using " + r"`.to_arrow().to_pylist()` to construct a Python list." + ), + ): + gdi.tolist() diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_union.py b/python/cudf/cudf/tests/indexes/index/methods/test_union.py new file mode 100644 index 00000000000..9bbdf5e4bc6 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_union.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "idx1, idx2", + [ + (pd.RangeIndex(0, 10), pd.RangeIndex(3, 7)), + (pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)), + (pd.RangeIndex(0, 10, 2), pd.RangeIndex(1, 5, 3)), + (pd.RangeIndex(1, 5, 3), pd.RangeIndex(0, 10, 2)), + (pd.RangeIndex(1, 10, 3), pd.RangeIndex(1, 5, 2)), + (pd.RangeIndex(1, 5, 2), pd.RangeIndex(1, 10, 3)), + (pd.RangeIndex(1, 100, 3), pd.RangeIndex(1, 50, 3)), + (pd.RangeIndex(1, 100, 3), pd.RangeIndex(1, 50, 6)), + (pd.RangeIndex(1, 100, 6), pd.RangeIndex(1, 50, 3)), + (pd.RangeIndex(0, 10, name="a"), pd.RangeIndex(90, 100, name="b")), + (pd.Index([0, 1, 2, 30], name="a"), pd.Index([90, 100])), + (pd.Index([0, 1, 2, 30], name="a"), [90, 100]), + (pd.Index([0, 1, 2, 30]), pd.Index([0, 10, 1.0, 11])), + (pd.Index(["a", "b", "c", "d", "c"]), pd.Index(["a", "c", "z"])), + ( + pd.IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]), + pd.IntervalIndex.from_tuples([(0, 2), (2, 4)]), + ), + (pd.RangeIndex(0, 10), pd.Index([8, 1, 2, 4])), + (pd.Index([8, 1, 2, 4], name="a"), pd.Index([8, 1, 2, 4], name="b")), + ( + pd.Index([8, 1, 2, 4], name="a"), + pd.Index([], name="b", dtype="int64"), + ), + (pd.Index([], dtype="int64", name="a"), pd.Index([10, 12], name="b")), + (pd.Index([True, True, True], name="a"), pd.Index([], dtype="bool")), + ( + pd.Index([True, True, True]), + pd.Index([False, True], dtype="bool", name="b"), + ), + ], +) +@pytest.mark.parametrize("sort", [None, False, True]) +def test_union_index(idx1, idx2, sort): + expected = idx1.union(idx2, sort=sort) + + idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1 + idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2 + + actual = idx1.union(idx2, sort=sort) + + assert_eq(expected, actual) + + +def test_union_bool_with_other(): + idx1 = cudf.Index([True, True, True]) + idx2 = cudf.Index([0, 1], name="b") + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(cudf.errors.MixedTypeError): + idx1.union(idx2) + + +def test_union_unsigned_vs_signed( + signed_integer_types_as_str, unsigned_integer_types_as_str +): + idx1 = cudf.Index([10, 20, 30], dtype=signed_integer_types_as_str) + idx2 = cudf.Index([0, 1], dtype=unsigned_integer_types_as_str) + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(cudf.errors.MixedTypeError): + idx1.union(idx2) diff --git a/python/cudf/cudf/tests/indexes/index/methods/test_where.py b/python/cudf/cudf/tests/indexes/index/methods/test_where.py new file mode 100644 index 00000000000..46fa521e6e2 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/index/methods/test_where.py @@ -0,0 +1,190 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
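+# Covers Index.where vs pandas for numeric, string, categorical, and MultiIndex inputs, including expected errors.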
+
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import (
+    assert_exceptions_equal,
+)
+
+
+@pytest.mark.parametrize(
+    "data,condition,other,error",
+    [
+        (pd.Index(range(5)), pd.Index(range(5)) > 0, None, None),
+        (pd.Index([1, 2, 3]), pd.Index([1, 2, 3]) != 2, None, None),
+        (pd.Index(list("abc")), pd.Index(list("abc")) == "c", None, None),
+        (
+            pd.Index(list("abc")),
+            pd.Index(list("abc")) == "c",
+            pd.Index(list("xyz")),
+            None,
+        ),
+        (pd.Index(range(5)), pd.Index(range(4)) > 0, None, ValueError),
+        (
+            pd.Index(range(5)),
+            pd.Index(range(5)) > 1,
+            10,
+            None,
+        ),
+        (
+            pd.Index(np.arange(10)),
+            (pd.Index(np.arange(10)) % 3) == 0,
+            -pd.Index(np.arange(10)),
+            None,
+        ),
+        (
+            pd.Index([1, 2, np.nan]),
+            pd.Index([1, 2, np.nan]) == 4,
+            None,
+            None,
+        ),
+        (
+            pd.Index([1, 2, np.nan]),
+            pd.Index([1, 2, np.nan]) != 4,
+            None,
+            None,
+        ),
+        (
+            pd.Index([-2, 3, -4, -79]),
+            [True, True, True],
+            None,
+            ValueError,
+        ),
+        (
+            pd.Index([-2, 3, -4, -79]),
+            [True, True, True, False],
+            None,
+            None,
+        ),
+        (
+            pd.Index([-2, 3, -4, -79]),
+            [True, True, True, False],
+            17,
+            None,
+        ),
+        (pd.Index(list("abcdgh")), pd.Index(list("abcdgh")) != "g", "3", None),
+        (
+            pd.Index(list("abcdgh")),
+            pd.Index(list("abcdg")) != "g",
+            "3",
+            ValueError,
+        ),
+        (
+            pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]),
+            pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) != "a",
+            "a",
+            None,
+        ),
+        (
+            pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]),
+            pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) != "a",
+            "b",
+            None,
+        ),
+        (
+            pd.MultiIndex.from_tuples(
+                list(
+                    zip(
+                        *[
+                            [
+                                "bar",
+                                "bar",
+                                "baz",
+                                "baz",
+                                "foo",
+                                "foo",
+                                "qux",
+                                "qux",
+                            ],
+                            [
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                            ],
+                        ],
+                        strict=True,
+                    )
+                )
+            ),
+            pd.MultiIndex.from_tuples(
+                list(
+                    zip(
+                        *[
+                            [
+                                "bar",
+                                "bar",
+                                "baz",
+                                "baz",
+                                "foo",
+                                "foo",
+                                "qux",
+                                "qux",
+                            ],
+                            [
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                                "one",
+                                "two",
+                            ],
+                        ],
+                        strict=True,
+                    )
+                )
+            )
+            != "a",
+            None,
+            NotImplementedError,
+        ),
+    ],
+)
+def test_index_where(data, condition, other, error):
+    ps = data
+    gs = cudf.from_pandas(data)
+
+    ps_condition = condition
+    if isinstance(condition, pd.Index):
+        gs_condition = cudf.from_pandas(condition)
+    else:
+        gs_condition = condition
+
+    ps_other = other
+    if isinstance(other, pd.Index):
+        gs_other = cudf.from_pandas(other)
+    else:
+        gs_other = other
+
+    if error is None:
+        if hasattr(ps, "dtype") and isinstance(ps.dtype, pd.CategoricalDtype):
+            expect = ps.where(ps_condition, other=ps_other)
+            got = gs.where(gs_condition, other=gs_other)
+            np.testing.assert_array_equal(
+                expect.codes,
+                got.codes.astype(expect.codes.dtype).fillna(-1).to_numpy(),
+            )
+            assert_eq(expect.categories, got.categories)
+        else:
+            assert_eq(
+                ps.where(ps_condition, other=ps_other),
+                gs.where(gs_condition, other=gs_other).to_pandas(),
+            )
+    else:
+        assert_exceptions_equal(
+            lfunc=ps.where,
+            rfunc=gs.where,
+            lfunc_args_and_kwargs=([ps_condition], {"other": ps_other}),
+            rfunc_args_and_kwargs=([gs_condition], {"other": gs_other}),
+        )
diff --git a/python/cudf/cudf/tests/indexes/index/test_attributes.py b/python/cudf/cudf/tests/indexes/index/test_attributes.py
index ee4a1654a10..3ccf155875c 100644
--- a/python/cudf/cudf/tests/indexes/index/test_attributes.py
+++ b/python/cudf/cudf/tests/indexes/index/test_attributes.py
@@ -1,11 +1,13 @@
 # 
Copyright (c) 2025, NVIDIA CORPORATION. import datetime +import re import numpy as np import pandas as pd import pytest import cudf +from cudf.testing import assert_eq @pytest.mark.parametrize( @@ -67,3 +69,151 @@ def test_index_is_unique_monotonic(testlist): assert index.is_unique == index_pd.is_unique assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing + + +def test_name(): + idx = cudf.Index(np.asarray([4, 5, 6, 10]), name="foo") + assert idx.name == "foo" + + +def test_index_names(): + idx = cudf.Index([1, 2, 3], name="idx") + assert idx.names == ("idx",) + + +@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) +def test_index_empty(data, all_supported_types_as_str): + pdi = pd.Index(data, dtype=all_supported_types_as_str) + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + + assert pdi.empty == gdi.empty + + +@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) +def test_index_size(data, all_supported_types_as_str): + pdi = pd.Index(data, dtype=all_supported_types_as_str) + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + + assert pdi.size == gdi.size + + +@pytest.mark.parametrize("data", [[], [1]]) +def test_index_iter_error(data, all_supported_types_as_str): + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + + with pytest.raises( + TypeError, + match=re.escape( + f"{gdi.__class__.__name__} object is not iterable. " + f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " + f"if you wish to iterate over the values." + ), + ): + iter(gdi) + + +@pytest.mark.parametrize("data", [[], [1]]) +def test_index_values_host(data, all_supported_types_as_str, request): + request.applymarker( + pytest.mark.xfail( + len(data) > 0 + and all_supported_types_as_str + in {"timedelta64[us]", "timedelta64[ms]", "timedelta64[s]"}, + reason=f"wrong result for {all_supported_types_as_str}", + ) + ) + gdi = cudf.Index(data, dtype=all_supported_types_as_str) + pdi = pd.Index(data, dtype=all_supported_types_as_str) + + np.testing.assert_array_equal(gdi.values_host, pdi.values) + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + ["a", "v", "d"], + [234.243, 2432.3, None], + [True, False, True], + pd.Series(["a", " ", "v"], dtype="category"), + pd.IntervalIndex.from_breaks([0, 1, 2, 3]), + ], +) +@pytest.mark.parametrize( + "func", + [ + "is_numeric", + "is_boolean", + "is_integer", + "is_floating", + "is_object", + "is_categorical", + "is_interval", + ], +) +def test_index_type_methods(data, func): + pidx = pd.Index(data) + gidx = cudf.from_pandas(pidx) + + with pytest.warns(FutureWarning): + expected = getattr(pidx, func)() + with pytest.warns(FutureWarning): + actual = getattr(gidx, func)() + + if gidx.dtype == np.dtype("bool") and func == "is_object": + assert_eq(False, actual) + else: + assert_eq(expected, actual) + + +def test_index_values(): + gidx = cudf.Index([1, 2, 3]) + pidx = gidx.to_pandas() + + assert_eq(pidx.values, gidx.values) + + +def test_index_null_values(): + gidx = cudf.Index([1.0, None, 3, 0, None]) + with pytest.raises(ValueError): + gidx.values + + +@pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + pytest.param( + [np.nan, 10, 15, 16], + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/49818" + ), + ), + range(0, 10), + [np.nan, None, 10, 20], + ["ab", "zx", "pq"], + ["ab", "zx", None, "pq"], + ], +) +def test_index_hasnans(data): + gs = cudf.Index(data, nan_as_null=False) + if isinstance(gs, cudf.RangeIndex): + 
with pytest.raises(NotImplementedError): + gs.to_pandas(nullable=True) + else: + ps = gs.to_pandas(nullable=True) + # Check type to avoid mixing Python bool and NumPy bool + assert isinstance(gs.hasnans, bool) + assert gs.hasnans == ps.hasnans + + +@pytest.mark.parametrize("data", [[0, 1, 2], [1.1, 2.3, 4.5]]) +@pytest.mark.parametrize("needle", [0, 1, 2.3]) +def test_index_contains_float_int(data, numeric_types_as_str, needle): + gidx = cudf.Index(data=data, dtype=numeric_types_as_str) + pidx = gidx.to_pandas() + + actual = needle in gidx + expected = needle in pidx + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/indexes/index/test_constructor.py b/python/cudf/cudf/tests/indexes/index/test_constructor.py index a46c4fec49e..e23969c7cb7 100644 --- a/python/cudf/cudf/tests/indexes/index/test_constructor.py +++ b/python/cudf/cudf/tests/indexes/index/test_constructor.py @@ -1,8 +1,11 @@ # Copyright (c) 2025, NVIDIA CORPORATION. +import re + import cupy as cp import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -49,6 +52,277 @@ def test_infer_timedelta_index(data, timedelta_types_as_str): assert_eq(pdi, gdi) +def test_pandas_as_index(): + # Define Pandas Indexes + pdf_int_index = pd.Index([1, 2, 3, 4, 5]) + pdf_uint_index = pd.Index([1, 2, 3, 4, 5]) + pdf_float_index = pd.Index([1.0, 2.0, 3.0, 4.0, 5.0]) + pdf_datetime_index = pd.DatetimeIndex( + [1000000, 2000000, 3000000, 4000000, 5000000] + ) + pdf_category_index = pd.CategoricalIndex(["a", "b", "c", "b", "a"]) + + # Define cudf Indexes + gdf_int_index = cudf.Index(pdf_int_index) + gdf_uint_index = cudf.Index(pdf_uint_index) + gdf_float_index = cudf.Index(pdf_float_index) + gdf_datetime_index = cudf.Index(pdf_datetime_index) + gdf_category_index = cudf.Index(pdf_category_index) + + # Check instance types + assert isinstance(gdf_int_index, cudf.Index) + assert isinstance(gdf_uint_index, cudf.Index) + assert isinstance(gdf_float_index, cudf.Index) + assert isinstance(gdf_datetime_index, cudf.DatetimeIndex) + assert isinstance(gdf_category_index, cudf.CategoricalIndex) + + # Check equality + assert_eq(pdf_int_index, gdf_int_index) + assert_eq(pdf_uint_index, gdf_uint_index) + assert_eq(pdf_float_index, gdf_float_index) + assert_eq(pdf_datetime_index, gdf_datetime_index) + assert_eq(pdf_category_index, gdf_category_index) + + assert_eq( + pdf_category_index.codes, + gdf_category_index.codes.astype( + pdf_category_index.codes.dtype + ).to_numpy(), + ) + + +def test_from_pandas_str(): + idx = ["a", "b", "c"] + pidx = pd.Index(idx, name="idx") + gidx_1 = cudf.Index(idx, name="idx") + gidx_2 = cudf.from_pandas(pidx) + + assert_eq(gidx_1, gidx_2) + + +def test_from_pandas_gen(): + idx = [2, 4, 6] + pidx = pd.Index(idx, name="idx") + gidx_1 = cudf.Index(idx, name="idx") + gidx_2 = cudf.from_pandas(pidx) + + assert_eq(gidx_1, gidx_2) + + +@pytest.mark.parametrize( + "data", + [ + range(0), + range(1), + range(0, 1), + range(0, 5), + range(1, 10), + range(1, 10, 1), + range(1, 10, 3), + range(10, 1, -3), + range(-5, 10), + ], +) +def test_range_index_from_range(data): + assert_eq(pd.Index(data), cudf.Index(data)) + + +@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) +@pytest.mark.parametrize("name", [1, "a", None]) +def test_index_basic(data, all_supported_types_as_str, name, request): + request.applymarker( + pytest.mark.xfail( + len(data) > 0 + and all_supported_types_as_str + in {"timedelta64[us]", "timedelta64[ms]", "timedelta64[s]"}, + reason=f"wrong result for 
{all_supported_types_as_str}", + ) + ) + pdi = pd.Index(data, dtype=all_supported_types_as_str, name=name) + gdi = cudf.Index(data, dtype=all_supported_types_as_str, name=name) + + assert_eq(pdi, gdi) + + +@pytest.mark.parametrize( + "data,nan_idx,NA_idx", + [([1, 2, 3, None], None, 3), ([2, 3, np.nan, None], 2, 3)], +) +def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null): + idx = cudf.Index(data, nan_as_null=nan_as_null) + + if nan_as_null is not False: + if nan_idx is not None: + assert idx[nan_idx] is cudf.NA + else: + if nan_idx is not None: + assert np.isnan(idx[nan_idx]) + + if NA_idx is not None: + assert idx[NA_idx] is cudf.NA + + +def test_index_constructor_integer(default_integer_bitwidth): + got = cudf.Index([1, 2, 3]) + expect = cudf.Index([1, 2, 3], dtype=f"int{default_integer_bitwidth}") + + assert_eq(expect, got) + + +def test_index_constructor_float(default_float_bitwidth): + got = cudf.Index([1.0, 2.0, 3.0]) + expect = cudf.Index( + [1.0, 2.0, 3.0], dtype=f"float{default_float_bitwidth}" + ) + + assert_eq(expect, got) + + +def test_index_error_list_index(): + s = cudf.Series([[1, 2], [2], [4]]) + with pytest.raises( + NotImplementedError, + match=re.escape( + "Unsupported column type passed to create an " + "Index: " + ), + ): + cudf.Index(s) + + +@pytest.mark.parametrize( + "data", + [ + [ + pd.Timestamp("1970-01-01 00:00:00.000000001"), + pd.Timestamp("1970-01-01 00:00:00.000000002"), + 12, + 20, + ], + [ + pd.Timedelta(10), + pd.Timedelta(20), + 12, + 20, + ], + [1, 2, 3, 4], + ], +) +def test_index_mixed_dtype_error(data): + pi = pd.Index(data, dtype="object") + with pytest.raises(TypeError): + cudf.Index(pi) + + +@pytest.mark.parametrize("cls", [pd.DatetimeIndex, pd.TimedeltaIndex]) +def test_index_date_duration_freq_error(cls): + s = cls([1, 2, 3], freq="infer") + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + cudf.Index(s) + + +def test_index_empty_from_pandas(all_supported_types_as_str): + pidx = pd.Index([], dtype=all_supported_types_as_str) + gidx = cudf.from_pandas(pidx) + + assert_eq(pidx, gidx) + + +def test_empty_index_init(): + pidx = pd.Index([]) + gidx = cudf.Index([]) + + assert_eq(pidx, gidx) + + +@pytest.mark.parametrize("data", [[1, 2, 3], range(0, 10)]) +def test_index_with_index_dtype(request, data, all_supported_types_as_str): + request.applymarker( + pytest.mark.xfail( + isinstance(data, list) + and all_supported_types_as_str + in {"timedelta64[us]", "timedelta64[ms]", "timedelta64[s]"}, + reason=f"wrong result for {all_supported_types_as_str}", + ) + ) + request.applymarker( + pytest.mark.xfail( + all_supported_types_as_str == "category", + raises=AttributeError, + reason=f"cuDF bug in Column.astype with {all_supported_types_as_str}", + ) + ) + pidx = pd.Index(data) + gidx = cudf.Index(data) + + expected = pd.Index(pidx, dtype=all_supported_types_as_str) + actual = cudf.Index(gidx, dtype=all_supported_types_as_str) + + assert_eq(expected, actual) + + +def test_period_index_error(): + pidx = pd.PeriodIndex(data=[pd.Period("2020-01")]) + with pytest.raises(NotImplementedError): + cudf.from_pandas(pidx) + with pytest.raises(NotImplementedError): + cudf.Index(pidx) + with pytest.raises(NotImplementedError): + cudf.Series(pidx) + with pytest.raises(NotImplementedError): + cudf.Series(pd.Series(pidx)) + with pytest.raises(NotImplementedError): + cudf.Series(pd.array(pidx)) + + +@pytest.mark.parametrize("value", [cudf.DataFrame(range(1)), 11]) +def 
test_index_from_dataframe_scalar_raises(value): + with pytest.raises(TypeError): + cudf.Index(value) + + +@pytest.mark.parametrize( + "data", + [ + cp.ones(5, dtype=cp.float16), + np.ones(5, dtype="float16"), + pd.Series([0.1, 1.2, 3.3], dtype="float16"), + pytest.param( + pa.array(np.ones(5, dtype="float16")), + marks=pytest.mark.xfail( + reason="https://issues.apache.org/jira/browse/ARROW-13762" + ), + ), + ], +) +def test_index_raises_float16(data): + with pytest.raises(TypeError): + cudf.Index(data) + + +def test_from_pandas_rangeindex_return_rangeindex(): + pidx = pd.RangeIndex(start=3, stop=9, step=3, name="a") + result = cudf.Index.from_pandas(pidx) + expected = cudf.RangeIndex(start=3, stop=9, step=3, name="a") + assert_eq(result, expected, exact=True) + + +def test_Index_init_with_nans(): + with cudf.option_context("mode.pandas_compatible", True): + gi = cudf.Index([1, 2, 3, np.nan]) + assert gi.dtype == np.dtype("float64") + pi = pd.Index([1, 2, 3, np.nan]) + assert_eq(pi, gi) + + +def test_roundtrip_index_plc_column(): + index = cudf.Index([1]) + expect = cudf.Index(index) + actual = cudf.Index.from_pylibcudf(*expect.to_pylibcudf()) + assert_eq(expect, actual) + + def test_categorical_index_with_dtype(): dtype = cudf.CategoricalDtype(categories=["a", "z", "c"]) gi = cudf.Index(["z", "c", "a"], dtype=dtype) diff --git a/python/cudf/cudf/tests/indexes/multiindex/indexing/test_getitem.py b/python/cudf/cudf/tests/indexes/multiindex/indexing/test_getitem.py new file mode 100644 index 00000000000..6605aa0ab94 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/indexing/test_getitem.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_getitem(): + pidx = pd.MultiIndex( + [ + ["a", "b", "c"], + ["house", "store", "forest"], + ["clouds", "clear", "storm"], + ["fire", "smoke", "clear"], + [ + np.datetime64("2001-01-01", "ns"), + np.datetime64("2002-01-01", "ns"), + np.datetime64("2003-01-01", "ns"), + ], + ], + [ + [0, 0, 0, 0, 1, 1, 2], + [1, 1, 1, 1, 0, 0, 2], + [0, 0, 2, 2, 2, 0, 1], + [0, 0, 0, 1, 2, 0, 1], + [1, 0, 1, 2, 0, 0, 1], + ], + ) + pidx.names = ["alpha", "location", "weather", "sign", "timestamp"] + gidx = cudf.from_pandas(pidx) + assert_eq(pidx[0], gidx[0]) + + +@pytest.mark.parametrize( + "key", + [0, 1, [], [0, 1], slice(None), slice(0, 0), slice(0, 1), slice(0, 2)], +) +def test_multiindex_indexing(key): + gi = cudf.MultiIndex.from_frame( + cudf.DataFrame({"a": [1, 2, 3], "b": [True, False, False]}) + ) + pi = gi.to_pandas() + + assert_eq(gi[key], pi[key], exact=False) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_append.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_append.py new file mode 100644 index 00000000000..ecf2c9fb97e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_append.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "data", + [ + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + names=("number", "color"), + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], ["yellow", "violet", "pink", "white"]], + names=("number1", "color2"), + ), + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + ), + ], +) +@pytest.mark.parametrize( + "other", + [ + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + names=("number", "color"), + ), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 4], ["yellow", "violet", "pink", "white"]], + names=("number1", "color2"), + ), + pd.MultiIndex.from_arrays( + [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], + ), + ], +) +def test_multiindex_append(data, other): + pdi = data + other_pd = other + + gdi = cudf.from_pandas(data) + other_gd = cudf.from_pandas(other) + + expected = pdi.append(other_pd) + actual = gdi.append(other_gd) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_indexer.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_indexer.py new file mode 100644 index 00000000000..9ea8b7555e0 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_indexer.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.core._compat import ( + PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION, +) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "data", + [ + [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], + [(1, 1, 1), (1, 1, 2), (1, 1, 24), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + ], +) +@pytest.mark.parametrize("key", [[(1, 2, 3)], [(9, 9, 9)]]) +@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) +def test_get_indexer_multi_numeric(data, key, method): + idx = pd.MultiIndex.from_tuples(data) + pi = idx.sort_values() + gi = cudf.from_pandas(pi) + + expected = pi.get_indexer(key, method=method) + got = gi.get_indexer(key, method=method) + + assert_eq(expected, got) + + with cudf.option_context("mode.pandas_compatible", True): + got = gi.get_indexer(key, method=method) + + assert_eq(expected, got, check_dtype=True) + + +@pytest.mark.parametrize( + "key", + [ + ((1, 2, 3),), + ((2, 1, 1),), + ((9, 9, 9),), + ], +) +@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) +def test_get_indexer_multi_numeric_deviate(key, method): + pi = pd.MultiIndex.from_tuples( + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 10), (1, 1, 1), (2, 2, 1)] + ).sort_values() + gi = cudf.from_pandas(pi) + + expected = pi.get_indexer(key, method=method) + got = gi.get_indexer(key, method=method) + + assert_eq(expected, got) + + +@pytest.mark.parametrize("method", ["ffill", "bfill"]) +@pytest.mark.skipif( + PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + reason="Fails in older versions of pandas", +) +def test_get_indexer_multi_error(method): + pi = pd.MultiIndex.from_tuples( + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 10), (1, 1, 1), (2, 2, 1)] + ) + gi = cudf.from_pandas(pi) + + assert_exceptions_equal( + pi.get_indexer, + gi.get_indexer, + lfunc_args_and_kwargs=( + [], + {"target": ((1, 2, 3),), "method": method}, + ), + rfunc_args_and_kwargs=( + [], + {"target": ((1, 2, 3),), "method": 
method}, + ), + ) + + +@pytest.mark.parametrize( + "data", + [ + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + ], +) +@pytest.mark.parametrize( + "key", [[("a", "b", "c"), ("b", "c", "a")], [("z", "z", "z")]] +) +@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) +def test_get_indexer_multi_string(data, key, method): + idx = pd.MultiIndex.from_tuples(data) + pi = idx.sort_values() + gi = cudf.from_pandas(pi) + + expected = pi.get_indexer(key, method=method) + got = gi.get_indexer(key, method=method) + + assert_eq(expected, got) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_loc.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_loc.py new file mode 100644 index 00000000000..4048a0f9c60 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_get_loc.py @@ -0,0 +1,141 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if + + +@pytest.mark.parametrize( + "data", + [ + [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], + [(1, 1, 1), (1, 1, 2), (1, 1, 2), (1, 2, 3), (2, 1, 1), (2, 2, 1)], + ], +) +@pytest.mark.parametrize("key", [1, (1, 2), (1, 2, 3), (2, 1, 1), (9, 9, 9)]) +def test_get_loc_multi_numeric(data, key): + idx = pd.MultiIndex.from_tuples(data) + pi = idx.sort_values() + gi = cudf.from_pandas(pi) + + if key not in pi: + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = pi.get_loc(key) + got = gi.get_loc(key) + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "key, result", + [ + (1, slice(1, 5, 1)), # deviates + ((1, 2), slice(1, 3, 1)), + ((1, 2, 3), slice(1, 2, None)), + ((2, 1, 1), slice(0, 1, None)), + ((9, 9, 9), None), + ], +) +def test_get_loc_multi_numeric_deviate(key, result): + pi = pd.MultiIndex.from_tuples( + [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 1), (1, 1, 1), (2, 2, 1)] + ) + gi = cudf.from_pandas(pi) + + with expect_warning_if( + isinstance(key, tuple), pd.errors.PerformanceWarning + ): + key_flag = key not in pi + + if key_flag: + with expect_warning_if( + isinstance(key, tuple), pd.errors.PerformanceWarning + ): + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = result + got = gi.get_loc(key) + + assert_eq(expected, got) + + +@pytest.mark.parametrize( + "data", + [ + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "b", "c"), + ("b", "a", "a"), + ("a", "a", "b"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "a"), + ("a", "a", "b"), + ("a", "a", "b"), + ("a", "b", "c"), + ("b", "a", "a"), + ("b", "c", "a"), + ], + [ + ("a", "a", "b"), + ("b", 
"a", "a"), + ("b", "a", "a"), + ("a", "a", "a"), + ("a", "b", "a"), + ("b", "c", "a"), + ], + ], +) +@pytest.mark.parametrize( + "key", ["a", ("a", "a"), ("a", "b", "c"), ("b", "c", "a"), ("z", "z", "z")] +) +def test_get_loc_multi_string(data, key): + idx = pd.MultiIndex.from_tuples(data) + pi = idx.sort_values() + gi = cudf.from_pandas(pi) + + if key not in pi: + assert_exceptions_equal( + lfunc=pi.get_loc, + rfunc=gi.get_loc, + lfunc_args_and_kwargs=([], {"key": key}), + rfunc_args_and_kwargs=([], {"key": key}), + ) + else: + expected = pi.get_loc(key) + got = gi.get_loc(key) + + assert_eq(expected, got) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_isin.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_isin.py new file mode 100644 index 00000000000..0cce814a2a0 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_isin.py @@ -0,0 +1,85 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal + + +@pytest.mark.parametrize( + "data", + [ + pd.MultiIndex.from_arrays( + [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") + ), + pd.MultiIndex.from_arrays([[], []], names=("number", "color")), + pd.MultiIndex.from_arrays( + [[1, 2, 3, 10, 100], ["red", "blue", "green", "pink", "white"]], + names=("number", "color"), + ), + pd.MultiIndex.from_product( + [[0, 1], ["red", "blue", "green"]], names=("number", "color") + ), + ], +) +@pytest.mark.parametrize( + "values,level,err", + [ + ([(1, "red"), (2, "blue"), (0, "green")], None, None), + (["red", "orange", "yellow"], "color", None), + (["red", "white", "yellow"], "color", None), + ([0, 1, 2, 10, 11, 15], "number", None), + ([0, 1, 2, 10, 11, 15], None, TypeError), + (pd.Series([0, 1, 2, 10, 11, 15]), None, TypeError), + (pd.Index([0, 1, 2, 10, 11, 15]), None, TypeError), + (pd.Index([0, 1, 2, 8, 11, 15]), "number", None), + (pd.Index(["red", "white", "yellow"]), "color", None), + ([(1, "red"), (3, "red")], None, None), + (((1, "red"), (3, "red")), None, None), + ( + pd.MultiIndex.from_arrays( + [[1, 2, 3], ["red", "blue", "green"]], + names=("number", "color"), + ), + None, + None, + ), + ( + pd.MultiIndex.from_arrays([[], []], names=("number", "color")), + None, + None, + ), + ( + pd.MultiIndex.from_arrays( + [ + [1, 2, 3, 10, 100], + ["red", "blue", "green", "pink", "white"], + ], + names=("number", "color"), + ), + None, + None, + ), + ], +) +def test_isin_multiindex(data, values, level, err): + pmdx = data + gmdx = cudf.from_pandas(data) + + if err is None: + expected = pmdx.isin(values, level=level) + if isinstance(values, pd.MultiIndex): + values = cudf.from_pandas(values) + got = gmdx.isin(values, level=level) + + assert_eq(got, expected) + else: + assert_exceptions_equal( + lfunc=pmdx.isin, + rfunc=gmdx.isin, + lfunc_args_and_kwargs=([values], {"level": level}), + rfunc_args_and_kwargs=([values], {"level": level}), + check_exception_type=False, + ) diff --git a/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_arrow.py b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_arrow.py new file mode 100644 index 00000000000..e4151a6083f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/multiindex/methods/test_to_arrow.py @@ -0,0 +1,27 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd +import pyarrow as pa + +import cudf +from cudf.testing import assert_eq + + +def test_multiindex_to_arrow(): + pdf = pd.DataFrame( + { + "a": [1, 2, 1, 2, 3], + "b": [1.0, 2.0, 3.0, 4.0, 5.0], + "c": np.array([1, 2, 3, None, 5], dtype="datetime64[s]"), + "d": ["a", "b", "c", "d", "e"], + } + ) + pdf["a"] = pdf["a"].astype("category") + df = cudf.from_pandas(pdf) + gdi = cudf.MultiIndex.from_frame(df) + + expected = pa.Table.from_pandas(pdf) + got = gdi.to_arrow() + + assert_eq(expected, got) diff --git a/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py index 2afb5c4f179..131024c521f 100644 --- a/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py +++ b/python/cudf/cudf/tests/indexes/multiindex/test_constructors.py @@ -4,6 +4,7 @@ import cupy as cp import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -181,3 +182,20 @@ def test_multiindex_duplicate_names(): ) assert_eq(gi, pi) + + +def test_multiindex_from_arrow(): + pdf = pd.DataFrame( + { + "a": [1, 2, 1, 2, 3], + "b": [1.0, 2.0, 3.0, 4.0, 5.0], + "c": np.array([1, 2, 3, None, 5], dtype="datetime64[s]"), + "d": ["a", "b", "c", "d", "e"], + } + ) + pdf["a"] = pdf["a"].astype("category") + ptb = pa.Table.from_pandas(pdf) + gdi = cudf.MultiIndex.from_arrow(ptb) + pdi = pd.MultiIndex.from_frame(pdf) + + assert_eq(pdi, gdi) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/indexing/__init__.py b/python/cudf/cudf/tests/indexes/rangeindex/indexing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/tests/indexes/rangeindex/indexing/test_getitem.py b/python/cudf/cudf/tests/indexes/rangeindex/indexing/test_getitem.py new file mode 100644 index 00000000000..d33a0c57f43 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/indexing/test_getitem.py @@ -0,0 +1,85 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
+ +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_slice_attr_name(): + start, stop = 0, 10 + rg = cudf.RangeIndex(start, stop, name="myindex") + sliced_rg = rg[0:9] + assert rg.name == sliced_rg.name + + +@pytest.mark.parametrize( + "start,stop,step", + [(1, 10, 1), (1, 10, 3), (10, -17, -1), (10, -17, -3)], +) +def test_index_rangeindex_get_item_basic(start, stop, step): + pridx = pd.RangeIndex(start, stop, step) + gridx = cudf.RangeIndex(start, stop, step) + + for i in range(-len(pridx), len(pridx)): + assert pridx[i] == gridx[i] + + +@pytest.mark.parametrize("start,stop,step", [(1, 10, 3), (10, 1, -3)]) +def test_index_rangeindex_get_item_out_of_bounds(start, stop, step): + gridx = cudf.RangeIndex(start, stop, step) + with pytest.raises(IndexError): + gridx[4] + + +@pytest.mark.parametrize("start,stop,step", [(10, 1, 1), (-17, 10, -3)]) +def test_index_rangeindex_get_item_null_range(start, stop, step): + gridx = cudf.RangeIndex(start, stop, step) + + with pytest.raises(IndexError): + gridx[0] + + +@pytest.mark.parametrize( + "start,stop,step", + [(-17, 21, 2), (21, -17, -3), (0, 0, 1), (0, 1, -3), (10, 0, 5)], +) +@pytest.mark.parametrize( + "sl", + [ + slice(1, 7, 1), + slice(1, 7, 2), + slice(-1, 7, 1), + slice(-1, 7, 2), + slice(-3, 7, 2), + slice(7, 1, -2), + slice(7, -3, -2), + slice(None, None, 1), + slice(0, None, 2), + slice(0, None, 3), + slice(0, 0, 3), + ], +) +def test_index_rangeindex_get_item_slices(start, stop, step, sl): + pridx = pd.RangeIndex(start, stop, step) + gridx = cudf.RangeIndex(start, stop, step) + + assert_eq(pridx[sl], gridx[sl]) + + +def test_rangeindex_apply_boolean_mask_user_option(default_integer_bitwidth): + # Test that RangeIndex is materialized into 32 bit index under user + # configuration for apply boolean mask operation. + idx = cudf.RangeIndex(0, 8) + mask = [True, True, True, False, False, False, True, False] + actual = idx[mask] + expected = cudf.Index([0, 1, 2, 6], dtype=f"int{default_integer_bitwidth}") + assert_eq(expected, actual) + + +def test_df_slice_empty_index(): + idx = cudf.RangeIndex(0) + assert isinstance(idx[:1], cudf.RangeIndex) + with pytest.raises(IndexError): + idx[1] diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_any_all.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_any_all.py new file mode 100644 index 00000000000..4d5dbf72543 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_any_all.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf + + +@pytest.mark.parametrize("data", [range(-3, 3), range(1, 3), range(0)]) +def test_rangeindex_all(data): + result = cudf.RangeIndex(data).all() + expected = cudf.Index(list(data)).all() + assert result == expected diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_append.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_append.py new file mode 100644 index 00000000000..d793151d87f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_append.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
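+
+# Appending labels that extend the existing range pattern should keep the
+# result a RangeIndex (e.g. RangeIndex(0, 10) plus [10] becomes
+# RangeIndex(0, 11)) rather than materializing an integer column; appending
+# an empty sequence is a no-op.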
+ + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_append_return_rangeindex(): + idx = cudf.RangeIndex(0, 10) + result = idx.append([]) + assert_eq(idx, result) + + result = idx.append(cudf.Index([10])) + expected = cudf.RangeIndex(0, 11) + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_dropna.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_dropna.py new file mode 100644 index 00000000000..6f6f61a771d --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_dropna.py @@ -0,0 +1,12 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_dropna(): + ri = cudf.RangeIndex(range(2)) + result = ri.dropna() + expected = ri.copy() + assert_eq(result, expected) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_factorize.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_factorize.py new file mode 100644 index 00000000000..d22ddf3c58a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_factorize.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("data", [range(2), range(2, -1, -1)]) +def test_rangeindex_factorize(sort, data): + res_codes, res_uniques = cudf.RangeIndex(data).factorize(sort=sort) + exp_codes, exp_uniques = cudf.Index(list(data)).factorize(sort=sort) + assert_eq(res_codes, exp_codes) + assert_eq(res_uniques, exp_uniques) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_find_label_range.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_find_label_range.py new file mode 100644 index 00000000000..f6666d4213e --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_find_label_range.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import cudf + + +def test_index_find_label_range_rangeindex(): + """Cudf specific""" + # step > 0 + # 3, 8, 13, 18 + ridx = cudf.RangeIndex(3, 20, 5) + assert ridx.find_label_range(slice(3, 8)) == slice(0, 2, 1) + assert ridx.find_label_range(slice(0, 7)) == slice(0, 1, 1) + assert ridx.find_label_range(slice(3, 19)) == slice(0, 4, 1) + assert ridx.find_label_range(slice(2, 21)) == slice(0, 4, 1) + + # step < 0 + # 20, 15, 10, 5 + ridx = cudf.RangeIndex(20, 3, -5) + assert ridx.find_label_range(slice(15, 10)) == slice(1, 3, 1) + assert ridx.find_label_range(slice(10, 15, -1)) == slice(2, 0, -1) + assert ridx.find_label_range(slice(10, 0)) == slice(2, 4, 1) + assert ridx.find_label_range(slice(30, 13)) == slice(0, 2, 1) + assert ridx.find_label_range(slice(30, 0)) == slice(0, 4, 1) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_indexer.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_indexer.py new file mode 100644 index 00000000000..b67aebac4cf --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_indexer.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
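+
+# ``get_indexer`` maps each target label to its integer position in the
+# index, returning -1 for labels that are absent; ``method`` selects an
+# inexact-match strategy ("ffill", "bfill" or "nearest") and ``tolerance``
+# bounds how far an inexact match may lie from the requested label.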
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "rng",
+    [
+        range(1, 20, 3),
+        range(20, 35, 3),
+        range(35, 77, 3),
+        range(77, 110, 3),
+    ],
+)
+@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"])
+@pytest.mark.parametrize("tolerance", [None, 0, 1, 13, 20])
+def test_get_indexer_rangeindex(rng, method, tolerance):
+    key = list(rng)
+    pi = pd.RangeIndex(3, 100, 4)
+    gi = cudf.from_pandas(pi)
+
+    expected = pi.get_indexer(
+        key, method=method, tolerance=None if method is None else tolerance
+    )
+    got = gi.get_indexer(
+        key, method=method, tolerance=None if method is None else tolerance
+    )
+
+    assert_eq(expected, got)
+
+    with cudf.option_context("mode.pandas_compatible", True):
+        got = gi.get_indexer(
+            key, method=method, tolerance=None if method is None else tolerance
+        )
+    assert_eq(expected, got, check_dtype=True)
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_loc.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_loc.py
new file mode 100644
index 00000000000..d453f99566c
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_get_loc.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pandas as pd
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
+
+
+@pytest.mark.parametrize("key", list(range(1, 110, 13)))
+def test_get_loc_rangeindex(key):
+    pi = pd.RangeIndex(3, 100, 4)
+    gi = cudf.from_pandas(pi)
+    if (
+        (key not in pi)
+        # a key before the first element raises KeyError
+        or (key < pi.start)
+        # a key after the last element raises KeyError
+        or (key >= pi.stop)
+    ):
+        assert_exceptions_equal(
+            lfunc=pi.get_loc,
+            rfunc=gi.get_loc,
+            lfunc_args_and_kwargs=([], {"key": key}),
+            rfunc_args_and_kwargs=([], {"key": key}),
+        )
+    else:
+        expected = pi.get_loc(key)
+        got = gi.get_loc(key)
+
+        assert_eq(expected, got)
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_intersection.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_intersection.py
new file mode 100644
index 00000000000..93ef401a69e
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_intersection.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_rangeindex_intersection_default_user_option(default_integer_bitwidth):
+    # Test that RangeIndex is materialized into 32 bit index under user
+    # configuration for intersection operation.
+    idx1 = cudf.RangeIndex(0, 100)
+    # Intersecting two RangeIndex will _always_ result in a RangeIndex, use
+    # regular index here to force materializing.
+    idx2 = cudf.Index([50, 102])
+
+    expected = cudf.Index([50], dtype=f"int{default_integer_bitwidth}")
+    actual = idx1.intersection(idx2)
+
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_join.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_join.py
new file mode 100644
index 00000000000..a471d86230f
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_join.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_rangeindex_join_user_option(default_integer_bitwidth):
+    # Test that RangeIndex is materialized into 32 bit index under user
+    # configuration for join.
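+    # (The ``default_integer_bitwidth`` fixture is assumed to set the cudf
+    # option of the same name for the duration of the test, roughly
+    # equivalent to ``cudf.set_option("default_integer_bitwidth", 32)``.)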
+    idx1 = cudf.RangeIndex(0, 10, name="a")
+    idx2 = cudf.RangeIndex(5, 15, name="b")
+
+    actual = idx1.join(idx2, how="inner", sort=True)
+    expected = idx1.to_pandas().join(idx2.to_pandas(), how="inner", sort=True)
+    assert actual.dtype == cudf.dtype(f"int{default_integer_bitwidth}")
+    # exact=False to ignore dtype comparison,
+    # because `default_integer_bitwidth` is a cudf-only option
+    assert_eq(expected, actual, exact=False)
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py
index 1683051c9ad..0f996ef7996 100644
--- a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_nunique.py
@@ -1,8 +1,41 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 
+import numpy as np
 import pandas as pd
+import pytest
 
 import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "rangeindex",
+    [
+        range(np.random.default_rng(seed=0).integers(0, 100)),
+        range(9, 12, 2),
+        range(20, 30),
+        range(100, 1000, 10),
+        range(0, 10, -2),
+        range(0, -10, 2),
+        range(0, -10, -2),
+    ],
+)
+@pytest.mark.parametrize(
+    "func",
+    ["nunique", "min", "max", "any", "values"],
+)
+def test_rangeindex_methods(rangeindex, func):
+    gidx = cudf.RangeIndex(rangeindex)
+    pidx = gidx.to_pandas()
+
+    if func == "values":
+        expected = pidx.values
+        actual = gidx.values
+    else:
+        expected = getattr(pidx, func)()
+        actual = getattr(gidx, func)()
+
+    assert_eq(expected, actual)
 
 
 def test_nunique():
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py
new file mode 100644
index 00000000000..01841d11647
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_rename.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pandas as pd
+
+import cudf
+
+
+def test_rename_shallow_copy():
+    idx = pd.Index([1])
+    result = idx.rename("a")
+    assert idx.to_numpy(copy=False) is result.to_numpy(copy=False)
+
+    idx = cudf.Index([1])
+    result = idx.rename("a")
+    assert idx._column is result._column
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_repeat.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_repeat.py
new file mode 100644
index 00000000000..ed2b838c65a
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_repeat.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+
+import cudf
+from cudf.testing import assert_eq
+
+
+def test_rangeindex_repeat_user_option(default_integer_bitwidth):
+    # Test that RangeIndex is materialized into 32 bit index under user
+    # configuration for repeat operation.
+    idx = cudf.RangeIndex(0, 3)
+    actual = idx.repeat(3)
+    expected = cudf.Index(
+        [0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=f"int{default_integer_bitwidth}"
+    )
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_searchsorted.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_searchsorted.py
new file mode 100644
index 00000000000..8a2e3eec796
--- /dev/null
+++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_searchsorted.py
@@ -0,0 +1,12 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
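+
+# For a monotonically increasing index, ``searchsorted`` returns the
+# insertion point that keeps it sorted: for a value already present,
+# ``side="left"`` gives the position of the value itself and
+# ``side="right"`` the position just past it, hence the ``i`` / ``i + 1``
+# assertions below.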
+ + +import cudf + + +def test_index_rangeindex_searchsorted(): + # step > 0 + ridx = cudf.RangeIndex(-13, 17, 4) + for i in range(len(ridx)): + assert i == ridx.searchsorted(ridx[i], side="left") + assert i + 1 == ridx.searchsorted(ridx[i], side="right") diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_take.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_take.py new file mode 100644 index 00000000000..7804f71a1ab --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_take.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_take_default_user_option(default_integer_bitwidth): + # Test that RangeIndex is materialized into 32 bit index under user + # configuration for take operation. + idx = cudf.RangeIndex(0, 100) + actual = idx.take([0, 3, 7, 62]) + expected = cudf.Index( + [0, 3, 7, 62], dtype=f"int{default_integer_bitwidth}" + ) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_union.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_union.py new file mode 100644 index 00000000000..e38f2ca761f --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_union.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_union_default_user_option(default_integer_bitwidth): + # Test that RangeIndex is materialized into 32 bit index under user + # configuration for union operation. + idx1 = cudf.RangeIndex(0, 2) + idx2 = cudf.RangeIndex(5, 6) + + expected = cudf.Index([0, 1, 5], dtype=f"int{default_integer_bitwidth}") + actual = idx1.union(idx2) + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_unique.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_unique.py new file mode 100644 index 00000000000..685e7f69ae6 --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_unique.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pandas as pd + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_unique_shallow_copy(): + ri_pandas = pd.RangeIndex(1) + result = ri_pandas.unique() + assert result is not ri_pandas + + ri_cudf = cudf.RangeIndex(1) + result = ri_cudf.unique() + assert result is not ri_cudf + assert_eq(result, ri_cudf) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/methods/test_where.py b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_where.py new file mode 100644 index 00000000000..3e960875cbc --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/methods/test_where.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + + +import cudf +from cudf.testing import assert_eq + + +def test_rangeindex_where_user_option(default_integer_bitwidth): + # Test that RangeIndex is materialized into 32 bit index under user + # configuration for where operation. 
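+    # ``where`` keeps each element whose mask entry is True and substitutes
+    # ``other`` (-1 below) elsewhere; the result can no longer be expressed
+    # as a range, so it materializes at the configured bitwidth.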
+ idx = cudf.RangeIndex(0, 10) + mask = [True, False, True, False, True, False, True, False, True, False] + actual = idx.where(mask, -1) + expected = cudf.Index( + [0, -1, 2, -1, 4, -1, 6, -1, 8, -1], + dtype=f"int{default_integer_bitwidth}", + ) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py index 60ee8b432e6..63ebdd7e472 100644 --- a/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_attributes.py @@ -4,6 +4,8 @@ import pytest import cudf +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def test_rangeindex_contains(): @@ -22,3 +24,33 @@ def test_range_index_is_unique_monotonic(start, stop, step): assert index.is_unique == index_pd.is_unique assert index.is_monotonic_increasing == index_pd.is_monotonic_increasing assert index.is_monotonic_decreasing == index_pd.is_monotonic_decreasing + + +@pytest.mark.parametrize("data", [range(2), [10, 11, 12]]) +def test_index_contains_hashable(data): + gidx = cudf.Index(data) + pidx = gidx.to_pandas() + + assert_exceptions_equal( + lambda: [] in gidx, + lambda: [] in pidx, + lfunc_args_and_kwargs=((),), + rfunc_args_and_kwargs=((),), + ) + + +def test_bool_rangeindex_raises(): + assert_exceptions_equal( + lfunc=bool, + rfunc=bool, + lfunc_args_and_kwargs=[[pd.RangeIndex(0)]], + rfunc_args_and_kwargs=[[cudf.RangeIndex(0)]], + ) + + +def test_from_pandas_rangeindex(): + idx1 = pd.RangeIndex(start=0, stop=4, step=1, name="myindex") + idx2 = cudf.from_pandas(idx1) + + assert_eq(idx1.values, idx2.values) + assert idx1.name == idx2.name diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_binops.py b/python/cudf/cudf/tests/indexes/rangeindex/test_binops.py new file mode 100644 index 00000000000..af39d17864a --- /dev/null +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_binops.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize( + "op, expected, expected_kind", + [ + (lambda idx: 2**idx, [2, 4, 8, 16], "int"), + (lambda idx: idx**2, [1, 4, 9, 16], "int"), + (lambda idx: idx / 2, [0.5, 1, 1.5, 2], "float"), + (lambda idx: 2 / idx, [2, 1, 2 / 3, 0.5], "float"), + (lambda idx: idx % 3, [1, 2, 0, 1], "int"), + (lambda idx: 3 % idx, [0, 1, 0, 3], "int"), + ], +) +def test_rangeindex_binops_user_option( + op, expected, expected_kind, default_integer_bitwidth +): + # Test that RangeIndex is materialized into 32 bit index under user + # configuration for binary operation. + idx = cudf.RangeIndex(1, 5) + actual = op(idx) + expected = cudf.Index( + expected, dtype=f"{expected_kind}{default_integer_bitwidth}" + ) + assert_eq( + expected, + actual, + ) diff --git a/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py b/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py index 9bbe35ed33b..b54a781ff38 100644 --- a/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py +++ b/python/cudf/cudf/tests/indexes/rangeindex/test_constructors.py @@ -1,18 +1,46 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
import pandas as pd +import pytest import cudf from cudf.testing import assert_eq -def test_from_pandas_rangeindex(): - idx1 = pd.RangeIndex(start=0, stop=4, step=1, name="myindex") - idx2 = cudf.from_pandas(idx1) +def test_rangeindex_arg_validation(): + with pytest.raises(TypeError): + cudf.RangeIndex("1") - # Check index - assert_eq(idx1.values, idx2.values) - assert idx1.name == idx2.name + with pytest.raises(TypeError): + cudf.RangeIndex(1, "2") + + with pytest.raises(TypeError): + cudf.RangeIndex(1, 3, "1") + + with pytest.raises(ValueError): + cudf.RangeIndex(1, dtype="float64") + + with pytest.raises(ValueError): + cudf.RangeIndex(1, dtype="uint64") + + +def test_rangeindex_name_not_hashable(): + with pytest.raises(ValueError): + cudf.RangeIndex(range(2), name=["foo"]) + + with pytest.raises(ValueError): + cudf.RangeIndex(range(2)).copy(name=["foo"]) + + +@pytest.mark.parametrize("klass", [cudf.RangeIndex, pd.RangeIndex]) +@pytest.mark.parametrize("name_inner", [None, "a"]) +@pytest.mark.parametrize("name_outer", [None, "b"]) +def test_rangeindex_accepts_rangeindex(klass, name_inner, name_outer): + result = cudf.RangeIndex(klass(range(1), name=name_inner), name=name_outer) + expected = pd.RangeIndex( + pd.RangeIndex(range(1), name=name_inner), name=name_outer + ) + assert_eq(result, expected) def test_from_pandas_rangeindex_step(): diff --git a/python/cudf/cudf/tests/input_output/test_pickling.py b/python/cudf/cudf/tests/input_output/test_pickling.py index ed3483f296b..25450dab8a8 100644 --- a/python/cudf/cudf/tests/input_output/test_pickling.py +++ b/python/cudf/cudf/tests/input_output/test_pickling.py @@ -1,5 +1,6 @@ # Copyright (c) 2018-2025, NVIDIA CORPORATION. +import io import pickle import numpy as np @@ -132,3 +133,32 @@ def test_pickle_string_column(slices): out = pickle.loads(pickled) assert_eq(Series._from_column(out), Series._from_column(input_col)) + + +@pytest.mark.parametrize( + "names", + [ + ["a", "b", "c"], + [None, None, None], + ["aa", "aa", "aa"], + ["bb", "aa", "aa"], + None, + ], +) +def test_pickle_roundtrip_multiindex(names): + df = DataFrame( + { + "one": [1, 2, 3], + "two": [True, False, True], + "three": ["ab", "cd", "ef"], + "four": [0.2, 0.1, -10.2], + } + ) + expected_df = df.set_index(["one", "two", "three"]) + expected_df.index.names = names + local_file = io.BytesIO() + + pickle.dump(expected_df, local_file) + local_file.seek(0) + actual_df = pickle.load(local_file) + assert_eq(expected_df, actual_df) diff --git a/python/cudf/cudf/tests/series/methods/test_take.py b/python/cudf/cudf/tests/series/methods/test_take.py new file mode 100644 index 00000000000..9bc3e88cb5a --- /dev/null +++ b/python/cudf/cudf/tests/series/methods/test_take.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
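+
+# ``Series.take`` selects rows by integer position rather than by label, so
+# it behaves identically for the default RangeIndex and for arbitrary string
+# labels; both tests below compare against the pandas result.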
+ +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.mark.parametrize("ntake", [0, 1, 123, 122, 200]) +def test_series_take(ntake): + rng = np.random.default_rng(seed=0) + nelem = 123 + + psr = pd.Series(rng.integers(0, 20, nelem)) + gsr = cudf.Series(psr) + + take_indices = rng.integers(0, len(gsr), ntake) + + actual = gsr.take(take_indices) + expected = psr.take(take_indices) + + assert_eq(actual, expected) + + +def test_series_take_positional(): + psr = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) + + gsr = cudf.Series.from_pandas(psr) + + take_indices = [1, 2, 0, 3] + + expect = psr.take(take_indices) + got = gsr.take(take_indices) + + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index dbb193019b2..c28d65fe45e 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -4,35 +4,15 @@ Test related to Index """ -import datetime import operator -import re -import cupy as cp import numpy as np import pandas as pd -import pyarrow as pa import pytest import cudf -from cudf.api.extensions import no_default -from cudf.core._compat import ( - PANDAS_CURRENT_SUPPORTED_VERSION, - PANDAS_GE_220, - PANDAS_VERSION, -) -from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex +from cudf.core.index import CategoricalIndex from cudf.testing import assert_eq -from cudf.testing._utils import ( - ALL_TYPES, - NUMERIC_TYPES, - OTHER_TYPES, - SERIES_OR_INDEX_NAMES, - assert_column_memory_eq, - assert_column_memory_ne, - assert_exceptions_equal, - expect_warning_if, -) def test_df_set_index_from_series(): @@ -62,97 +42,6 @@ def test_df_set_index_from_name(): assert list(sliced_strided.index.values) == [2, 4, 6] -def test_df_slice_empty_index(): - df = cudf.DataFrame() - assert isinstance(df.index, RangeIndex) - assert isinstance(df.index[:1], RangeIndex) - with pytest.raises(IndexError): - df.index[1] - - -def test_index_find_label_range_genericindex(): - # Monotonic Index - idx = cudf.Index(np.asarray([4, 5, 6, 10])) - assert idx.find_label_range(slice(4, 6)) == slice(0, 3, 1) - assert idx.find_label_range(slice(5, 10)) == slice(1, 4, 1) - assert idx.find_label_range(slice(0, 6)) == slice(0, 3, 1) - assert idx.find_label_range(slice(4, 11)) == slice(0, 4, 1) - - # Non-monotonic Index - idx_nm = cudf.Index(np.asarray([5, 4, 6, 10])) - assert idx_nm.find_label_range(slice(4, 6)) == slice(1, 3, 1) - assert idx_nm.find_label_range(slice(5, 10)) == slice(0, 4, 1) - # Last value not found - with pytest.raises(KeyError) as raises: - idx_nm.find_label_range(slice(0, 6)) - raises.match("not in index") - # Last value not found - with pytest.raises(KeyError) as raises: - idx_nm.find_label_range(slice(4, 11)) - raises.match("not in index") - - -def test_index_find_label_range_rangeindex(): - """Cudf specific""" - # step > 0 - # 3, 8, 13, 18 - ridx = RangeIndex(3, 20, 5) - assert ridx.find_label_range(slice(3, 8)) == slice(0, 2, 1) - assert ridx.find_label_range(slice(0, 7)) == slice(0, 1, 1) - assert ridx.find_label_range(slice(3, 19)) == slice(0, 4, 1) - assert ridx.find_label_range(slice(2, 21)) == slice(0, 4, 1) - - # step < 0 - # 20, 15, 10, 5 - ridx = RangeIndex(20, 3, -5) - assert ridx.find_label_range(slice(15, 10)) == slice(1, 3, 1) - assert ridx.find_label_range(slice(10, 15, -1)) == slice(2, 0, -1) - assert ridx.find_label_range(slice(10, 0)) == slice(2, 4, 1) - assert ridx.find_label_range(slice(30, 
13)) == slice(0, 2, 1) - assert ridx.find_label_range(slice(30, 0)) == slice(0, 4, 1) - - -def test_index_comparision(): - start, stop = 10, 34 - rg = cudf.RangeIndex(start, stop) - gi = cudf.Index(np.arange(start, stop)) - assert rg.equals(gi) - assert gi.equals(rg) - assert not rg[:-1].equals(gi) - assert rg[:-1].equals(gi[:-1]) - - -@pytest.mark.parametrize( - "func", - [ - lambda x: x.min(), - lambda x: x.max(), - lambda x: x.any(), - lambda x: x.all(), - ], -) -def test_reductions(func): - x = np.asarray([4, 5, 6, 10]) - idx = cudf.Index(np.asarray([4, 5, 6, 10])) - - assert func(x) == func(idx) - - -def test_name(): - idx = cudf.Index(np.asarray([4, 5, 6, 10]), name="foo") - assert idx.name == "foo" - - -def test_index_immutable(): - start, stop = 10, 34 - rg = RangeIndex(start, stop) - with pytest.raises(TypeError): - rg[1] = 5 - gi = cudf.Index(np.arange(start, stop)) - with pytest.raises(TypeError): - gi[1] = 5 - - def test_categorical_index(): pdf = pd.DataFrame() pdf["a"] = [1, 2, 3] @@ -183,103 +72,6 @@ def test_categorical_index(): ) -def test_pandas_as_index(): - # Define Pandas Indexes - pdf_int_index = pd.Index([1, 2, 3, 4, 5]) - pdf_uint_index = pd.Index([1, 2, 3, 4, 5]) - pdf_float_index = pd.Index([1.0, 2.0, 3.0, 4.0, 5.0]) - pdf_datetime_index = pd.DatetimeIndex( - [1000000, 2000000, 3000000, 4000000, 5000000] - ) - pdf_category_index = pd.CategoricalIndex(["a", "b", "c", "b", "a"]) - - # Define cudf Indexes - gdf_int_index = Index(pdf_int_index) - gdf_uint_index = Index(pdf_uint_index) - gdf_float_index = Index(pdf_float_index) - gdf_datetime_index = Index(pdf_datetime_index) - gdf_category_index = Index(pdf_category_index) - - # Check instance types - assert isinstance(gdf_int_index, Index) - assert isinstance(gdf_uint_index, Index) - assert isinstance(gdf_float_index, Index) - assert isinstance(gdf_datetime_index, DatetimeIndex) - assert isinstance(gdf_category_index, CategoricalIndex) - - # Check equality - assert_eq(pdf_int_index, gdf_int_index) - assert_eq(pdf_uint_index, gdf_uint_index) - assert_eq(pdf_float_index, gdf_float_index) - assert_eq(pdf_datetime_index, gdf_datetime_index) - assert_eq(pdf_category_index, gdf_category_index) - - assert_eq( - pdf_category_index.codes, - gdf_category_index.codes.astype( - pdf_category_index.codes.dtype - ).to_numpy(), - ) - - -@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES) -@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES) -def test_index_rename(initial_name, name): - pds = pd.Index([1, 2, 3], name=initial_name) - gds = Index(pds) - - assert_eq(pds, gds) - - expect = pds.rename(name) - got = gds.rename(name) - - assert_eq(expect, got) - """ - From here on testing recursive creation - and if name is being handles in recursive creation. 
- """ - pds = pd.Index(expect) - gds = Index(got) - - assert_eq(pds, gds) - - pds = pd.Index(pds, name="abc") - gds = Index(gds, name="abc") - assert_eq(pds, gds) - - -def test_index_rename_inplace(): - pds = pd.Index([1, 2, 3], name="asdf") - gds = Index(pds) - - # inplace=False should yield a shallow copy - gds_renamed_deep = gds.rename("new_name", inplace=False) - - assert gds_renamed_deep._column.data_ptr == gds._column.data_ptr - - # inplace=True returns none - expected_ptr = gds._column.data_ptr - gds.rename("new_name", inplace=True) - - assert expected_ptr == gds._column.data_ptr - - -def test_index_rename_preserves_arg(): - idx1 = cudf.Index([1, 2, 3], name="orig_name") - - # this should be an entirely new object - idx2 = idx1.rename("new_name", inplace=False) - - assert idx2.name == "new_name" - assert idx1.name == "orig_name" - - # a new object but referencing the same data - idx3 = Index(idx1, name="last_name") - - assert idx3.name == "last_name" - assert idx1.name == "orig_name" - - def test_set_index_as_property(): cdf = cudf.DataFrame() col1 = np.arange(10) @@ -306,102 +98,6 @@ def test_set_index_as_property(): assert_eq(head.index, idx[:5]) -@pytest.mark.parametrize( - "data", - [ - range(1, 5), - [1, 2, 3, 4], - pd.DatetimeIndex(["2001", "2002", "2003"]), - ["a", "b", "c"], - pd.CategoricalIndex(["a", "b", "c"]), - ], -) -@pytest.mark.parametrize("deep", [True, False]) -@pytest.mark.parametrize("copy_on_write", [True, False]) -def test_index_copy(data, deep, copy_on_write): - name = "x" - cidx = cudf.Index(data) - pidx = cidx.to_pandas() - - pidx_copy = pidx.copy(name=name, deep=deep) - cidx_copy = cidx.copy(name=name, deep=deep) - - assert_eq(pidx_copy, cidx_copy) - - with cudf.option_context("copy_on_write", copy_on_write): - if not isinstance(cidx, cudf.RangeIndex): - if ( - isinstance(cidx._column, cudf.core.column.StringColumn) - or not deep - or (copy_on_write and not deep) - ): - # StringColumn is immutable hence, deep copies of a - # Index with string dtype will share the same StringColumn. - - # When `copy_on_write` is turned on, Index objects will - # have unique column object but they all point to same - # data pointers. 
- assert_column_memory_eq(cidx._column, cidx_copy._column) - else: - assert_column_memory_ne(cidx._column, cidx_copy._column) - - -def test_index_isna_notna(): - idx = [1, None, 3, None, 5] - pidx = pd.Index(idx, name="idx") - gidx = cudf.Index(idx, name="idx") - assert_eq(gidx.isna(), pidx.isna()) - assert_eq(gidx.notna(), pidx.notna()) - - -def test_rangeindex_slice_attr_name(): - start, stop = 0, 10 - rg = RangeIndex(start, stop, name="myindex") - sliced_rg = rg[0:9] - assert_eq(rg.name, sliced_rg.name) - - -def test_from_pandas_str(): - idx = ["a", "b", "c"] - pidx = pd.Index(idx, name="idx") - gidx_1 = cudf.Index(idx, name="idx") - gidx_2 = cudf.from_pandas(pidx) - - assert_eq(gidx_1, gidx_2) - - -def test_from_pandas_gen(): - idx = [2, 4, 6] - pidx = pd.Index(idx, name="idx") - gidx_1 = cudf.Index(idx, name="idx") - gidx_2 = cudf.from_pandas(pidx) - - assert_eq(gidx_1, gidx_2) - - -def test_index_names(): - idx = Index([1, 2, 3], name="idx") - assert idx.names == ("idx",) - - -@pytest.mark.parametrize( - "data", - [ - range(0), - range(1), - range(0, 1), - range(0, 5), - range(1, 10), - range(1, 10, 1), - range(1, 10, 3), - range(10, 1, -3), - range(-5, 10), - ], -) -def test_range_index_from_range(data): - assert_eq(pd.Index(data), cudf.Index(data)) - - @pytest.mark.parametrize( "n", [-10, -5, -2, 0, 1, 0, 2, 5, 10], @@ -424,2764 +120,42 @@ def test_empty_df_head_tail_index(n): @pytest.mark.parametrize( - "data,condition,other,error", - [ - (pd.Index(range(5)), pd.Index(range(5)) > 0, None, None), - (pd.Index([1, 2, 3]), pd.Index([1, 2, 3]) != 2, None, None), - (pd.Index(list("abc")), pd.Index(list("abc")) == "c", None, None), - ( - pd.Index(list("abc")), - pd.Index(list("abc")) == "c", - pd.Index(list("xyz")), - None, - ), - (pd.Index(range(5)), pd.Index(range(4)) > 0, None, ValueError), - ( - pd.Index(range(5)), - pd.Index(range(5)) > 1, - 10, - None, - ), - ( - pd.Index(np.arange(10)), - (pd.Index(np.arange(10)) % 3) == 0, - -pd.Index(np.arange(10)), - None, - ), - ( - pd.Index([1, 2, np.nan]), - pd.Index([1, 2, np.nan]) == 4, - None, - None, - ), - ( - pd.Index([1, 2, np.nan]), - pd.Index([1, 2, np.nan]) != 4, - None, - None, - ), - ( - pd.Index([-2, 3, -4, -79]), - [True, True, True], - None, - ValueError, - ), - ( - pd.Index([-2, 3, -4, -79]), - [True, True, True, False], - None, - None, - ), - ( - pd.Index([-2, 3, -4, -79]), - [True, True, True, False], - 17, - None, - ), - (pd.Index(list("abcdgh")), pd.Index(list("abcdgh")) != "g", "3", None), - ( - pd.Index(list("abcdgh")), - pd.Index(list("abcdg")) != "g", - "3", - ValueError, - ), - ( - pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]), - pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) != "a", - "a", - None, - ), - ( - pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]), - pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) != "a", - "b", - None, - ), - ( - pd.MultiIndex.from_tuples( - list( - zip( - *[ - [ - "bar", - "bar", - "baz", - "baz", - "foo", - "foo", - "qux", - "qux", - ], - [ - "one", - "two", - "one", - "two", - "one", - "two", - "one", - "two", - ], - ], - strict=True, - ) - ) - ), - pd.MultiIndex.from_tuples( - list( - zip( - *[ - [ - "bar", - "bar", - "baz", - "baz", - "foo", - "foo", - "qux", - "qux", - ], - [ - "one", - "two", - "one", - "two", - "one", - "two", - "one", - "two", - ], - ], - strict=True, - ) - ) - ) - != "a", - None, - NotImplementedError, - ), - ], -) -def test_index_where(data, condition, other, error): - ps = data - gs = cudf.from_pandas(data) - - ps_condition = condition - if 
type(condition).__module__.split(".")[0] == "pandas": - gs_condition = cudf.from_pandas(condition) - else: - gs_condition = condition - - ps_other = other - if type(other).__module__.split(".")[0] == "pandas": - gs_other = cudf.from_pandas(other) - else: - gs_other = other - - if error is None: - if hasattr(ps, "dtype") and isinstance(ps.dtype, pd.CategoricalDtype): - expect = ps.where(ps_condition, other=ps_other) - got = gs.where(gs_condition, other=gs_other) - np.testing.assert_array_equal( - expect.codes, - got.codes.astype(expect.codes.dtype).fillna(-1).to_numpy(), - ) - assert_eq(expect.categories, got.categories) - else: - assert_eq( - ps.where(ps_condition, other=ps_other), - gs.where(gs_condition, other=gs_other).to_pandas(), - ) - else: - assert_exceptions_equal( - lfunc=ps.where, - rfunc=gs.where, - lfunc_args_and_kwargs=([ps_condition], {"other": ps_other}), - rfunc_args_and_kwargs=([gs_condition], {"other": gs_other}), - ) - - -@pytest.mark.parametrize("dtype", NUMERIC_TYPES + OTHER_TYPES) -@pytest.mark.parametrize("copy", [True, False]) -def test_index_astype(dtype, copy): - pdi = pd.Index([1, 2, 3]) - gdi = cudf.from_pandas(pdi) - - actual = gdi.astype(dtype=dtype, copy=copy) - expected = pdi.astype(dtype=dtype, copy=copy) - - assert_eq(expected, actual) - assert_eq(pdi, gdi) - - -@pytest.mark.parametrize( - "data", - [ - [1, 10, 2, 100, -10], - ["z", "x", "a", "c", "b"], - [-10.2, 100.1, -100.2, 0.0, 0.23], - ], -) -def test_index_argsort(data): - pdi = pd.Index(data) - gdi = cudf.from_pandas(pdi) - - assert_eq(pdi.argsort(), gdi.argsort()) - - -@pytest.mark.parametrize( - "data", - [ - pd.Index([1, 10, 2, 100, -10], name="abc"), - pd.Index(["z", "x", "a", "c", "b"]), - pd.Index(["z", "x", "a", "c", "b"], dtype="category"), - pd.Index( - [-10.2, 100.1, -100.2, 0.0, 0.23], name="this is a float index" - ), - pd.Index([102, 1001, 1002, 0.0, 23], dtype="datetime64[ns]"), - pd.Index([13240.2, 1001, 100.2, 0.0, 23], dtype="datetime64[ns]"), - pd.RangeIndex(0, 10, 1), - pd.RangeIndex(0, -100, -2), - pd.Index([-10.2, 100.1, -100.2, 0.0, 23], dtype="timedelta64[ns]"), - ], -) -@pytest.mark.parametrize("ascending", [True, False]) -@pytest.mark.parametrize("return_indexer", [True, False]) -def test_index_sort_values(data, ascending, return_indexer): - pdi = data - gdi = cudf.from_pandas(pdi) - - expected = pdi.sort_values( - ascending=ascending, return_indexer=return_indexer - ) - actual = gdi.sort_values( - ascending=ascending, return_indexer=return_indexer - ) - - if return_indexer: - expected_indexer = expected[1] - actual_indexer = actual[1] - - assert_eq(expected_indexer, actual_indexer) - - expected = expected[0] - actual = actual[0] - - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 10, 2, 100, -10], - ["z", "x", "a", "c", "b"], - [-10.2, 100.1, -100.2, 0.0, 0.23], - ], -) -def test_index_to_series(data): - pdi = pd.Index(data) - gdi = cudf.from_pandas(pdi) - - assert_eq(pdi.to_series(), gdi.to_series()) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [4, 5, 6, 10, 20, 30], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - ["5", "6", "2", "a", "b", "c"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - [1.0, 5.0, 6.0, 0.0, 1.3], - ["ab", "cd", "ef"], - pd.Series(["1", "2", "a", "3", None], dtype="category"), - range(0, 10), - [], - [1, 1, 2, 2], - ], -) -@pytest.mark.parametrize( - "other", - [ - [1, 2, 3, 4, 5, 6], - [4, 5, 6, 10, 20, 30], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - ["5", "6", "2", "a", 
"b", "c"], - ["ab", "ef", None], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - [1.0, 5.0, 6.0, 0.0, 1.3], - range(2, 4), - pd.Series(["1", "a", "3", None], dtype="category"), - [], - [2], - ], -) -@pytest.mark.parametrize("sort", [None, False, True]) -@pytest.mark.parametrize( - "name_data,name_other", - [("abc", "c"), (None, "abc"), ("abc", pd.NA), ("abc", "abc")], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_index_difference(data, other, sort, name_data, name_other): - pd_data = pd.Index(data, name=name_data) - pd_other = pd.Index(other, name=name_other) - if ( - not PANDAS_GE_220 - and isinstance(pd_data.dtype, pd.CategoricalDtype) - and not isinstance(pd_other.dtype, pd.CategoricalDtype) - and pd_other.isnull().any() - ): - pytest.skip(reason="https://github.com/pandas-dev/pandas/issues/57318") - - if ( - not PANDAS_GE_220 - and len(pd_other) == 0 - and len(pd_data) != len(pd_data.unique()) - ): - pytest.skip(reason="Bug fixed in pandas-2.2+") - - gd_data = cudf.from_pandas(pd_data) - gd_other = cudf.from_pandas(pd_other) - - expected = pd_data.difference(pd_other, sort=sort) - actual = gd_data.difference(gd_other, sort=sort) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("other", ["a", 1, None]) -def test_index_difference_invalid_inputs(other): - pdi = pd.Index([1, 2, 3]) - gdi = cudf.Index([1, 2, 3]) - - assert_exceptions_equal( - pdi.difference, - gdi.difference, - ([other], {}), - ([other], {}), - ) - - -def test_index_difference_sort_error(): - pdi = pd.Index([1, 2, 3]) - gdi = cudf.Index([1, 2, 3]) - - assert_exceptions_equal( - pdi.difference, - gdi.difference, - ([pdi], {"sort": "A"}), - ([gdi], {"sort": "A"}), - ) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -@pytest.mark.parametrize( - "other", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - [], - ["b", "c", "d"], - [1], - [2, 3, 4], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -def test_index_equals(data, other): - pd_data = pd.Index(data) - pd_other = pd.Index(other) - - gd_data = Index(data) - gd_other = Index(other) - - expected = pd_data.equals(pd_other) - actual = gd_data.equals(gd_other) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -@pytest.mark.parametrize( - "other", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -def test_index_categories_equal(data, other): - pd_data = pd.Index(data).astype("category") - pd_other = pd.Index(other) - - gd_data = Index(data).astype("category") - gd_other = Index(other) - - expected = pd_data.equals(pd_other) - actual = gd_data.equals(gd_other) - assert_eq(expected, actual) - - expected = 
pd_other.equals(pd_data) - actual = gd_other.equals(gd_data) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -@pytest.mark.parametrize( - "other", + "objs", [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], + [pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)], + [pd.RangeIndex(10, 20), pd.RangeIndex(22, 40), pd.RangeIndex(50, 60)], + [pd.RangeIndex(10, 20, 2), pd.RangeIndex(20, 40, 2)], ], ) -def test_index_equal_misc(data, other): - pd_data = pd.Index(data) - pd_other = other - - gd_data = Index(data) - gd_other = other - - expected = pd_data.equals(pd_other) - actual = gd_data.equals(gd_other) - assert_eq(expected, actual) - - expected = pd_data.equals(np.array(pd_other)) - actual = gd_data.equals(np.array(gd_other)) - assert_eq(expected, actual) +def test_range_index_concat(objs): + cudf_objs = [cudf.from_pandas(obj) for obj in objs] - expected = pd_data.equals(pd.Series(pd_other)) - actual = gd_data.equals(cudf.Series(gd_other)) - assert_eq(expected, actual) + actual = cudf.concat(cudf_objs) - expected = pd_data.astype("category").equals(pd_other) - actual = gd_data.astype("category").equals(gd_other) + expected = objs[0] + for obj in objs[1:]: + expected = expected.append(obj) assert_eq(expected, actual) @pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -@pytest.mark.parametrize( - "other", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - ["1", "2", "3", "4", "5", "6"], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - ["a"], - ["b", "c", "d"], - [1], - [2, 3, 4], - [], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ["abcd", "defgh", "werty", "poiu"], - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Does not warn on older versions of pandas", + "op", [operator.add, operator.sub, operator.mul, operator.truediv] ) -def test_index_append(data, other): - pd_data = pd.Index(data) - pd_other = pd.Index(other) - - gd_data = cudf.Index(data) - gd_other = cudf.Index(other) - - if cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other): - gd_data = gd_data.astype("str") - gd_other = gd_other.astype("str") - - with expect_warning_if( - (len(data) == 0 or len(other) == 0) and pd_data.dtype != pd_other.dtype - ): - expected = pd_data.append(pd_other) - with expect_warning_if( - (len(data) == 0 or len(other) == 0) and gd_data.dtype != gd_other.dtype - ): - actual = gd_data.append(gd_other) - if len(data) == 0 and len(other) == 0: - # Pandas default dtype to "object" for empty list - # cudf default dtype to "float" for empty list - assert_eq(expected, actual.astype("str")) - elif actual.dtype == "object": - assert_eq(expected.astype("str"), actual) - else: - assert_eq(expected, actual) - - -def test_index_empty_append_name_conflict(): - empty = cudf.Index([], name="foo") - non_empty = cudf.Index([1], name="bar") - 
expected = cudf.Index([1]) - - with pytest.warns(FutureWarning): - result = non_empty.append(empty) - assert_eq(result, expected) - - with pytest.warns(FutureWarning): - result = empty.append(non_empty) +def test_rangeindex_binop_diff_names_none(op): + idx1 = cudf.RangeIndex(10, 13, name="foo") + idx2 = cudf.RangeIndex(13, 16, name="bar") + result = op(idx1, idx2) + expected = op(idx1.to_pandas(), idx2.to_pandas()) assert_eq(result, expected) + assert result.name is None @pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40, 50, 60], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], - [1], - [2, 3, 4], - [10.0], - [1100.112, 2323.2322, 2323.2322], - ], -) -@pytest.mark.parametrize( - "other", - [ - ["1", "2", "3", "4", "5", "6"], - ["a"], - ["b", "c", "d"], - ["abcd", "defgh", "werty", "poiu"], - ], -) -def test_index_append_error(data, other): - gd_data = Index(data) - gd_other = Index(other) - - got_dtype = ( - gd_other.dtype - if gd_data.dtype == np.dtype("object") - else gd_data.dtype - ) - with pytest.raises( - TypeError, - match=re.escape( - f"cudf does not support appending an Index of " - f"dtype `{np.dtype('object')}` with an Index " - f"of dtype `{got_dtype}`, please type-cast " - f"either one of them to same dtypes." - ), - ): - gd_data.append(gd_other) - - with pytest.raises( - TypeError, - match=re.escape( - f"cudf does not support appending an Index of " - f"dtype `{np.dtype('object')}` with an Index " - f"of dtype `{got_dtype}`, please type-cast " - f"either one of them to same dtypes." - ), - ): - gd_other.append(gd_data) - - sr = gd_other.to_series() - - assert_exceptions_equal( - lfunc=gd_data.to_pandas().append, - rfunc=gd_data.append, - lfunc_args_and_kwargs=([[sr.to_pandas()]],), - rfunc_args_and_kwargs=([[sr]],), - ) - - -@pytest.mark.parametrize( - "data,other", - [ - ( - pd.Index([1, 2, 3, 4, 5, 6]), - [ - pd.Index([1, 2, 3, 4, 5, 6]), - pd.Index([1, 2, 3, 4, 5, 6, 10]), - pd.Index([]), - ], - ), - ( - pd.Index([]), - [ - pd.Index([1, 2, 3, 4, 5, 6]), - pd.Index([1, 2, 3, 4, 5, 6, 10]), - pd.Index([1, 4, 5, 6]), - ], - ), - ( - pd.Index([10, 20, 30, 40, 50, 60]), - [ - pd.Index([10, 20, 30, 40, 50, 60]), - pd.Index([10, 20, 30]), - pd.Index([40, 50, 60]), - pd.Index([10, 60]), - pd.Index([60]), - ], - ), - ( - pd.Index([]), - [ - pd.Index([10, 20, 30, 40, 50, 60]), - pd.Index([10, 20, 30]), - pd.Index([40, 50, 60]), - pd.Index([10, 60]), - pd.Index([60]), - ], - ), - ( - pd.Index(["1", "2", "3", "4", "5", "6"]), - [ - pd.Index(["1", "2", "3", "4", "5", "6"]), - pd.Index(["1", "2", "3"]), - pd.Index(["6"]), - pd.Index(["1", "6"]), - ], - ), - ( - pd.Index([]), - [ - pd.Index(["1", "2", "3", "4", "5", "6"]), - pd.Index(["1", "2", "3"]), - pd.Index(["6"]), - pd.Index(["1", "6"]), - ], - ), - ( - pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), - [ - pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), - pd.Index([1.0, 6.0]), - pd.Index([]), - pd.Index([6.0]), - ], - ), - ( - pd.Index([]), - [ - pd.Index([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]), - pd.Index([1.0, 6.0]), - pd.Index([1.0, 2.0, 6.0]), - pd.Index([6.0]), - ], - ), - ( - pd.Index(["a"]), - [ - pd.Index(["a"]), - pd.Index(["a", "b", "c"]), - pd.Index(["c"]), - pd.Index(["d"]), - pd.Index(["ae", "hello", "world"]), - ], - ), - ( - pd.Index([]), - [ - pd.Index(["a"]), - pd.Index(["a", "b", "c"]), - pd.Index(["c"]), - pd.Index(["d"]), - pd.Index(["ae", "hello", "world"]), - pd.Index([]), - ], - ), - ], -) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Does not warn on older versions of 
pandas", -) -def test_index_append_list(data, other): - pd_data = data - pd_other = other - - gd_data = cudf.from_pandas(data) - gd_other = [cudf.from_pandas(i) for i in other] - - with expect_warning_if( - (len(data) == 0 or any(len(d) == 0 for d in other)) - and (any(d.dtype != data.dtype for d in other)) - ): - expected = pd_data.append(pd_other) - with expect_warning_if( - (len(data) == 0 or any(len(d) == 0 for d in other)) - and (any(d.dtype != data.dtype for d in other)) - ): - actual = gd_data.append(gd_other) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -@pytest.mark.parametrize("name", [1, "a", None]) -def test_index_basic(data, dtype, name): - pdi = pd.Index(data, dtype=dtype, name=name) - gdi = cudf.Index(data, dtype=dtype, name=name) - - assert_eq(pdi, gdi) - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize("categories", [[1, 2], None]) -@pytest.mark.parametrize( - "dtype", + "index", [ - pd.CategoricalDtype([1, 2, 3], ordered=True), - pd.CategoricalDtype([1, 2, 3], ordered=False), - None, - ], -) -@pytest.mark.parametrize("ordered", [True, False]) -@pytest.mark.parametrize("name", [1, "a", None]) -def test_categorical_index_basic(data, categories, dtype, ordered, name): - # can't have both dtype and categories/ordered - if dtype is not None: - categories = None - ordered = None - pindex = pd.CategoricalIndex( - data=data, - categories=categories, - dtype=dtype, - ordered=ordered, - name=name, - ) - gindex = CategoricalIndex( - data=data, - categories=categories, - dtype=dtype, - ordered=ordered, - name=name, - ) - - assert_eq(pindex, gindex) - - -@pytest.mark.parametrize( - "data", - [ - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], - names=("number", "color"), - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], ["yellow", "violet", "pink", "white"]], - names=("number1", "color2"), - ), - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], - ), - ], -) -@pytest.mark.parametrize( - "other", - [ - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], - names=("number", "color"), - ), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 4], ["yellow", "violet", "pink", "white"]], - names=("number1", "color2"), - ), - pd.MultiIndex.from_arrays( - [[1, 1, 2, 2], ["red", "blue", "red", "blue"]], - ), - ], -) -def test_multiindex_append(data, other): - pdi = data - other_pd = other - - gdi = cudf.from_pandas(data) - other_gd = cudf.from_pandas(other) - - expected = pdi.append(other_pd) - actual = gdi.append(other_gd) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_empty(data, dtype): - pdi = pd.Index(data, dtype=dtype) - gdi = cudf.Index(data, dtype=dtype) - - assert_eq(pdi.empty, gdi.empty) - - -@pytest.mark.parametrize("data", [[1, 2, 3, 4], []]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_size(data, dtype): - pdi = pd.Index(data, dtype=dtype) - gdi = cudf.Index(data, dtype=dtype) - - assert_eq(pdi.size, gdi.size) - - -@pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], [], [1], [1, 2, 3]]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_drop_duplicates(data, 
dtype): - pdi = pd.Index(data, dtype=dtype) - gdi = cudf.Index(data, dtype=dtype) - - assert_eq(pdi.drop_duplicates(), gdi.drop_duplicates()) - - -def test_dropna_bad_how(): - with pytest.raises(ValueError): - cudf.Index([1]).dropna(how="foo") - - -@pytest.mark.parametrize("data", [[1, 2, 3, 1, 2, 3, 4], []]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_tolist(data, dtype): - gdi = cudf.Index(data, dtype=dtype) - - with pytest.raises( - TypeError, - match=re.escape( - r"cuDF does not support conversion to host memory " - r"via the `tolist()` method. Consider using " - r"`.to_arrow().to_pylist()` to construct a Python list." - ), - ): - gdi.tolist() - - -@pytest.mark.parametrize("data", [[], [1], [1, 2, 3]]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_iter_error(data, dtype): - gdi = cudf.Index(data, dtype=dtype) - - with pytest.raises( - TypeError, - match=re.escape( - f"{gdi.__class__.__name__} object is not iterable. " - f"Consider using `.to_arrow()`, `.to_pandas()` or `.values_host` " - f"if you wish to iterate over the values." - ), - ): - iter(gdi) - - -@pytest.mark.parametrize("data", [[], [1], [1, 2, 3, 4, 5]]) -@pytest.mark.parametrize( - "dtype", [*NUMERIC_TYPES, "str", "category", "datetime64[ns]"] -) -def test_index_values_host(data, dtype): - gdi = cudf.Index(data, dtype=dtype) - pdi = pd.Index(data, dtype=dtype) - - np.testing.assert_array_equal(gdi.values_host, pdi.values) - - -@pytest.mark.parametrize( - "data,fill_value", - [ - ([1, 2, 3, 1, None, None], 1), - ([None, None, 3.2, 1, None, None], 10.0), - ([None, "a", "3.2", "z", None, None], "helloworld"), - (pd.Series(["a", "b", None], dtype="category"), "b"), - (pd.Series([None, None, 1.0], dtype="category"), 1.0), - ( - np.array([1, 2, 3, None], dtype="datetime64[s]"), - np.datetime64("2005-02-25"), - ), - ( - np.array( - [None, None, 122, 3242234, None, 6237846], - dtype="datetime64[ms]", - ), - np.datetime64("2005-02-25"), - ), - ], -) -def test_index_fillna(data, fill_value): - pdi = pd.Index(data) - gdi = cudf.Index(data) - - assert_eq( - pdi.fillna(fill_value), gdi.fillna(fill_value), exact=False - ) # Int64 v/s Float64 - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 1, None, None], - [None, None, 3.2, 1, None, None], - [None, "a", "3.2", "z", None, None], - pd.Series(["a", "b", None], dtype="category"), - np.array([1, 2, 3, None], dtype="datetime64[s]"), - ], -) -def test_index_to_arrow(data): - pdi = pd.Index(data) - gdi = cudf.Index(data) - - expected_arrow_array = pa.Array.from_pandas(pdi) - got_arrow_array = gdi.to_arrow() - - assert_eq(expected_arrow_array, got_arrow_array) - - -@pytest.mark.parametrize( - "data", - [ - [None, None, 3.2, 1, None, None], - [None, "a", "3.2", "z", None, None], - pd.Series(["a", "b", None], dtype="category"), - np.array([1, 2, 3, None], dtype="datetime64[s]"), - ], -) -def test_index_from_arrow(data): - pdi = pd.Index(data) - - arrow_array = pa.Array.from_pandas(pdi) - expected_index = pd.Index(arrow_array.to_pandas()) - gdi = cudf.Index.from_arrow(arrow_array) - - assert_eq(expected_index, gdi) - - -def test_multiindex_to_arrow(): - pdf = pd.DataFrame( - { - "a": [1, 2, 1, 2, 3], - "b": [1.0, 2.0, 3.0, 4.0, 5.0], - "c": np.array([1, 2, 3, None, 5], dtype="datetime64[s]"), - "d": ["a", "b", "c", "d", "e"], - } - ) - pdf["a"] = pdf["a"].astype("category") - df = cudf.from_pandas(pdf) - gdi = cudf.MultiIndex.from_frame(df) - - expected = 
pa.Table.from_pandas(pdf) - got = gdi.to_arrow() - - assert_eq(expected, got) - - -def test_multiindex_from_arrow(): - pdf = pd.DataFrame( - { - "a": [1, 2, 1, 2, 3], - "b": [1.0, 2.0, 3.0, 4.0, 5.0], - "c": np.array([1, 2, 3, None, 5], dtype="datetime64[s]"), - "d": ["a", "b", "c", "d", "e"], - } - ) - pdf["a"] = pdf["a"].astype("category") - ptb = pa.Table.from_pandas(pdf) - gdi = cudf.MultiIndex.from_arrow(ptb) - pdi = pd.MultiIndex.from_frame(pdf) - - assert_eq(pdi, gdi) - - -def test_index_equals_categories(): - lhs = cudf.CategoricalIndex( - ["a", "b", "c", "b", "a"], categories=["a", "b", "c"] - ) - rhs = cudf.CategoricalIndex( - ["a", "b", "c", "b", "a"], categories=["a", "b", "c", "_"] - ) - - got = lhs.equals(rhs) - expect = lhs.to_pandas().equals(rhs.to_pandas()) - - assert_eq(expect, got) - - -def test_rangeindex_arg_validation(): - with pytest.raises(TypeError): - RangeIndex("1") - - with pytest.raises(TypeError): - RangeIndex(1, "2") - - with pytest.raises(TypeError): - RangeIndex(1, 3, "1") - - with pytest.raises(ValueError): - RangeIndex(1, dtype="float64") - - with pytest.raises(ValueError): - RangeIndex(1, dtype="uint64") - - -def test_rangeindex_name_not_hashable(): - with pytest.raises(ValueError): - RangeIndex(range(2), name=["foo"]) - - with pytest.raises(ValueError): - RangeIndex(range(2)).copy(name=["foo"]) - - -def test_index_rangeindex_searchsorted(): - # step > 0 - ridx = RangeIndex(-13, 17, 4) - for i in range(len(ridx)): - assert i == ridx.searchsorted(ridx[i], side="left") - assert i + 1 == ridx.searchsorted(ridx[i], side="right") - - -@pytest.mark.parametrize( - "rge", - [(1, 10, 1), (1, 10, 3), (10, -17, -1), (10, -17, -3)], -) -def test_index_rangeindex_get_item_basic(rge): - pridx = pd.RangeIndex(*rge) - gridx = cudf.RangeIndex(*rge) - - for i in range(-len(pridx), len(pridx)): - assert pridx[i] == gridx[i] - - -@pytest.mark.parametrize( - "rge", - [(1, 10, 3), (10, 1, -3)], -) -def test_index_rangeindex_get_item_out_of_bounds(rge): - gridx = cudf.RangeIndex(*rge) - with pytest.raises(IndexError): - _ = gridx[4] - - -@pytest.mark.parametrize( - "rge", - [(10, 1, 1), (-17, 10, -3)], -) -def test_index_rangeindex_get_item_null_range(rge): - gridx = cudf.RangeIndex(*rge) - - with pytest.raises(IndexError): - gridx[0] - - -@pytest.mark.parametrize( - "rge", [(-17, 21, 2), (21, -17, -3), (0, 0, 1), (0, 1, -3), (10, 0, 5)] -) -@pytest.mark.parametrize( - "sl", - [ - slice(1, 7, 1), - slice(1, 7, 2), - slice(-1, 7, 1), - slice(-1, 7, 2), - slice(-3, 7, 2), - slice(7, 1, -2), - slice(7, -3, -2), - slice(None, None, 1), - slice(0, None, 2), - slice(0, None, 3), - slice(0, 0, 3), - ], -) -def test_index_rangeindex_get_item_slices(rge, sl): - pridx = pd.RangeIndex(*rge) - gridx = cudf.RangeIndex(*rge) - - assert_eq(pridx[sl], gridx[sl]) - - -@pytest.mark.parametrize( - "idx", - [ - pd.Index([1, 2, 3]), - pd.Index(["abc", "def", "ghi"]), - pd.RangeIndex(0, 10, 1), - pd.Index([0.324, 0.234, 1.3], name="abc"), - ], -) -@pytest.mark.parametrize("names", [None, "a", "new name", ["another name"]]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_index_set_names(idx, names, inplace): - pi = idx.copy() - gi = cudf.from_pandas(idx) - - expected = pi.set_names(names=names, inplace=inplace) - actual = gi.set_names(names=names, inplace=inplace) - - if inplace: - expected, actual = pi, gi - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("level", [1, [0], "abc"]) -@pytest.mark.parametrize("names", [None, "a"]) -def test_index_set_names_error(level, 
names): - pi = pd.Index([1, 2, 3], name="abc") - gi = cudf.from_pandas(pi) - - assert_exceptions_equal( - lfunc=pi.set_names, - rfunc=gi.set_names, - lfunc_args_and_kwargs=([], {"names": names, "level": level}), - rfunc_args_and_kwargs=([], {"names": names, "level": level}), - ) - - -@pytest.mark.parametrize( - "data", [[1, 3, 6], [6, 1, 3]], ids=["monotonic", "non-monotonic"] -) -@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) -def test_get_indexer_single_unique_numeric(data, method): - key = list(range(0, 8)) - pi = pd.Index(data) - gi = cudf.from_pandas(pi) - - if ( - # `method` only applicable to monotonic index - not pi.is_monotonic_increasing and method is not None - ): - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key, "method": method}), - rfunc_args_and_kwargs=([], {"key": key, "method": method}), - ) - else: - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - with cudf.option_context("mode.pandas_compatible", True): - got = gi.get_indexer(key, method=method) - assert_eq(expected, got, check_dtype=True) - - -@pytest.mark.parametrize( - "rng", - [ - range(1, 20, 3), - range(20, 35, 3), - range(35, 77, 3), - range(77, 110, 3), - ], -) -@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) -@pytest.mark.parametrize("tolerance", [None, 0, 1, 13, 20]) -def test_get_indexer_rangeindex(rng, method, tolerance): - key = list(rng) - pi = pd.RangeIndex(3, 100, 4) - gi = cudf.from_pandas(pi) - - expected = pi.get_indexer( - key, method=method, tolerance=None if method is None else tolerance - ) - got = gi.get_indexer( - key, method=method, tolerance=None if method is None else tolerance - ) - - assert_eq(expected, got) - - with cudf.option_context("mode.pandas_compatible", True): - got = gi.get_indexer( - key, method=method, tolerance=None if method is None else tolerance - ) - assert_eq(expected, got, check_dtype=True) - - -@pytest.mark.parametrize("key", list(range(1, 110, 3))) -def test_get_loc_rangeindex(key): - pi = pd.RangeIndex(3, 100, 4) - gi = cudf.from_pandas(pi) - if ( - (key not in pi) - # Get key before the first element is KeyError - or (key < pi.start) - # Get key after the last element is KeyError - or (key >= pi.stop) - ): - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "idx", - [ - [1, 3, 3, 6], - [6, 1, 3, 3], - [4, 3, 2, 1, 0], - ], - ids=["monotonic increasing", "non-monotonic", "monotonic decreasing"], -) -@pytest.mark.parametrize("key", [0, 3, 6, 7, 4]) -def test_get_loc_duplicate_numeric(idx, key): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if key not in pi: - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "idx", - [ - [-1, 2, 3, 6], - [6, 1, 3, 4], - ], - ids=["monotonic", "non-monotonic"], -) -@pytest.mark.parametrize("key", [[0, 3, 1], [6, 7]]) -@pytest.mark.parametrize("method", [None, "ffill", "bfill", "nearest"]) -@pytest.mark.parametrize("tolerance", [None, 1, 2]) -def test_get_indexer_single_duplicate_numeric(idx, key, 
method, tolerance): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if not pi.is_monotonic_increasing and method is not None: - assert_exceptions_equal( - lfunc=pi.get_indexer, - rfunc=gi.get_indexer, - lfunc_args_and_kwargs=([], {"key": key, "method": method}), - rfunc_args_and_kwargs=([], {"key": key, "method": method}), - ) - else: - expected = pi.get_indexer( - key, method=method, tolerance=None if method is None else tolerance - ) - got = gi.get_indexer( - key, method=method, tolerance=None if method is None else tolerance - ) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]]) -@pytest.mark.parametrize("key", ["a", "f", "n", "z"]) -def test_get_loc_single_unique_string(idx, key): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if key not in pi: - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("idx", [["b", "f", "m", "q"], ["m", "f", "b", "q"]]) -@pytest.mark.parametrize("key", [["a", "f", "n", "z"], ["p", "p", "b"]]) -@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_single_unique_string(idx, key, method): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if not pi.is_monotonic_increasing and method is not None: - assert_exceptions_equal( - lfunc=pi.get_indexer, - rfunc=gi.get_indexer, - lfunc_args_and_kwargs=([], {"key": key, "method": method}), - rfunc_args_and_kwargs=([], {"key": key, "method": method}), - ) - else: - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["m", "f", "m", "q"]]) -@pytest.mark.parametrize("key", ["a", "f", "n", "z"]) -def test_get_loc_single_duplicate_string(idx, key): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if key not in pi: - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("idx", [["b", "m", "m", "q"], ["a", "f", "m", "q"]]) -@pytest.mark.parametrize("key", [["a"], ["f", "n", "z"]]) -@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_single_duplicate_string(idx, key, method): - pi = pd.Index(idx) - gi = cudf.from_pandas(pi) - - if ( - # `method` only applicable to monotonic index - (not pi.is_monotonic_increasing and method is not None) - or not pi.is_unique - ): - assert_exceptions_equal( - lfunc=pi.get_indexer, - rfunc=gi.get_indexer, - lfunc_args_and_kwargs=([], {"key": key, "method": method}), - rfunc_args_and_kwargs=([], {"key": key, "method": method}), - ) - else: - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - with cudf.option_context("mode.pandas_compatible", True): - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got, check_dtype=True) - - -@pytest.mark.parametrize( - "data", - [ - [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], - [(1, 1, 1), (1, 1, 2), (1, 1, 2), (1, 2, 3), (2, 1, 1), (2, 2, 1)], - ], -) -@pytest.mark.parametrize("key", 
[1, (1, 2), (1, 2, 3), (2, 1, 1), (9, 9, 9)]) -def test_get_loc_multi_numeric(data, key): - idx = pd.MultiIndex.from_tuples(data) - pi = idx.sort_values() - gi = cudf.from_pandas(pi) - - if key not in pi: - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data", - [ - [(1, 1, 1), (1, 1, 2), (1, 2, 1), (1, 2, 3), (2, 1, 1), (2, 2, 1)], - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 2), (2, 2, 1), (1, 1, 1)], - [(1, 1, 1), (1, 1, 2), (1, 1, 24), (1, 2, 3), (2, 1, 1), (2, 2, 1)], - ], -) -@pytest.mark.parametrize("key", [[(1, 2, 3)], [(9, 9, 9)]]) -@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_multi_numeric(data, key, method): - idx = pd.MultiIndex.from_tuples(data) - pi = idx.sort_values() - gi = cudf.from_pandas(pi) - - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - with cudf.option_context("mode.pandas_compatible", True): - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got, check_dtype=True) - - -@pytest.mark.parametrize( - "key, result", - [ - (1, slice(1, 5, 1)), # deviates - ((1, 2), slice(1, 3, 1)), - ((1, 2, 3), slice(1, 2, None)), - ((2, 1, 1), slice(0, 1, None)), - ((9, 9, 9), None), - ], -) -def test_get_loc_multi_numeric_deviate(key, result): - pi = pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 1), (1, 1, 1), (2, 2, 1)] - ) - gi = cudf.from_pandas(pi) - - with expect_warning_if( - isinstance(key, tuple), pd.errors.PerformanceWarning - ): - key_flag = key not in pi - - if key_flag: - with expect_warning_if( - isinstance(key, tuple), pd.errors.PerformanceWarning - ): - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = result - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "key", - [ - ((1, 2, 3),), - ((2, 1, 1),), - ((9, 9, 9),), - ], -) -@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_multi_numeric_deviate(key, method): - pi = pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 10), (1, 1, 1), (2, 2, 1)] - ).sort_values() - gi = cudf.from_pandas(pi) - - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - -@pytest.mark.parametrize("method", ["ffill", "bfill"]) -@pytest.mark.skipif( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) -def test_get_indexer_multi_error(method): - pi = pd.MultiIndex.from_tuples( - [(2, 1, 1), (1, 2, 3), (1, 2, 1), (1, 1, 10), (1, 1, 1), (2, 2, 1)] - ) - gi = cudf.from_pandas(pi) - - assert_exceptions_equal( - pi.get_indexer, - gi.get_indexer, - lfunc_args_and_kwargs=( - [], - {"target": ((1, 2, 3),), "method": method}, - ), - rfunc_args_and_kwargs=( - [], - {"target": ((1, 2, 3),), "method": method}, - ), - ) - - -@pytest.mark.parametrize( - "data", - [ - [ - ("a", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "a"), - ("a", "b", "c"), - ("b", 
"a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "a"), - ("a", "a", "b"), - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "b"), - ("b", "a", "a"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ], - ], -) -@pytest.mark.parametrize( - "key", ["a", ("a", "a"), ("a", "b", "c"), ("b", "c", "a"), ("z", "z", "z")] -) -def test_get_loc_multi_string(data, key): - idx = pd.MultiIndex.from_tuples(data) - pi = idx.sort_values() - gi = cudf.from_pandas(pi) - - if key not in pi: - assert_exceptions_equal( - lfunc=pi.get_loc, - rfunc=gi.get_loc, - lfunc_args_and_kwargs=([], {"key": key}), - rfunc_args_and_kwargs=([], {"key": key}), - ) - else: - expected = pi.get_loc(key) - got = gi.get_loc(key) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "data", - [ - [ - ("a", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "b"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "a"), - ("a", "b", "a"), - ("b", "c", "a"), - ], - [ - ("a", "a", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("b", "c", "a"), - ], - ], -) -@pytest.mark.parametrize( - "key", [[("a", "b", "c"), ("b", "c", "a")], [("z", "z", "z")]] -) -@pytest.mark.parametrize("method", [None, "ffill", "bfill"]) -def test_get_indexer_multi_string(data, key, method): - idx = pd.MultiIndex.from_tuples(data) - pi = idx.sort_values() - gi = cudf.from_pandas(pi) - - expected = pi.get_indexer(key, method=method) - got = gi.get_indexer(key, method=method) - - assert_eq(expected, got) - - -@pytest.mark.parametrize( - "idx1", - [ - lambda: cudf.Index(["a", "b", "c"]), - lambda: cudf.RangeIndex(0, 10), - lambda: cudf.Index([1, 2, 3], dtype="category"), - lambda: cudf.Index(["a", "b", "c", "d"], dtype="category"), - lambda: cudf.MultiIndex.from_tuples( - [ - ("a", "a", "a"), - ("a", "b", "c"), - ("b", "a", "a"), - ("a", "a", "b"), - ("a", "b", "a"), - ("b", "c", "a"), - ] - ), - ], -) -@pytest.mark.parametrize( - "idx2", - [ - lambda: cudf.Index(["a", "b", "c"]), - lambda: cudf.RangeIndex(0, 10), - lambda: cudf.Index([1, 2, 3], dtype="category"), - lambda: cudf.Index(["a", "b", "c", "d"], dtype="category"), - ], -) -def test_get_indexer_invalid(idx1, idx2): - idx1 = idx1() - idx2 = idx2() - assert_eq( - idx1.get_indexer(idx2), idx1.to_pandas().get_indexer(idx2.to_pandas()) - ) - - -@pytest.mark.parametrize( - "objs", - [ - [pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)], - [pd.RangeIndex(10, 20), pd.RangeIndex(22, 40), pd.RangeIndex(50, 60)], - [pd.RangeIndex(10, 20, 2), pd.RangeIndex(20, 40, 2)], - ], -) -def test_range_index_concat(objs): - cudf_objs = [cudf.from_pandas(obj) for obj in objs] - - actual = cudf.concat(cudf_objs) - - expected = objs[0] - for obj in objs[1:]: - expected = expected.append(obj) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "idx1, idx2", - [ - (pd.RangeIndex(0, 10), pd.RangeIndex(3, 7)), - (pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)), - (pd.RangeIndex(0, 10, 2), pd.RangeIndex(1, 5, 3)), - (pd.RangeIndex(1, 5, 3), pd.RangeIndex(0, 10, 2)), - (pd.RangeIndex(1, 10, 3), pd.RangeIndex(1, 5, 2)), - (pd.RangeIndex(1, 5, 2), pd.RangeIndex(1, 10, 3)), - (pd.RangeIndex(1, 100, 3), pd.RangeIndex(1, 50, 3)), - (pd.RangeIndex(1, 100, 3), pd.RangeIndex(1, 50, 6)), - (pd.RangeIndex(1, 100, 6), pd.RangeIndex(1, 50, 3)), - (pd.RangeIndex(0, 10, name="a"), pd.RangeIndex(90, 100, 
name="b")), - (pd.Index([0, 1, 2, 30], name="a"), pd.Index([90, 100])), - (pd.Index([0, 1, 2, 30], name="a"), [90, 100]), - (pd.Index([0, 1, 2, 30]), pd.Index([0, 10, 1.0, 11])), - (pd.Index(["a", "b", "c", "d", "c"]), pd.Index(["a", "c", "z"])), - ( - pd.IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]), - pd.IntervalIndex.from_tuples([(0, 2), (2, 4)]), - ), - (pd.RangeIndex(0, 10), pd.Index([8, 1, 2, 4])), - (pd.Index([8, 1, 2, 4], name="a"), pd.Index([8, 1, 2, 4], name="b")), - ( - pd.Index([8, 1, 2, 4], name="a"), - pd.Index([], name="b", dtype="int64"), - ), - (pd.Index([], dtype="int64", name="a"), pd.Index([10, 12], name="b")), - (pd.Index([True, True, True], name="a"), pd.Index([], dtype="bool")), - ( - pd.Index([True, True, True]), - pd.Index([False, True], dtype="bool", name="b"), - ), - ], -) -@pytest.mark.parametrize("sort", [None, False, True]) -def test_union_index(idx1, idx2, sort): - expected = idx1.union(idx2, sort=sort) - - idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1 - idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2 - - actual = idx1.union(idx2, sort=sort) - - assert_eq(expected, actual) - - -def test_union_bool_with_other(): - idx1 = cudf.Index([True, True, True]) - idx2 = cudf.Index([0, 1], name="b") - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(cudf.errors.MixedTypeError): - idx1.union(idx2) - - -@pytest.mark.parametrize("dtype1", ["int8", "int32", "int32"]) -@pytest.mark.parametrize("dtype2", ["uint32", "uint64"]) -def test_union_unsigned_vs_signed(dtype1, dtype2): - idx1 = cudf.Index([10, 20, 30], dtype=dtype1) - idx2 = cudf.Index([0, 1], dtype=dtype2) - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(cudf.errors.MixedTypeError): - idx1.union(idx2) - - -@pytest.mark.parametrize( - "idx1, idx2", - [ - (pd.RangeIndex(0, 10), pd.RangeIndex(3, 7)), - (pd.RangeIndex(0, 10), pd.RangeIndex(-10, 20)), - (pd.RangeIndex(0, 10, name="a"), pd.RangeIndex(90, 100, name="b")), - (pd.Index([0, 1, 2, 30], name=pd.NA), pd.Index([30, 0, 90, 100])), - (pd.Index([0, 1, 2, 30], name="a"), [90, 100]), - (pd.Index([0, 1, 2, 30]), pd.Index([0, 10, 1.0, 11])), - ( - pd.Index(["a", "b", "c", "d", "c"]), - pd.Index(["a", "c", "z"], name="abc"), - ), - ( - pd.Index(["a", "b", "c", "d", "c"]), - pd.Index(["a", "b", "c", "d", "c"]), - ), - (pd.Index([True, False, True, True]), pd.Index([10, 11, 12, 0, 1, 2])), - (pd.Index([True, False, True, True]), pd.Index([True, True])), - (pd.RangeIndex(0, 10, name="a"), pd.Index([5, 6, 7], name="b")), - (pd.Index(["a", "b", "c"], dtype="category"), pd.Index(["a", "b"])), - (pd.Index([0, 1, 2], dtype="category"), pd.RangeIndex(0, 10)), - (pd.Index(["a", "b", "c"], name="abc"), []), - (pd.Index([], name="abc"), pd.RangeIndex(0, 4)), - (pd.Index([1, 2, 3]), pd.Index([1, 2], dtype="category")), - (pd.Index([]), pd.Index([1, 2], dtype="category")), - ], -) -@pytest.mark.parametrize("sort", [None, False, True]) -@pytest.mark.parametrize("pandas_compatible", [True, False]) -def test_intersection_index(idx1, idx2, sort, pandas_compatible): - expected = idx1.intersection(idx2, sort=sort) - - with cudf.option_context("mode.pandas_compatible", pandas_compatible): - idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1 - idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2 - - actual = idx1.intersection(idx2, sort=sort) - - # TODO: Resolve the bool vs ints mixed issue - # once pandas has a direction on this issue - # 
https://github.com/pandas-dev/pandas/issues/44000 - assert_eq( - expected, - actual, - exact=False - if (idx1.dtype.kind == "b" and idx2.dtype.kind != "b") - or (idx1.dtype.kind != "b" or idx2.dtype.kind == "b") - else True, - ) - - -@pytest.mark.parametrize( - "idx1, idx2", - [ - (pd.Index(["a", "b", "c"], dtype="category"), pd.Index([1, 2, 3])), - ], -) -@pytest.mark.parametrize("sort", [None, False, True]) -@pytest.mark.parametrize("pandas_compatible", [True, False]) -def test_intersection_index_error(idx1, idx2, sort, pandas_compatible): - expected = idx1.intersection(idx2, sort=sort) - - with cudf.option_context("mode.pandas_compatible", pandas_compatible): - idx1 = cudf.from_pandas(idx1) if isinstance(idx1, pd.Index) else idx1 - idx2 = cudf.from_pandas(idx2) if isinstance(idx2, pd.Index) else idx2 - - if pandas_compatible: - with pytest.raises( - ValueError, - match="Cannot convert numerical column to string column when dtype is an object dtype in pandas compatibility mode.", - ): - idx1.intersection(idx2, sort=sort) - else: - actual = idx1.intersection(idx2, sort=sort) - - assert_eq( - expected, - actual, - ) - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3], - ["a", "v", "d"], - [234.243, 2432.3, None], - [True, False, True], - pd.Series(["a", " ", "v"], dtype="category"), - pd.IntervalIndex.from_breaks([0, 1, 2, 3]), - ], -) -@pytest.mark.parametrize( - "func", - [ - "is_numeric", - "is_boolean", - "is_integer", - "is_floating", - "is_object", - "is_categorical", - "is_interval", - ], -) -def test_index_type_methods(data, func): - pidx = pd.Index(data) - gidx = cudf.from_pandas(pidx) - - with pytest.warns(FutureWarning): - expected = getattr(pidx, func)() - with pytest.warns(FutureWarning): - actual = getattr(gidx, func)() - - if gidx.dtype == np.dtype("bool") and func == "is_object": - assert_eq(False, actual) - else: - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] -) -def test_index_datetime_ceil(resolution): - cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) - pidx = cuidx.to_pandas() - - pidx_ceil = pidx.ceil(resolution) - cuidx_ceil = cuidx.ceil(resolution) - - assert_eq(pidx_ceil, cuidx_ceil) - - -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] -) -def test_index_datetime_floor(resolution): - cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) - pidx = cuidx.to_pandas() - - pidx_floor = pidx.floor(resolution) - cuidx_floor = cuidx.floor(resolution) - - assert_eq(pidx_floor, cuidx_floor) - - -@pytest.mark.parametrize( - "resolution", ["D", "h", "min", "s", "ms", "us", "ns"] -) -def test_index_datetime_round(resolution): - cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) - pidx = cuidx.to_pandas() - - pidx_floor = pidx.round(resolution) - cuidx_floor = cuidx.round(resolution) - - assert_eq(pidx_floor, cuidx_floor) - - -@pytest.mark.parametrize( - "data,nan_idx,NA_idx", - [([1, 2, 3, None], None, 3), ([2, 3, np.nan, None], 2, 3)], -) -@pytest.mark.parametrize("nan_as_null", [True, False]) -def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null): - idx = cudf.Index(data, nan_as_null=nan_as_null) - - if nan_as_null: - if nan_idx is not None: - assert idx[nan_idx] is cudf.NA - else: - if nan_idx is not None: - assert np.isnan(idx[nan_idx]) - - if NA_idx is not None: - assert idx[NA_idx] is cudf.NA - - -@pytest.mark.parametrize( - "index", - [ - pd.Index([]), - pd.Index(["a", "b", "c", "d", 
"e"]), - pd.Index([0, None, 9]), - pd.date_range("2019-01-01", periods=3), - ], -) -@pytest.mark.parametrize( - "values", - [ - [], - ["this", "is"], - [0, 19, 13], - ["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02 10:00:00"], - ], -) -def test_isin_index(index, values): - pidx = index - gidx = cudf.Index.from_pandas(pidx) - - is_dt_str = ( - next(iter(values), None) == "2019-01-01 04:00:00" - and len(pidx) - and pidx.dtype.kind == "M" - ) - with expect_warning_if(is_dt_str): - got = gidx.isin(values) - with expect_warning_if(PANDAS_GE_220 and is_dt_str): - expected = pidx.isin(values) - - assert_eq(got, expected) - - -@pytest.mark.parametrize( - "data", - [ - pd.MultiIndex.from_arrays( - [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") - ), - pd.MultiIndex.from_arrays([[], []], names=("number", "color")), - pd.MultiIndex.from_arrays( - [[1, 2, 3, 10, 100], ["red", "blue", "green", "pink", "white"]], - names=("number", "color"), - ), - pd.MultiIndex.from_product( - [[0, 1], ["red", "blue", "green"]], names=("number", "color") - ), - ], -) -@pytest.mark.parametrize( - "values,level,err", - [ - ([(1, "red"), (2, "blue"), (0, "green")], None, None), - (["red", "orange", "yellow"], "color", None), - (["red", "white", "yellow"], "color", None), - ([0, 1, 2, 10, 11, 15], "number", None), - ([0, 1, 2, 10, 11, 15], None, TypeError), - (pd.Series([0, 1, 2, 10, 11, 15]), None, TypeError), - (pd.Index([0, 1, 2, 10, 11, 15]), None, TypeError), - (pd.Index([0, 1, 2, 8, 11, 15]), "number", None), - (pd.Index(["red", "white", "yellow"]), "color", None), - ([(1, "red"), (3, "red")], None, None), - (((1, "red"), (3, "red")), None, None), - ( - pd.MultiIndex.from_arrays( - [[1, 2, 3], ["red", "blue", "green"]], - names=("number", "color"), - ), - None, - None, - ), - ( - pd.MultiIndex.from_arrays([[], []], names=("number", "color")), - None, - None, - ), - ( - pd.MultiIndex.from_arrays( - [ - [1, 2, 3, 10, 100], - ["red", "blue", "green", "pink", "white"], - ], - names=("number", "color"), - ), - None, - None, - ), - ], -) -def test_isin_multiindex(data, values, level, err): - pmdx = data - gmdx = cudf.from_pandas(data) - - if err is None: - expected = pmdx.isin(values, level=level) - if isinstance(values, pd.MultiIndex): - values = cudf.from_pandas(values) - got = gmdx.isin(values, level=level) - - assert_eq(got, expected) - else: - assert_exceptions_equal( - lfunc=pmdx.isin, - rfunc=gmdx.isin, - lfunc_args_and_kwargs=([values], {"level": level}), - rfunc_args_and_kwargs=([values], {"level": level}), - check_exception_type=False, - ) - - -@pytest.mark.parametrize( - "rangeindex", - [ - range(np.random.default_rng(seed=0).integers(0, 100)), - range(9, 12, 2), - range(20, 30), - range(100, 1000, 10), - range(0, 10, -2), - range(0, -10, 2), - range(0, -10, -2), - ], -) -@pytest.mark.parametrize( - "func", - ["nunique", "min", "max", "any", "values"], -) -def test_rangeindex_methods(rangeindex, func): - gidx = cudf.RangeIndex(rangeindex) - pidx = gidx.to_pandas() - - if func == "values": - expected = pidx.values - actual = gidx.values - else: - expected = getattr(pidx, func)() - actual = getattr(gidx, func)() - - assert_eq(expected, actual) - - -def test_index_constructor_integer(default_integer_bitwidth): - got = cudf.Index([1, 2, 3]) - expect = cudf.Index([1, 2, 3], dtype=f"int{default_integer_bitwidth}") - - assert_eq(expect, got) - - -def test_index_constructor_float(default_float_bitwidth): - got = cudf.Index([1.0, 2.0, 3.0]) - expect = cudf.Index( - [1.0, 2.0, 3.0], 
dtype=f"float{default_float_bitwidth}" - ) - - assert_eq(expect, got) - - -def test_rangeindex_union_default_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for union operation. - idx1 = cudf.RangeIndex(0, 2) - idx2 = cudf.RangeIndex(5, 6) - - expected = cudf.Index([0, 1, 5], dtype=f"int{default_integer_bitwidth}") - actual = idx1.union(idx2) - - assert_eq(expected, actual) - - -def test_rangeindex_intersection_default_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for intersection operation. - idx1 = cudf.RangeIndex(0, 100) - # Intersecting two RangeIndex will _always_ result in a RangeIndex, use - # regular index here to force materializing. - idx2 = cudf.Index([50, 102]) - - expected = cudf.Index([50], dtype=f"int{default_integer_bitwidth}") - actual = idx1.intersection(idx2) - - assert_eq(expected, actual) - - -def test_rangeindex_take_default_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for take operation. - idx = cudf.RangeIndex(0, 100) - actual = idx.take([0, 3, 7, 62]) - expected = cudf.Index( - [0, 3, 7, 62], dtype=f"int{default_integer_bitwidth}" - ) - assert_eq(expected, actual) - - -def test_rangeindex_apply_boolean_mask_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for apply boolean mask operation. - idx = cudf.RangeIndex(0, 8) - mask = [True, True, True, False, False, False, True, False] - actual = idx[mask] - expected = cudf.Index([0, 1, 2, 6], dtype=f"int{default_integer_bitwidth}") - assert_eq(expected, actual) - - -def test_rangeindex_repeat_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for repeat operation. - idx = cudf.RangeIndex(0, 3) - actual = idx.repeat(3) - expected = cudf.Index( - [0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=f"int{default_integer_bitwidth}" - ) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "op, expected, expected_kind", - [ - (lambda idx: 2**idx, [2, 4, 8, 16], "int"), - (lambda idx: idx**2, [1, 4, 9, 16], "int"), - (lambda idx: idx / 2, [0.5, 1, 1.5, 2], "float"), - (lambda idx: 2 / idx, [2, 1, 2 / 3, 0.5], "float"), - (lambda idx: idx % 3, [1, 2, 0, 1], "int"), - (lambda idx: 3 % idx, [0, 1, 0, 3], "int"), - ], -) -def test_rangeindex_binops_user_option( - op, expected, expected_kind, default_integer_bitwidth -): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for binary operation. - idx = cudf.RangeIndex(1, 5) - actual = op(idx) - expected = cudf.Index( - expected, dtype=f"{expected_kind}{default_integer_bitwidth}" - ) - assert_eq( - expected, - actual, - ) - - -@pytest.mark.parametrize( - "op", [operator.add, operator.sub, operator.mul, operator.truediv] -) -def test_rangeindex_binop_diff_names_none(op): - idx1 = cudf.RangeIndex(10, 13, name="foo") - idx2 = cudf.RangeIndex(13, 16, name="bar") - result = op(idx1, idx2) - expected = op(idx1.to_pandas(), idx2.to_pandas()) - assert_eq(result, expected) - assert result.name is None - - -def test_rangeindex_join_user_option(default_integer_bitwidth): - # Test that RangeIndex is materialized into 32 bit index under user - # configuration for join. 
-    idx1 = cudf.RangeIndex(0, 10, name="a")
-    idx2 = cudf.RangeIndex(5, 15, name="b")
-
-    actual = idx1.join(idx2, how="inner", sort=True)
-    expected = idx1.to_pandas().join(idx2.to_pandas(), how="inner", sort=True)
-    assert actual.dtype == cudf.dtype(f"int{default_integer_bitwidth}")
-    # exact=False to ignore dtype comparison,
-    # because `default_integer_bitwidth` is cudf only option
-    assert_eq(expected, actual, exact=False)
-
-
-def test_rangeindex_where_user_option(default_integer_bitwidth):
-    # Test that RangeIndex is materialized into 32 bit index under user
-    # configuration for where operation.
-    idx = cudf.RangeIndex(0, 10)
-    mask = [True, False, True, False, True, False, True, False, True, False]
-    actual = idx.where(mask, -1)
-    expected = cudf.Index(
-        [0, -1, 2, -1, 4, -1, 6, -1, 8, -1],
-        dtype=f"int{default_integer_bitwidth}",
-    )
-    assert_eq(expected, actual)
-
-
-def test_rangeindex_append_return_rangeindex():
-    idx = cudf.RangeIndex(0, 10)
-    result = idx.append([])
-    assert_eq(idx, result)
-
-    result = idx.append(cudf.Index([10]))
-    expected = cudf.RangeIndex(0, 11)
-    assert_eq(result, expected)
-
-
-@pytest.mark.parametrize(
-    "index",
-    [
-        range(np.random.default_rng(seed=0).integers(0, 100)),
-        range(0, 10, -2),
-        range(0, -10, 2),
-        range(0, -10, -2),
-        range(0, 1),
-        [1, 2, 3, 1, None, None],
-        [None, None, 3.2, 1, None, None],
-        [None, "a", "3.2", "z", None, None],
-        pd.Series(["a", "b", None], dtype="category"),
-        np.array([1, 2, 3, None], dtype="datetime64[s]"),
-    ],
-)
-@pytest.mark.parametrize(
-    "func",
-    [
-        "to_series",
-        "isna",
-        "notna",
-        "append",
-    ],
-)
-def test_index_methods(index, func):
-    gidx = cudf.Index(index)
-    pidx = gidx.to_pandas()
-
-    if func == "append":
-        expected = pidx.append(other=pidx)
-        actual = gidx.append(other=gidx)
-    else:
-        expected = getattr(pidx, func)()
-        actual = getattr(gidx, func)()
-
-    assert_eq(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "idx, values",
-    [
-        (range(100, 1000, 10), [200, 600, 800]),
-        ([None, "a", "3.2", "z", None, None], ["a", "z"]),
-        (pd.Series(["a", "b", None], dtype="category"), [10, None]),
-    ],
-)
-def test_index_isin_values(idx, values):
-    gidx = cudf.Index(idx)
-    pidx = gidx.to_pandas()
-
-    actual = gidx.isin(values)
-    expected = pidx.isin(values)
-
-    assert_eq(expected, actual)
-
-
-@pytest.mark.parametrize(
-    "idx, scalar",
-    [
-        (range(0, -10, -2), -4),
-        ([None, "a", "3.2", "z", None, None], "x"),
-        (pd.Series(["a", "b", None], dtype="category"), 10),
-    ],
-)
-def test_index_isin_scalar_values(idx, scalar):
-    gidx = cudf.Index(idx)
-
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            f"only list-like objects are allowed to be passed "
-            f"to isin(), you passed a {type(scalar).__name__}"
-        ),
-    ):
-        gidx.isin(scalar)
-
-
-def test_index_any():
-    gidx = cudf.Index([1, 2, 3])
-    pidx = gidx.to_pandas()
-
-    assert_eq(pidx.any(), gidx.any())
-
-
-def test_index_values():
-    gidx = cudf.Index([1, 2, 3])
-    pidx = gidx.to_pandas()
-
-    assert_eq(pidx.values, gidx.values)
-
-
-def test_index_null_values():
-    gidx = cudf.Index([1.0, None, 3, 0, None])
-    with pytest.raises(ValueError):
-        gidx.values
-
-
-def test_index_error_list_index():
-    s = cudf.Series([[1, 2], [2], [4]])
-    with pytest.raises(
-        NotImplementedError,
-        match=re.escape(
-            "Unsupported column type passed to create an "
-            "Index: <class 'cudf.core.column.lists.ListColumn'>"
-        ),
-    ):
-        cudf.Index(s)
-
-
-@pytest.mark.parametrize(
-    "data",
-    [
-        [1, 2, 3],
-        pytest.param(
-            [np.nan, 10, 15, 16],
-            marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/49818" - ), - ), - range(0, 10), - [np.nan, None, 10, 20], - ["ab", "zx", "pq"], - ["ab", "zx", None, "pq"], - ], -) -def test_index_hasnans(data): - gs = cudf.Index(data, nan_as_null=False) - if isinstance(gs, cudf.RangeIndex): - with pytest.raises(NotImplementedError): - gs.to_pandas(nullable=True) - else: - ps = gs.to_pandas(nullable=True) - # Check type to avoid mixing Python bool and NumPy bool - assert isinstance(gs.hasnans, bool) - assert gs.hasnans == ps.hasnans - - -@pytest.mark.parametrize( - "data", - [ - [1, 2, 3, 1, 1, 3, 2, 3], - [np.nan, 10, 15, 16, np.nan, 10, 16], - range(0, 10), - ["ab", "zx", None, "pq", "ab", None, "zx", None], - ], -) -@pytest.mark.parametrize("keep", ["first", "last", False]) -def test_index_duplicated(data, keep): - gs = cudf.Index(data) - ps = gs.to_pandas() - - expected = ps.duplicated(keep=keep) - actual = gs.duplicated(keep=keep) - assert_eq(expected, actual) - - -@pytest.mark.parametrize( - "data,expected_dtype", - [ - ([10, 11, 12], pd.Int64Dtype()), - ([0.1, 10.2, 12.3], pd.Float64Dtype()), - (["abc", None, "def"], pd.StringDtype()), - ], -) -def test_index_to_pandas_nullable(data, expected_dtype): - gi = cudf.Index(data) - pi = gi.to_pandas(nullable=True) - expected = pd.Index(data, dtype=expected_dtype) - - assert_eq(pi, expected) - - -@pytest.mark.parametrize( - "index_values", - [range(1, 10, 2), [1, 2, 3], ["a", "b", "c"], [1.5, 2.5, 3.5]], -) -@pytest.mark.parametrize("i_type", [int, np.int8, np.int32, np.int64]) -def test_scalar_getitem(index_values, i_type): - i = i_type(1) - index = cudf.Index(index_values) - - assert not isinstance(index[i], cudf.Index) - assert index[i] == index_values[i] - assert_eq(index, index.to_pandas()) - - -@pytest.mark.parametrize( - "data", - [ - [ - pd.Timestamp("1970-01-01 00:00:00.000000001"), - pd.Timestamp("1970-01-01 00:00:00.000000002"), - 12, - 20, - ], - [ - pd.Timedelta(10), - pd.Timedelta(20), - 12, - 20, - ], - [1, 2, 3, 4], - ], -) -def test_index_mixed_dtype_error(data): - pi = pd.Index(data, dtype="object") - with pytest.raises(TypeError): - cudf.Index(pi) - - -@pytest.mark.parametrize("cls", [pd.DatetimeIndex, pd.TimedeltaIndex]) -def test_index_date_duration_freq_error(cls): - s = cls([1, 2, 3], freq="infer") - with cudf.option_context("mode.pandas_compatible", True): - with pytest.raises(NotImplementedError): - cudf.Index(s) - - -@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) -def test_index_getitem_time_duration(dtype): - gidx = cudf.Index([1, 2, 3, 4, None], dtype=dtype) - pidx = gidx.to_pandas() - with cudf.option_context("mode.pandas_compatible", True): - for i in range(len(gidx)): - if i == 4: - assert gidx[i] is pidx[i] - else: - assert_eq(gidx[i], pidx[i]) - - -@pytest.mark.parametrize("dtype", ALL_TYPES) -def test_index_empty_from_pandas(dtype): - pidx = pd.Index([], dtype=dtype) - gidx = cudf.from_pandas(pidx) - - assert_eq(pidx, gidx) - - -def test_empty_index_init(): - pidx = pd.Index([]) - gidx = cudf.Index([]) - - assert_eq(pidx, gidx) - - -@pytest.mark.parametrize( - "data", [[1, 2, 3], ["ab", "cd", "e", None], range(0, 10)] -) -@pytest.mark.parametrize("data_name", [None, 1, "abc"]) -@pytest.mark.parametrize("index", [True, False]) -@pytest.mark.parametrize("name", [None, no_default, 1, "abc"]) -def test_index_to_frame(data, data_name, index, name): - pidx = pd.Index(data, name=data_name) - gidx = cudf.from_pandas(pidx) - - expected = pidx.to_frame(index=index, name=name) - actual = 
gidx.to_frame(index=index, name=name) - - assert_eq(expected, actual) - - -@pytest.mark.parametrize("data", [[1, 2, 3], range(0, 10)]) -@pytest.mark.parametrize("dtype", ["str", "int64", "float64"]) -def test_index_with_index_dtype(data, dtype): - pidx = pd.Index(data) - gidx = cudf.Index(data) - - expected = pd.Index(pidx, dtype=dtype) - actual = cudf.Index(gidx, dtype=dtype) - - assert_eq(expected, actual) - - -def test_period_index_error(): - pidx = pd.PeriodIndex(data=[pd.Period("2020-01")]) - with pytest.raises(NotImplementedError): - cudf.from_pandas(pidx) - with pytest.raises(NotImplementedError): - cudf.Index(pidx) - with pytest.raises(NotImplementedError): - cudf.Series(pidx) - with pytest.raises(NotImplementedError): - cudf.Series(pd.Series(pidx)) - with pytest.raises(NotImplementedError): - cudf.Series(pd.array(pidx)) - - -@pytest.mark.parametrize("value", [cudf.DataFrame(range(1)), 11]) -def test_index_from_dataframe_scalar_raises(value): - with pytest.raises(TypeError): - cudf.Index(value) - - -@pytest.mark.parametrize("idx", [0, np.int64(0)]) -def test_index_getitem_from_int(idx): - result = cudf.Index([1, 2])[idx] - assert result == 1 - - -@pytest.mark.parametrize("idx", [1.5, True, "foo"]) -def test_index_getitem_from_nonint_raises(idx): - with pytest.raises(ValueError): - cudf.Index([1, 2])[idx] - - -@pytest.mark.parametrize( - "data", - [ - cp.ones(5, dtype=cp.float16), - np.ones(5, dtype="float16"), - pd.Series([0.1, 1.2, 3.3], dtype="float16"), - pytest.param( - pa.array(np.ones(5, dtype="float16")), - marks=pytest.mark.xfail( - reason="https://issues.apache.org/jira/browse/ARROW-13762" - ), - ), - ], -) -def test_index_raises_float16(data): - with pytest.raises(TypeError): - cudf.Index(data) - - -def test_from_pandas_rangeindex_return_rangeindex(): - pidx = pd.RangeIndex(start=3, stop=9, step=3, name="a") - result = cudf.Index.from_pandas(pidx) - expected = cudf.RangeIndex(start=3, stop=9, step=3, name="a") - assert_eq(result, expected, exact=True) - - -@pytest.mark.parametrize( - "data", - [ - range(1), - np.array([1, 2], dtype="datetime64[ns]"), - np.array([1, 2], dtype="timedelta64[ns]"), - ], -) -def test_index_to_pandas_nullable_notimplemented(data): - idx = cudf.Index(data) - with pytest.raises(NotImplementedError): - idx.to_pandas(nullable=True) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - pd.Interval(1, 2), - ], -) -def test_index_to_pandas_arrow_type_nullable_raises(scalar): - data = [scalar, None] - idx = cudf.Index(data) - with pytest.raises(ValueError): - idx.to_pandas(nullable=True, arrow_type=True) - - -@pytest.mark.parametrize( - "scalar", - [ - 1, - 1.0, - "a", - datetime.datetime(2020, 1, 1), - datetime.timedelta(1), - ], -) -def test_index_to_pandas_arrow_type(scalar): - pa_array = pa.array([scalar, None]) - idx = cudf.Index(pa_array) - result = idx.to_pandas(arrow_type=True) - expected = pd.Index(pd.arrays.ArrowExtensionArray(pa_array)) - pd.testing.assert_index_equal(result, expected) - - -@pytest.mark.parametrize("data", [range(-3, 3), range(1, 3), range(0)]) -def test_rangeindex_all(data): - result = cudf.RangeIndex(data).all() - expected = cudf.Index(list(data)).all() - assert result == expected - - -@pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.parametrize("data", [range(2), range(2, -1, -1)]) -def test_rangeindex_factorize(sort, data): - res_codes, res_uniques = cudf.RangeIndex(data).factorize(sort=sort) - exp_codes, exp_uniques = 
cudf.Index(list(data)).factorize(sort=sort) - assert_eq(res_codes, exp_codes) - assert_eq(res_uniques, exp_uniques) - - -def test_rangeindex_dropna(): - ri = cudf.RangeIndex(range(2)) - result = ri.dropna() - expected = ri.copy() - assert_eq(result, expected) - - -def test_rangeindex_unique_shallow_copy(): - ri_pandas = pd.RangeIndex(1) - result = ri_pandas.unique() - assert result is not ri_pandas - - ri_cudf = cudf.RangeIndex(1) - result = ri_cudf.unique() - assert result is not ri_cudf - assert_eq(result, ri_cudf) - - -def test_rename_shallow_copy(): - idx = pd.Index([1]) - result = idx.rename("a") - assert idx.to_numpy(copy=False) is result.to_numpy(copy=False) - - idx = cudf.Index([1]) - result = idx.rename("a") - assert idx._column is result._column - - -@pytest.mark.parametrize("data", [range(2), [10, 11, 12]]) -def test_index_contains_hashable(data): - gidx = cudf.Index(data) - pidx = gidx.to_pandas() - - assert_exceptions_equal( - lambda: [] in gidx, - lambda: [] in pidx, - lfunc_args_and_kwargs=((),), - rfunc_args_and_kwargs=((),), - ) - - -@pytest.mark.parametrize("data", [[0, 1, 2], [1.1, 2.3, 4.5]]) -@pytest.mark.parametrize("dtype", ["int32", "float32", "float64"]) -@pytest.mark.parametrize("needle", [0, 1, 2.3]) -def test_index_contains_float_int(data, dtype, needle): - gidx = cudf.Index(data=data, dtype=dtype) - pidx = gidx.to_pandas() - - actual = needle in gidx - expected = needle in pidx - - assert_eq(actual, expected) - - -def test_Index_init_with_nans(): - with cudf.option_context("mode.pandas_compatible", True): - gi = cudf.Index([1, 2, 3, np.nan]) - assert gi.dtype == np.dtype("float64") - pi = pd.Index([1, 2, 3, np.nan]) - assert_eq(pi, gi) - - -def test_index_datetime_repeat(): - gidx = cudf.date_range("2021-01-01", periods=3, freq="D") - pidx = gidx.to_pandas() - - actual = gidx.repeat(5) - expected = pidx.repeat(5) - - assert_eq(actual, expected) - - actual = gidx.to_frame().repeat(5) - - assert_eq(actual.index, expected) - - -@pytest.mark.parametrize( - "index", - [ - lambda: cudf.Index([1]), - lambda: cudf.RangeIndex(1), - lambda: cudf.MultiIndex(levels=[[0]], codes=[[0]]), + lambda: cudf.Index([1]), + lambda: cudf.RangeIndex(1), + lambda: cudf.MultiIndex(levels=[[0]], codes=[[0]]), ], ) def test_index_assignment_no_shallow_copy(index): @@ -3189,43 +163,3 @@ def test_index_assignment_no_shallow_copy(index): df = cudf.DataFrame(range(1)) df.index = index assert df.index is index - - -def test_bool_rangeindex_raises(): - assert_exceptions_equal( - lfunc=bool, - rfunc=bool, - lfunc_args_and_kwargs=[[pd.RangeIndex(0)]], - rfunc_args_and_kwargs=[[cudf.RangeIndex(0)]], - ) - - -@pytest.mark.parametrize("ordered", [True, False]) -@pytest.mark.parametrize("name", [None, "test"]) -def test_categoricalindex_from_codes(ordered, name): - codes = [0, 1, 2, 3, 4] - categories = ["a", "b", "c", "d", "e"] - result = cudf.CategoricalIndex.from_codes(codes, categories, ordered, name) - expected = pd.CategoricalIndex( - pd.Categorical.from_codes(codes, categories, ordered=ordered), - name=name, - ) - assert_eq(result, expected) - - -@pytest.mark.parametrize("klass", [cudf.RangeIndex, pd.RangeIndex]) -@pytest.mark.parametrize("name_inner", [None, "a"]) -@pytest.mark.parametrize("name_outer", [None, "b"]) -def test_rangeindex_accepts_rangeindex(klass, name_inner, name_outer): - result = cudf.RangeIndex(klass(range(1), name=name_inner), name=name_outer) - expected = pd.RangeIndex( - pd.RangeIndex(range(1), name=name_inner), name=name_outer - ) - assert_eq(result, expected) - - -def 
test_roundtrip_index_plc_column(): - index = cudf.Index([1]) - expect = cudf.Index(index) - actual = cudf.Index.from_pylibcudf(*expect.to_pylibcudf()) - assert_eq(expect, actual) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 6d373f56b14..c26527f8282 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -360,16 +360,6 @@ def test_dataframe_loc(scalar, step): assert_eq(df.loc[np.array([0])], pdf.loc[np.array([0])]) -def test_dataframe_loc_duplicate_index_scalar(): - pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=[1, 2, 1, 4, 2]) - gdf = cudf.DataFrame.from_pandas(pdf) - - pdf_sorted = pdf.sort_values(by=list(pdf.columns), axis=0) - gdf_sorted = gdf.sort_values(by=list(gdf.columns), axis=0) - - assert_eq(pdf_sorted, gdf_sorted) - - @pytest.mark.parametrize( "mask", [[True, False, False, False, False], [True, False, True, False, True]], @@ -701,78 +691,6 @@ def test_dataframe_iloc_index_error(): gdf.iloc[nelem * 2] -@pytest.mark.parametrize("ntake", [0, 1, 10, 123, 122, 200]) -def test_dataframe_take(ntake): - rng = np.random.default_rng(seed=0) - nelem = 123 - df = cudf.DataFrame( - { - "ii": rng.integers(0, 20, nelem), - "ff": rng.random(nelem), - } - ) - - take_indices = rng.integers(0, len(df), ntake) - - actual = df.take(take_indices) - expected = df.to_pandas().take(take_indices) - - assert actual.ii.null_count == 0 - assert actual.ff.null_count == 0 - assert_eq(actual, expected) - - -@pytest.mark.parametrize("ntake", [1, 2, 8, 9]) -def test_dataframe_take_with_multiindex(ntake): - rng = np.random.default_rng(seed=0) - df = cudf.DataFrame( - index=cudf.MultiIndex( - levels=[["lama", "cow", "falcon"], ["speed", "weight", "length"]], - codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], - ) - ) - - nelem = 9 - df["ii"] = rng.integers(0, 20, nelem) - df["ff"] = rng.random(nelem) - - take_indices = rng.integers(0, len(df), ntake) - - actual = df.take(take_indices) - expected = df.to_pandas().take(take_indices) - - assert_eq(actual, expected) - - -@pytest.mark.parametrize("ntake", [0, 1, 10, 123, 122, 200]) -def test_series_take(ntake): - rng = np.random.default_rng(seed=0) - nelem = 123 - - psr = pd.Series(rng.integers(0, 20, nelem)) - gsr = cudf.Series(psr) - - take_indices = rng.integers(0, len(gsr), ntake) - - actual = gsr.take(take_indices) - expected = psr.take(take_indices) - - assert_eq(actual, expected) - - -def test_series_take_positional(): - psr = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) - - gsr = cudf.Series.from_pandas(psr) - - take_indices = [1, 2, 0, 3] - - expect = psr.take(take_indices) - got = gsr.take(take_indices) - - assert_eq(expect, got) - - @pytest.mark.parametrize("nelem", [0, 1, 5, 20, 100]) @pytest.mark.parametrize("slice_start", [None, 0, 1, 3, 10, -10]) @pytest.mark.parametrize("slice_end", [None, 0, 1, 30, 50, -1]) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index b612e20a17f..edb7fc3ec9b 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1,10 +1,7 @@ # Copyright (c) 2025, NVIDIA CORPORATION. 
-import itertools import operator -import pickle from contextlib import contextmanager -from io import BytesIO import cupy as cp import numpy as np @@ -13,7 +10,7 @@ import cudf from cudf.core.column import as_column -from cudf.testing import assert_eq, assert_neq +from cudf.testing import assert_eq from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @@ -172,15 +169,6 @@ def test_series_multiindex(pdfIndex): assert_eq(ps, gs) -def test_multiindex_getitem(pdf, gdf, pdfIndex): - gdfIndex = cudf.from_pandas(pdfIndex) - pdf = pdf.copy(deep=False) - gdf = gdf.copy(deep=False) - pdf.index = pdfIndex - gdf.index = gdfIndex - assert_eq(pdf.index[0], gdf.index[0]) - - @pytest.mark.parametrize( "key_tuple", [ @@ -386,150 +374,6 @@ def test_multiindex_index_and_columns(): assert_eq(pdf, gdf) -def test_multiindex_multiple_groupby(): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "a": [4, 17, 4, 9, 5], - "b": [1, 4, 4, 3, 2], - "x": rng.normal(size=5), - } - ) - gdf = cudf.DataFrame.from_pandas(pdf) - pdg = pdf.groupby(["a", "b"], sort=True).sum() - gdg = gdf.groupby(["a", "b"], sort=True).sum() - assert_eq(pdg, gdg) - pdg = pdf.groupby(["a", "b"], sort=True).x.sum() - gdg = gdf.groupby(["a", "b"], sort=True).x.sum() - assert_eq(pdg, gdg) - - -@pytest.mark.parametrize( - "func", - [ - lambda df: df.groupby(["x", "y"], sort=True).z.sum(), - lambda df: df.groupby(["x", "y"], sort=True).sum(), - ], -) -def test_multi_column(func): - rng = np.random.default_rng(seed=0) - pdf = pd.DataFrame( - { - "x": rng.integers(0, 5, size=1000), - "y": rng.integers(0, 10, size=1000), - "z": rng.normal(size=1000), - } - ) - gdf = cudf.DataFrame.from_pandas(pdf) - - a = func(pdf) - b = func(gdf) - - assert_eq(a, b) - - -def test_multiindex_equality(): - # mi made from groupby - # mi made manually to be identical - # are they equal? - gdf = cudf.DataFrame( - {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} - ) - mi1 = gdf.groupby(["x", "y"], sort=True).mean().index - mi2 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_eq(mi1, mi2) - - # mi made from two groupbys, are they equal? - mi2 = gdf.groupby(["x", "y"], sort=True).max().index - assert_eq(mi1, mi2) - - # mi made manually twice are they equal? - mi1 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - mi2 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_eq(mi1, mi2) - - # mi made from different groupbys are they not equal? - mi1 = gdf.groupby(["x", "y"]).mean().index - mi2 = gdf.groupby(["x", "z"]).mean().index - assert_neq(mi1, mi2) - - # mi made from different manuals are they not equal? - mi1 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - mi2 = cudf.MultiIndex( - levels=[[0, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_neq(mi1, mi2) - - -def test_multiindex_equals(): - # mi made from groupby - # mi made manually to be identical - # are they equal? 
- gdf = cudf.DataFrame( - {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} - ) - mi1 = gdf.groupby(["x", "y"], sort=True).mean().index - mi2 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_eq(mi1.equals(mi2), True) - - # mi made from two groupbys, are they equal? - mi2 = gdf.groupby(["x", "y"], sort=True).max().index - assert_eq(mi1.equals(mi2), True) - - # mi made manually twice are they equal? - mi1 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - mi2 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_eq(mi1.equals(mi2), True) - - # mi made from different groupbys are they not equal? - mi1 = gdf.groupby(["x", "y"], sort=True).mean().index - mi2 = gdf.groupby(["x", "z"], sort=True).mean().index - assert_eq(mi1.equals(mi2), False) - - # mi made from different manuals are they not equal? - mi1 = cudf.MultiIndex( - levels=[[1, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - mi2 = cudf.MultiIndex( - levels=[[0, 3, 4, 5], [1, 2, 5]], - codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], - names=["x", "y"], - ) - assert_eq(mi1.equals(mi2), False) - - @pytest.mark.parametrize( "iloc_rows", [ @@ -649,52 +493,6 @@ def test_multicolumn_item(): assert_eq(gdgT[(0, 0)], pdgT[(0, 0)]) -def test_multiindex_reset_index(pdf, gdf, pdfIndex): - gdfIndex = cudf.from_pandas(pdfIndex) - pdf = pdf.copy(deep=False) - gdf = gdf.copy(deep=False) - pdf.index = pdfIndex - gdf.index = gdfIndex - assert_eq(pdf.reset_index(), gdf.reset_index()) - - -def test_multiindex_groupby_reset_index(): - gdf = cudf.DataFrame( - {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [0, 1, 0, 1, 0]} - ) - pdf = gdf.to_pandas() - gdg = gdf.groupby(["x", "y"], sort=True).sum() - pdg = pdf.groupby(["x", "y"], sort=True).sum() - assert_eq(pdg.reset_index(), gdg.reset_index()) - - -def test_multicolumn_reset_index(): - gdf = cudf.DataFrame({"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5]}) - pdf = gdf.to_pandas() - gdg = gdf.groupby(["x"], sort=True).agg({"y": ["count", "mean"]}) - pdg = pdf.groupby(["x"], sort=True).agg({"y": ["count", "mean"]}) - assert_eq(pdg.reset_index(), gdg.reset_index(), check_dtype=False) - gdg = gdf.groupby(["x"], sort=True).agg({"y": ["count"]}) - pdg = pdf.groupby(["x"], sort=True).agg({"y": ["count"]}) - assert_eq(pdg.reset_index(), gdg.reset_index(), check_dtype=False) - gdg = gdf.groupby(["x"], sort=True).agg({"y": "count"}) - pdg = pdf.groupby(["x"], sort=True).agg({"y": "count"}) - assert_eq(pdg.reset_index(), gdg.reset_index(), check_dtype=False) - - -def test_multiindex_multicolumn_reset_index(): - gdf = cudf.DataFrame( - {"x": [1, 5, 3, 4, 1], "y": [1, 1, 2, 2, 5], "z": [1, 2, 3, 4, 5]} - ) - pdf = gdf.to_pandas() - gdg = gdf.groupby(["x", "y"], sort=True).agg({"y": ["count", "mean"]}) - pdg = pdf.groupby(["x", "y"], sort=True).agg({"y": ["count", "mean"]}) - assert_eq(pdg.reset_index(), gdg.reset_index(), check_dtype=False) - gdg = gdf.groupby(["x", "z"], sort=True).agg({"y": ["count", "mean"]}) - pdg = pdf.groupby(["x", "z"], sort=True).agg({"y": ["count", "mean"]}) - assert_eq(pdg.reset_index(), gdg.reset_index(), check_dtype=False) - - def test_groupby_multiindex_columns_from_pandas(pdf, gdf, pdfIndex): gdfIndex = cudf.from_pandas(pdfIndex) pdf = pdf.copy(deep=False) @@ -780,48 +578,6 @@ def 
test_multicolumn_set_item(pdf, pdfIndex): assert_eq(pdf, gdf) -@pytest.mark.parametrize( - "key", - [0, 1, [], [0, 1], slice(None), slice(0, 0), slice(0, 1), slice(0, 2)], -) -def test_multiindex_indexing(key): - gi = cudf.MultiIndex.from_frame( - cudf.DataFrame({"a": [1, 2, 3], "b": [True, False, False]}) - ) - pi = gi.to_pandas() - - assert_eq(gi[key], pi[key], exact=False) - - -@pytest.mark.parametrize( - "names", - [ - ["a", "b", "c"], - [None, None, None], - ["aa", "aa", "aa"], - ["bb", "aa", "aa"], - None, - ], -) -def test_pickle_roundtrip_multiindex(names): - df = cudf.DataFrame( - { - "one": [1, 2, 3], - "two": [True, False, True], - "three": ["ab", "cd", "ef"], - "four": [0.2, 0.1, -10.2], - } - ) - expected_df = df.set_index(["one", "two", "three"]) - expected_df.index.names = names - local_file = BytesIO() - - pickle.dump(expected_df, local_file) - local_file.seek(0) - actual_df = pickle.load(local_file) - assert_eq(expected_df, actual_df) - - def test_multiindex_index_single_row(): arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]] tuples = list(zip(*arrays, strict=True)) @@ -834,29 +590,6 @@ def test_multiindex_index_single_row(): assert_eq(pdf.loc[("b", 3)], gdf.loc[("b", 3)]) -@pytest.mark.parametrize( - "levels", - itertools.chain.from_iterable( - itertools.permutations(range(3), n) for n in range(1, 4) - ), - ids=str, -) -def test_multiindex_sort_index_partial(levels): - df = pd.DataFrame( - { - "a": [3, 3, 3, 1, 1, 1, 2, 2], - "b": [4, 2, 7, -1, 11, -2, 7, 7], - "c": [4, 4, 2, 3, 3, 3, 1, 1], - "val": [1, 2, 3, 4, 5, 6, 7, 8], - } - ).set_index(["a", "b", "c"]) - cdf = cudf.from_pandas(df) - - expect = df.sort_index(level=levels, sort_remaining=True) - got = cdf.sort_index(level=levels, sort_remaining=True) - assert_eq(expect, got) - - @pytest.mark.parametrize("idx_get", [(0, 0), (0, 1), (1, 0), (1, 1)]) @pytest.mark.parametrize("cols_get", [0, 1, [0, 1], [1, 0], [1], [0]]) def test_multiindex_loc_scalar(idx_get, cols_get): From 92626499193a8d63710d1de0da0bd02fd2bf10b6 Mon Sep 17 00:00:00 2001 From: Peixin Date: Thu, 4 Sep 2025 12:08:38 +0800 Subject: [PATCH 255/366] Update boost version to 1.79 for JNI dockerfile (#19883) Fix https://github.com/rapidsai/cudf/issues/19879 The default boost-devel (1.66.x) is incompatible with the recent arrow update. This change, which is ported from JNI repo, has been verified internally. Authors: - Peixin (https://github.com/pxLi) Approvers: - Tim Liu (https://github.com/NvTimLiu) URL: https://github.com/rapidsai/cudf/pull/19883 --- java/ci/Dockerfile.rocky | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index d6eff8125fa..fbb687ce908 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -30,7 +30,7 @@ ARG TOOLSET_VERSION=14 ARG CMAKE_VERSION=3.30.7 ARG CCACHE_VERSION=4.11.2 ### Install basic requirements -RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-11 gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-11 gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids @@ -58,5 +58,15 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v cd ../.. 
&& \
    rm -rf ccache-${CCACHE_VERSION}

+## install a version of boost that is needed for arrow/parquet to work
+RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
+    tar -xzf boost_1_79_0.tar.gz && \
+    rm boost_1_79_0.tar.gz && \
+    cd boost_1_79_0 && \
+    ./bootstrap.sh --prefix=/usr/local && \
+    ./b2 install --prefix=/usr/local --with-filesystem --with-system && \
+    cd /usr/local && \
+    rm -rf boost_1_79_0
+
 # disable cuda container constraints to allow running w/ elder drivers on data-center GPUs
 ENV NVIDIA_DISABLE_REQUIRE="true"

From 9f2fe17b49a0e4e31421575f23382b8825e0ab36 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 4 Sep 2025 05:47:07 -0500
Subject: [PATCH 256/366] Skip flaky stats tests pending follow up (#19881)

Skips a few flaky tests while we investigate; xref
https://github.com/rapidsai/cudf/issues/19880

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19881
---
 python/cudf/cudf/tests/private_objects/test_nrt_stats.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf/cudf/tests/private_objects/test_nrt_stats.py b/python/cudf/cudf/tests/private_objects/test_nrt_stats.py
index e951374f5a0..e65aa85911e 100644
--- a/python/cudf/cudf/tests/private_objects/test_nrt_stats.py
+++ b/python/cudf/cudf/tests/private_objects/test_nrt_stats.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
+import pytest
 from numba import config
 from numba.cuda.memory_management.nrt import rtsys

@@ -14,6 +15,7 @@
 from cudf.utils._numba import _CUDFNumbaConfig


+@pytest.mark.skip(reason="https://github.com/rapidsai/cudf/issues/19880")
 def test_string_udf_basic(monkeypatch):
     monkeypatch.setattr(config, "CUDA_NRT_STATS", True)

@@ -34,6 +36,7 @@ def double(st):
     assert stats.alloc - stats.free == 0


+@pytest.mark.skip(reason="https://github.com/rapidsai/cudf/issues/19880")
 def test_string_udf_conditional_allocations(monkeypatch):
     monkeypatch.setattr(config, "CUDA_NRT_STATS", True)

@@ -54,6 +57,7 @@ def double(st):
     assert after_stats.alloc - before_stats.free == 1


+@pytest.mark.skip(reason="https://github.com/rapidsai/cudf/issues/19880")
 def test_string_udf_free_kernel(monkeypatch):
     monkeypatch.setattr(config, "CUDA_NRT_STATS", True)

From 99aed37ece502444e3573b385cc539482089a5ef Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 4 Sep 2025 11:04:45 -0400
Subject: [PATCH 257/366] Fix strings::find_instance warp parallel logic (#19845)

Fixes the `find_instance_warp_parallel_fn` kernel to ensure that all
threads in the warp participate in the cooperative-group functions
appropriately.
The failure was found only on a CUDA 13 build running on an sm_75
system. All `compute-sanitizer` tool types passed on this kernel.
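
For background: cooperative-groups collectives such as `cg::reduce` and
`cg::inclusive_scan` must be reached by every lane of the warp. The old loop
bound (`itr + d_target.size_bytes() <= end`) depended on each lane's own
iterator, so higher lanes could exit the loop while lower lanes were still
calling the collectives. Below is a minimal sketch of the safe pattern the fix
uses, with a warp-uniform loop bound and a neutral contribution from
out-of-range lanes (the kernel and names here are illustrative, not the actual
cudf code):

```cpp
#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>

namespace cg = cooperative_groups;

// Count occurrences of `target` using a single warp. The loop bound is
// uniform across all 32 lanes, so every lane reaches cg::reduce on every
// iteration; lanes past the end of the data contribute 0 instead of
// exiting the loop early.
__global__ void warp_count(char const* data, int size, char target, int* out)
{
  auto const warp = cg::tiled_partition<32>(cg::this_thread_block());
  int total       = 0;
  for (int base = 0; base < size; base += 32) {  // warp-uniform trip count
    int const i     = base + warp.thread_rank();
    int const found = (i < size && data[i] == target) ? 1 : 0;
    total += cg::reduce(warp, found, cg::plus<int>());  // all lanes participate
  }
  if (warp.thread_rank() == 0) { *out = total; }
}
```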
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/19845 --- cpp/src/strings/search/find_instance.cu | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/strings/search/find_instance.cu b/cpp/src/strings/search/find_instance.cu index 79dd54ad2aa..012059b1cb5 100644 --- a/cpp/src/strings/search/find_instance.cu +++ b/cpp/src/strings/search/find_instance.cu @@ -63,23 +63,27 @@ CUDF_KERNEL void find_instance_warp_parallel_fn(column_device_view const d_strin auto const max_pos = d_str.size_bytes(); size_type char_pos = max_pos; size_type char_count = 0; - size_type count = 0; - for (auto itr = begin + lane_idx; itr + d_target.size_bytes() <= end; - itr += cudf::detail::warp_size) { - size_type const is_char = !is_utf8_continuation_char(*itr); - size_type const found = is_char && (d_target.compare(itr, d_target.size_bytes()) == 0); + size_type byte_count = 0; + size_type offset = 0; + auto itr = begin + lane_idx; + while (byte_count + d_target.size_bytes() <= d_str.size_bytes()) { + size_type const is_char = + (itr + d_target.size_bytes() <= end) && !is_utf8_continuation_char(*itr); + size_type const found = is_char && (d_target.compare(itr, d_target.size_bytes()) == 0); // count of threads that matched in this warp and produce an offset in each thread auto const found_count = cg::reduce(warp, found, cg::plus()); auto const found_scan = cg::inclusive_scan(warp, found); // handy character counter for threads in this warp auto const chars_scan = cg::exclusive_scan(warp, is_char); // activate the thread where we hit the desired find instance - auto const found_pos = (found_scan + count) == (instance + 1) ? chars_scan : char_pos; + auto const found_pos = (found_scan + offset) == (instance + 1) ? chars_scan : char_pos; // copy the position value for that thread into all warp threads char_pos = cg::reduce(warp, found_pos, cg::less()); if (char_pos < max_pos) { break; } // all threads will stop - count += found_count; // otherwise continue with the next set + offset += found_count; // otherwise continue with the next set char_count += cg::reduce(warp, is_char, cg::plus()); + itr += cudf::detail::warp_size; + byte_count += cudf::detail::warp_size; } // output the position if an instance match has been found From 9d7afc74e0ecb009ab5396b62839faf5c4aa7d15 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 4 Sep 2025 11:47:19 -0700 Subject: [PATCH 258/366] Move prefetching out of experimental and simplify the API (#19875) We have been using prefetching in cudf.pandas for almost a year now, and for a shorter time in cudf-polars, both without issue. We have not modified the places where we prefetch or how we use the APIs in a long time. This PR therefore moves the libcudf prefetching logic out of an experimental namespace. It also simplifies the APIs for enabling prefetching, removing per-key control and instead making it an all-or-nothing choice. I've marked the PR as breaking, but since we are changing experimental features we do not need to provide any sort of compatibility shim. 
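
For reference, the simplified C++ surface after this change is four free
functions in `cudf/utilities/prefetch.hpp`. A minimal usage sketch (the
wrapper function here is illustrative; prefetching only has an effect when
allocations come from a managed memory resource):

```cpp
#include <cudf/utilities/prefetch.hpp>

void configure_prefetching(bool verbose)
{
  cudf::prefetch::enable();  // all-or-nothing; per-key control is gone
  if (verbose) {
    cudf::prefetch::enable_debugging();  // log each prefetch to stderr
  }
  // ... run libcudf work backed by managed memory ...
  cudf::prefetch::disable_debugging();
  cudf::prefetch::disable();
}
```

The Python bindings mirror this in `pylibcudf.prefetch` (`enable`, `disable`,
`enable_debugging`, `disable_debugging`), which is what `cudf.pandas` and
`cudf-polars` now call.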
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/19875 --- cpp/include/cudf/column/column_view.hpp | 1 - cpp/include/cudf/strings/detail/gather.cuh | 5 +- .../cudf/strings/detail/strings_children.cuh | 2 +- cpp/include/cudf/utilities/prefetch.hpp | 95 +++++-------------- cpp/src/column/column_view.cpp | 10 +- cpp/src/join/hash_join.cu | 5 +- cpp/src/utilities/prefetch.cpp | 84 +++++++--------- python/cudf/cudf/pandas/__init__.py | 47 ++++----- python/cudf_polars/cudf_polars/callback.py | 11 +-- python/pylibcudf/pylibcudf/CMakeLists.txt | 2 +- python/pylibcudf/pylibcudf/__init__.pxd | 4 +- python/pylibcudf/pylibcudf/__init__.py | 4 +- python/pylibcudf/pylibcudf/experimental.pxd | 10 -- python/pylibcudf/pylibcudf/experimental.pyi | 5 - python/pylibcudf/pylibcudf/experimental.pyx | 44 --------- .../pylibcudf/libcudf/experimental.pxd | 16 ---- .../pylibcudf/pylibcudf/libcudf/prefetch.pxd | 8 ++ python/pylibcudf/pylibcudf/prefetch.pxd | 9 ++ python/pylibcudf/pylibcudf/prefetch.pyi | 6 ++ python/pylibcudf/pylibcudf/prefetch.pyx | 25 +++++ 20 files changed, 141 insertions(+), 252 deletions(-) delete mode 100644 python/pylibcudf/pylibcudf/experimental.pxd delete mode 100644 python/pylibcudf/pylibcudf/experimental.pyi delete mode 100644 python/pylibcudf/pylibcudf/experimental.pyx delete mode 100644 python/pylibcudf/pylibcudf/libcudf/experimental.pxd create mode 100644 python/pylibcudf/pylibcudf/libcudf/prefetch.pxd create mode 100644 python/pylibcudf/pylibcudf/prefetch.pxd create mode 100644 python/pylibcudf/pylibcudf/prefetch.pyi create mode 100644 python/pylibcudf/pylibcudf/prefetch.pyx diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 11180e8c339..2c6a14739ea 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 73b53dbe9be..288c0e0a45c 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -233,7 +233,7 @@ rmm::device_uvector gather_chars(StringIterator strings_begin, if (output_count == 0) return rmm::device_uvector(0, stream, mr); auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); - cudf::experimental::prefetch::detail::prefetch("gather", chars_data, stream); + cudf::prefetch::detail::prefetch(chars_data, stream); auto d_chars = chars_data.data(); constexpr int warps_per_threadblock = 4; @@ -316,8 +316,7 @@ std::unique_ptr gather(strings_column_view const& strings, // build chars column auto const offsets_view = cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view()); - cudf::experimental::prefetch::detail::prefetch( - "gather", strings.chars_begin(stream), strings.chars_size(stream), stream); + cudf::prefetch::detail::prefetch(strings.chars_begin(stream), strings.chars_size(stream), stream); auto out_chars_data = gather_chars( d_strings->begin(), begin, end, offsets_view, total_bytes, stream, mr); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 3402abd9d36..c378ba97309 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ 
b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -260,7 +260,7 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, // Now build the chars column rmm::device_uvector chars(bytes, stream, mr); - cudf::experimental::prefetch::detail::prefetch("gather", chars, stream); + cudf::prefetch::detail::prefetch(chars, stream); size_and_exec_fn.d_chars = chars.data(); // Execute the function fn again to fill in the chars data. diff --git a/cpp/include/cudf/utilities/prefetch.hpp b/cpp/include/cudf/utilities/prefetch.hpp index 3384181fc37..ce7892b4460 100644 --- a/cpp/include/cudf/utilities/prefetch.hpp +++ b/cpp/include/cudf/utilities/prefetch.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,73 +20,26 @@ #include -#include -#include -#include -#include +#include namespace CUDF_EXPORT cudf { -namespace experimental::prefetch { +namespace prefetch { namespace detail { -/** - * @brief A singleton class that manages the prefetching configuration. - */ -class prefetch_config { - public: - prefetch_config& operator=(const prefetch_config&) = delete; - prefetch_config(const prefetch_config&) = delete; - - /** - * @brief Get the singleton instance of the prefetching configuration. - * - * @return The singleton instance of the prefetching configuration. - */ - static prefetch_config& instance(); - - /** - * @brief Get the value of a configuration key. - * - * If the key does not exist, a `false` value will be returned. - * - * @param key The configuration key. - * @return The value of the configuration key. - */ - bool get(std::string_view key); - /** - * @brief Set the value of a configuration key. - * - * This is a thread-safe operation. - * - * @param key The configuration key. - * @param value The value to set. - */ - void set(std::string_view key, bool value); - /** - * @brief Enable or disable debug mode. - * - * In debug mode, the pointers being prefetched are printed to stderr. - */ - bool debug{false}; - - private: - prefetch_config() = default; //< Private constructor to enforce singleton pattern - std::map config_values; //< Map of configuration keys to values - std::shared_mutex config_mtx; //< Mutex for thread-safe config access -}; +std::atomic_bool& enabled(); + +std::atomic_bool& debug(); /** * @brief Enable prefetching for a particular structure or algorithm. * - * @param key The key to enable prefetching for. * @param ptr The pointer to prefetch. * @param size The size of the memory region to prefetch. * @param stream The stream to prefetch on. * @param device_id The device to prefetch on. */ -void prefetch(std::string_view key, - void const* ptr, +void prefetch(void const* ptr, std::size_t size, rmm::cuda_stream_view stream, rmm::cuda_device_id device_id = rmm::get_current_cuda_device()); @@ -100,14 +53,12 @@ void prefetch(std::string_view key, * removed once an method for stream-ordered data pointer access is added to * those data structures. * - * @param key The key to enable prefetching for. * @param ptr The pointer to prefetch. * @param size The size of the memory region to prefetch. * @param stream The stream to prefetch on. * @param device_id The device to prefetch on. 
 */
 cudaError_t prefetch_noexcept(
-  std::string_view key,
   void const* ptr,
   std::size_t size,
   rmm::cuda_stream_view stream,
@@ -119,45 +70,47 @@ cudaError_t prefetch_noexcept(
  * @note At present this function does not support stream-ordered execution. Prefetching always
  * occurs on the default stream.
  *
- * @param key The key to enable prefetching for.
  * @param v The device_uvector to prefetch.
  * @param stream The stream to prefetch on.
  * @param device_id The device to prefetch on.
  */
 template <typename T>
-void prefetch(std::string_view key,
-              rmm::device_uvector<T> const& v,
+void prefetch(rmm::device_uvector<T> const& v,
               rmm::cuda_stream_view stream,
               rmm::cuda_device_id device_id = rmm::get_current_cuda_device())
 {
   if (v.is_empty()) { return; }
-  prefetch(key, v.data(), v.size(), stream, device_id);
+  prefetch(v.data(), v.size(), stream, device_id);
 }

 }  // namespace detail

 /**
- * @brief Enable prefetching for a particular structure or algorithm.
+ * @brief Enable prefetching.
  *
- * @param key The key to enable prefetching for.
+ * Prefetching of managed memory in cudf currently always synchronizes on the
+ * default stream and is not compatible with multi-stream applications.
  */
-void enable_prefetching(std::string_view key);
+void enable() noexcept;

 /**
- * @brief Disable prefetching for a particular structure or algorithm.
- *
- * @param key The key to disable prefetching for.
+ * @brief Disable prefetching.
  */
-void disable_prefetching(std::string_view key);
+void disable() noexcept;

 /**
- * @brief Enable or disable debug mode.
+ * @brief Enable debug mode for prefetching.
  *
  * In debug mode, the pointers being prefetched are printed to stderr.
+ */
+void enable_debugging() noexcept;
+
+/**
+ * @brief Disable debug mode for prefetching.
  *
- * @param enable Whether to enable or disable debug mode.
+ * In debug mode, the pointers being prefetched are printed to stderr.
  */
-void prefetch_debugging(bool enable);
+void disable_debugging() noexcept;

-}  // namespace experimental::prefetch
+}  // namespace prefetch
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index b54689441be..f87fcce76fd 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include

 #include

@@ -37,21 +38,20 @@ namespace {
 template <typename ColumnView>
 void prefetch_col_data(ColumnView& col, void const* data_ptr, std::string_view key) noexcept
 {
-  if (cudf::experimental::prefetch::detail::prefetch_config::instance().get(key)) {
+  if (cudf::prefetch::detail::enabled()) {
     if (col.type().id() == cudf::type_id::EMPTY) {
       // Skip prefetching for empty columns
       return;
     } else if (cudf::is_fixed_width(col.type())) {
-      cudf::experimental::prefetch::detail::prefetch_noexcept(
-        key, data_ptr, col.size() * size_of(col.type()), cudf::get_default_stream());
+      cudf::prefetch::detail::prefetch_noexcept(
+        data_ptr, col.size() * size_of(col.type()), cudf::get_default_stream());
     } else if (col.type().id() == type_id::STRING) {
       strings_column_view const scv{col};
       if (data_ptr == nullptr) {
         // Do not call chars_size if the data_ptr is nullptr.
return; } - cudf::experimental::prefetch::detail::prefetch_noexcept( - key, + cudf::prefetch::detail::prefetch_noexcept( data_ptr, scv.chars_size(cudf::get_default_stream()) * sizeof(char), cudf::get_default_stream()); diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 5a211b91466..8cf99c5218d 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -303,8 +304,8 @@ probe_join_hash_table( auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); - cudf::experimental::prefetch::detail::prefetch("hash_join", *left_indices, stream); - cudf::experimental::prefetch::detail::prefetch("hash_join", *right_indices, stream); + cudf::prefetch::detail::prefetch(*left_indices, stream); + cudf::prefetch::detail::prefetch(*right_indices, stream); auto const probe_table_num_rows = probe_table.num_rows(); auto const out_probe_begin = diff --git a/cpp/src/utilities/prefetch.cpp b/cpp/src/utilities/prefetch.cpp index be28d54214e..a2aec9abda0 100644 --- a/cpp/src/utilities/prefetch.cpp +++ b/cpp/src/utilities/prefetch.cpp @@ -19,80 +19,66 @@ #include +#include #include -namespace cudf::experimental::prefetch { +namespace cudf::prefetch { namespace detail { -prefetch_config& prefetch_config::instance() +std::atomic_bool& enabled() { - static prefetch_config instance; - return instance; + static std::atomic_bool value; + return value; } -bool prefetch_config::get(std::string_view key) +std::atomic_bool& debug() { - std::shared_lock const lock(config_mtx); - auto const it = config_values.find(key.data()); - return it == config_values.end() ? false : it->second; // default to not prefetching + static std::atomic_bool value; + return value; } -void prefetch_config::set(std::string_view key, bool value) -{ - std::lock_guard const lock(config_mtx); - config_values[key.data()] = value; -} - -cudaError_t prefetch_noexcept(std::string_view key, - void const* ptr, +cudaError_t prefetch_noexcept(void const* ptr, std::size_t size, rmm::cuda_stream_view stream, rmm::cuda_device_id device_id) noexcept { + if (!detail::enabled()) { return cudaSuccess; } + // Don't try to prefetch nullptrs or empty data. Sometimes libcudf has column // views that use nullptrs with a nonzero size as an optimization. if (ptr == nullptr) { - if (prefetch_config::instance().debug) { - std::cerr << "Skipping prefetch of nullptr" << std::endl; - } + if (detail::debug()) { std::cerr << "Skipping prefetch of nullptr" << std::endl; } return cudaSuccess; } if (size == 0) { - if (prefetch_config::instance().debug) { - std::cerr << "Skipping prefetch of size 0" << std::endl; - } + if (detail::debug()) { std::cerr << "Skipping prefetch of size 0" << std::endl; } return cudaSuccess; } - if (prefetch_config::instance().get(key)) { - if (prefetch_config::instance().debug) { - std::cerr << "Prefetching " << size << " bytes for key " << key << " at location " << ptr - << std::endl; - } + if (detail::debug()) { + std::cerr << "Prefetching " << size << " bytes at location " << ptr << std::endl; + } #if defined(CUDART_VERSION) && CUDART_VERSION >= 13000 - cudaMemLocation location{ - (device_id.value() == cudaCpuDeviceId) ? cudaMemLocationTypeHost : cudaMemLocationTypeDevice, - device_id.value()}; - constexpr int flags = 0; - auto result = cudaMemPrefetchAsync(ptr, size, location, flags, stream.value()); + cudaMemLocation location{ + (device_id.value() == cudaCpuDeviceId) ? 
cudaMemLocationTypeHost : cudaMemLocationTypeDevice, + device_id.value()}; + constexpr int flags = 0; + auto result = cudaMemPrefetchAsync(ptr, size, location, flags, stream.value()); #else - auto result = cudaMemPrefetchAsync(ptr, size, device_id.value(), stream.value()); + auto result = cudaMemPrefetchAsync(ptr, size, device_id.value(), stream.value()); #endif - // Need to flush the CUDA error so that the context is not corrupted. - if (result == cudaErrorInvalidValue) { cudaGetLastError(); } - return result; - } - return cudaSuccess; + // Need to flush the CUDA error so that the context is not corrupted. + if (result == cudaErrorInvalidValue) { cudaGetLastError(); } + return result; } -void prefetch(std::string_view key, - void const* ptr, +void prefetch(void const* ptr, std::size_t size, rmm::cuda_stream_view stream, rmm::cuda_device_id device_id) { - auto result = prefetch_noexcept(key, ptr, size, stream, device_id); + auto result = prefetch_noexcept(ptr, size, stream, device_id); // Ignore cudaErrorInvalidValue because that will be raised if prefetching is // attempted on unmanaged memory. if ((result != cudaErrorInvalidValue) && (result != cudaSuccess)) { @@ -103,15 +89,11 @@ void prefetch(std::string_view key, } // namespace detail -void enable_prefetching(std::string_view key) -{ - detail::prefetch_config::instance().set(key, true); -} +void enable() noexcept { detail::enabled() = true; } -void disable_prefetching(std::string_view key) -{ - detail::prefetch_config::instance().set(key, false); -} +void disable() noexcept { detail::enabled() = false; } + +void enable_debugging() noexcept { detail::debug() = true; } -void prefetch_debugging(bool enable) { detail::prefetch_config::instance().debug = enable; } -} // namespace cudf::experimental::prefetch +void disable_debugging() noexcept { detail::debug() = false; } +} // namespace cudf::prefetch diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index e71564cbcc3..70cbbb19830 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -28,19 +28,6 @@ LOADED = False -_SUPPORTED_PREFETCHES = { - "column_view::get_data", - "mutable_column_view::get_data", - "gather", - "hash_join", -} - - -def _enable_managed_prefetching(rmm_mode, managed_memory_is_supported): - if managed_memory_is_supported and "managed" in rmm_mode: - for key in _SUPPORTED_PREFETCHES: - pylibcudf.experimental.enable_prefetching(key) - def install(): """Enable Pandas Accelerator Mode.""" @@ -69,11 +56,6 @@ def install(): if rmm_mode is None: rmm_mode = "managed_pool" if managed_memory_is_supported else "pool" - if "managed" in rmm_mode and not managed_memory_is_supported: - raise ValueError( - f"Managed memory is not supported on this system, so the requested {rmm_mode=} is invalid." 
- ) - # Check if a non-default memory resource is set current_mr = rmm.mr.get_current_device_resource() if not isinstance(current_mr, rmm.mr.CudaMemoryResource): @@ -97,22 +79,31 @@ def install(): ) elif rmm_mode == "async": new_mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory) - elif rmm_mode == "managed": - new_mr = rmm.mr.PrefetchResourceAdaptor(rmm.mr.ManagedMemoryResource()) - elif rmm_mode == "managed_pool": - new_mr = rmm.mr.PrefetchResourceAdaptor( - rmm.mr.PoolMemoryResource( - rmm.mr.ManagedMemoryResource(), - initial_pool_size=free_memory, + elif "managed" in rmm_mode: + if not managed_memory_is_supported: + raise ValueError( + "Managed memory is not supported on this system, so the " + f"requested {rmm_mode=} is invalid." ) - ) + if rmm_mode == "managed": + new_mr = rmm.mr.PrefetchResourceAdaptor( + rmm.mr.ManagedMemoryResource() + ) + elif rmm_mode == "managed_pool": + new_mr = rmm.mr.PrefetchResourceAdaptor( + rmm.mr.PoolMemoryResource( + rmm.mr.ManagedMemoryResource(), + initial_pool_size=free_memory, + ) + ) + else: + raise ValueError(f"Unsupported {rmm_mode=}") + pylibcudf.prefetch.enable() elif rmm_mode != "cuda": raise ValueError(f"Unsupported {rmm_mode=}") rmm.mr.set_current_device_resource(new_mr) - _enable_managed_prefetching(rmm_mode, managed_memory_is_supported) - def pytest_load_initial_conftests(early_config, parser, args): # We need to install ourselves before conftest.py import (which diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index 45518cd3cf2..a3418311630 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -40,14 +40,6 @@ __all__: list[str] = ["execute_with_cudf"] -_SUPPORTED_PREFETCHES = { - "column_view::get_data", - "mutable_column_view::get_data", - "gather", - "hash_join", -} - - @cache def default_memory_resource( device: int, @@ -80,8 +72,7 @@ def default_memory_resource( # Leaving a 20% headroom to avoid OOM errors. free_memory, _ = rmm.mr.available_device_memory() free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) - for key in _SUPPORTED_PREFETCHES: - pylibcudf.experimental.enable_prefetching(key) + pylibcudf.prefetch.enable() mr = rmm.mr.PrefetchResourceAdaptor( rmm.mr.PoolMemoryResource( rmm.mr.ManagedMemoryResource(), diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index f4f707b45e4..efd989496d6 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -21,7 +21,6 @@ set(cython_sources concatenate.pyx copying.pyx datetime.pyx - experimental.pyx expressions.pyx filling.pyx gpumemoryview.pyx @@ -36,6 +35,7 @@ set(cython_sources merge.pyx null_mask.pyx partitioning.pyx + prefetch.pyx quantiles.pyx reduce.pyx replace.pyx diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index decbe9282ec..2739e14d4a3 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -9,7 +9,6 @@ from . cimport ( contiguous_split, copying, datetime, - experimental, expressions, filling, groupby, @@ -23,6 +22,7 @@ from . 
cimport ( null_mask, nvtext, partitioning, + prefetch, quantiles, reduce, replace, @@ -60,7 +60,6 @@ __all__ = [ "concatenate", "copying", "datetime", - "experimental", "expressions", "filling", "gpumemoryview", @@ -72,6 +71,7 @@ __all__ = [ "lists", "merge", "null_mask", + "prefetch", "partitioning", "quantiles", "reduce", diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index b819c0abae2..09dcfacca8c 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -18,7 +18,6 @@ contiguous_split, copying, datetime, - experimental, expressions, filling, groupby, @@ -33,6 +32,7 @@ null_mask, nvtext, partitioning, + prefetch, quantiles, reduce, replace, @@ -70,7 +70,6 @@ "contiguous_split", "copying", "datetime", - "experimental", "expressions", "filling", "gpumemoryview", @@ -86,6 +85,7 @@ "null_mask", "nvtext", "partitioning", + "prefetch", "quantiles", "reduce", "replace", diff --git a/python/pylibcudf/pylibcudf/experimental.pxd b/python/pylibcudf/pylibcudf/experimental.pxd deleted file mode 100644 index 107c91c8365..00000000000 --- a/python/pylibcudf/pylibcudf/experimental.pxd +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from libcpp cimport bool - - -cpdef enable_prefetching(str key) - -cpdef disable_prefetching(str key) - -cpdef prefetch_debugging(bool enable) diff --git a/python/pylibcudf/pylibcudf/experimental.pyi b/python/pylibcudf/pylibcudf/experimental.pyi deleted file mode 100644 index bbfb86b0ff6..00000000000 --- a/python/pylibcudf/pylibcudf/experimental.pyi +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -def enable_prefetching(key: str) -> None: ... -def disable_prefetching(key: str) -> None: ... -def prefetch_debugging(enable: bool) -> None: ... diff --git a/python/pylibcudf/pylibcudf/experimental.pyx b/python/pylibcudf/pylibcudf/experimental.pyx deleted file mode 100644 index d94d6d087ac..00000000000 --- a/python/pylibcudf/pylibcudf/experimental.pyx +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from libcpp cimport bool -from libcpp.string cimport string -from pylibcudf.libcudf cimport experimental as cpp_experimental - - -__all__ = ["disable_prefetching", "enable_prefetching", "prefetch_debugging"] - -cpdef enable_prefetching(str key): - """Turn on prefetch instructions for the given key. - - Parameters - ---------- - key : str - The key to enable prefetching for. - """ - cdef string c_key = key.encode("utf-8") - cpp_experimental.enable_prefetching(c_key) - - -cpdef disable_prefetching(str key): - """Turn off prefetch instructions for the given key. - - Parameters - ---------- - key : str - The key to disable prefetching for. - """ - cdef string c_key = key.encode("utf-8") - cpp_experimental.disable_prefetching(c_key) - - -cpdef prefetch_debugging(bool enable): - """Enable or disable prefetch debugging. - - When enabled, any prefetch instructions will be logged to the console. - - Parameters - ---------- - enable : bool - Whether to enable or disable prefetch debugging. - """ - cpp_experimental.prefetch_debugging(enable) diff --git a/python/pylibcudf/pylibcudf/libcudf/experimental.pxd b/python/pylibcudf/pylibcudf/libcudf/experimental.pxd deleted file mode 100644 index 764815fba36..00000000000 --- a/python/pylibcudf/pylibcudf/libcudf/experimental.pxd +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-from libcpp cimport bool
-from libcpp.string cimport string
-from pylibcudf.exception_handler cimport libcudf_exception_handler
-
-
-cdef extern from "cudf/utilities/prefetch.hpp" \
-    namespace "cudf::experimental::prefetch" nogil:
-    # Not technically the right signature, but it's good enough to let Cython
-    # generate valid C++ code. It just means we'll be copying a host string
-    # extra, but that's OK. If we care we could generate string_view bindings,
-    # but there's no real rush so if we go that route we might as well
-    # contribute them upstream to Cython itself.
-    void enable_prefetching(string key)
-    void disable_prefetching(string key)
-    void prefetch_debugging(bool enable)
diff --git a/python/pylibcudf/pylibcudf/libcudf/prefetch.pxd b/python/pylibcudf/pylibcudf/libcudf/prefetch.pxd
new file mode 100644
index 00000000000..1b7dc36444a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/libcudf/prefetch.pxd
@@ -0,0 +1,8 @@
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+
+
+cdef extern from "cudf/utilities/prefetch.hpp" namespace "cudf::prefetch" nogil:
+    void enable() noexcept
+    void disable() noexcept
+    void enable_debugging() noexcept
+    void disable_debugging() noexcept
diff --git a/python/pylibcudf/pylibcudf/prefetch.pxd b/python/pylibcudf/pylibcudf/prefetch.pxd
new file mode 100644
index 00000000000..36632a4ed02
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/prefetch.pxd
@@ -0,0 +1,9 @@
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+
+cpdef enable()
+
+cpdef disable()
+
+cpdef enable_debugging()
+
+cpdef disable_debugging()
diff --git a/python/pylibcudf/pylibcudf/prefetch.pyi b/python/pylibcudf/pylibcudf/prefetch.pyi
new file mode 100644
index 00000000000..d949e001d71
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/prefetch.pyi
@@ -0,0 +1,6 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+def enable() -> None: ...
+def disable() -> None: ...
+def enable_debugging() -> None: ...
+def disable_debugging() -> None: ...
diff --git a/python/pylibcudf/pylibcudf/prefetch.pyx b/python/pylibcudf/pylibcudf/prefetch.pyx
new file mode 100644
index 00000000000..8d251d8c96d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/prefetch.pyx
@@ -0,0 +1,25 @@
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+
+from pylibcudf.libcudf cimport prefetch as cpp_prefetch
+
+
+__all__ = ["disable", "disable_debugging", "enable", "enable_debugging"]
+
+cpdef enable():
+    """Turn on prefetching of managed memory."""
+    cpp_prefetch.enable()
+
+
+cpdef disable():
+    """Turn off prefetching of managed memory."""
+    cpp_prefetch.disable()
+
+
+cpdef enable_debugging():
+    """Enable prefetch debugging."""
+    cpp_prefetch.enable_debugging()
+
+
+cpdef disable_debugging():
+    """Disable prefetch debugging."""
+    cpp_prefetch.disable_debugging()

From cd173ff120789c2206a0409ba5375533540a9b63 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Thu, 4 Sep 2025 12:55:04 -0700
Subject: [PATCH 259/366] Fix a decompression parameter in the chunked ORC reader (#19882)

The chunked ORC reader passes zero for the maximum uncompressed block size
to the `decompress` call; the `max_uncompressed_block_size` in the
`compinfo` array elements is never set.
In the non-chunked case, `parse_compressed_stripe_data` is called in
`decompress_stripe_data`, so this information is correct. In the chunked
case, the `compinfo` array is re-created from `compinfo_map`, which does
not have `max_uncompressed_block_size`.
This PR fixes this by including `max_uncompressed_block_size` in `compinfo_map` and propagating it from `load_next_stripe_data` to `decompress_stripe_data` via this map. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - https://github.com/nvdbaranec URL: https://github.com/rapidsai/cudf/pull/19882 --- cpp/src/io/comp/nvcomp_adapter.cpp | 10 ++++++++++ cpp/src/io/orc/reader_impl_chunking.cu | 3 ++- cpp/src/io/orc/reader_impl_chunking.hpp | 1 + cpp/src/io/orc/reader_impl_decode.cu | 10 +++++----- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 5dd78515bcf..ae7eda0711e 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -715,6 +715,12 @@ void batched_decompress(compression_type compression, size_t max_total_uncomp_size, rmm::cuda_stream_view stream) { + CUDF_EXPECTS(inputs.size() > 0, "inputs must be non-empty"); + CUDF_EXPECTS(inputs.size() == outputs.size(), "inputs and outputs must have the same size"); + CUDF_EXPECTS(inputs.size() == results.size(), "inputs and results must have the same size"); + CUDF_EXPECTS(max_total_uncomp_size > 0, "max_total_uncomp_size must be greater than 0"); + CUDF_EXPECTS(max_uncomp_chunk_size > 0, "max_uncomp_chunk_size must be greater than 0"); + auto const num_chunks = inputs.size(); // cuDF inflate inputs converted to nvcomp inputs @@ -837,6 +843,10 @@ void batched_compress(compression_type compression, device_span results, rmm::cuda_stream_view stream) { + CUDF_EXPECTS(inputs.size() > 0, "inputs must be non-empty"); + CUDF_EXPECTS(inputs.size() == outputs.size(), "inputs and outputs must have the same size"); + CUDF_EXPECTS(inputs.size() == results.size(), "inputs and results must have the same size"); + auto const num_chunks = inputs.size(); auto nvcomp_args = create_batched_nvcomp_args(inputs, outputs, stream); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 092fa33c95c..d0e3c862656 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -690,7 +690,8 @@ void reader_impl::load_next_stripe_data(read_mode mode) // Cache these parsed numbers so they can be reused in the decompression/decoding step. 
compinfo_map[info.source] = {stream_compinfo.num_compressed_blocks, stream_compinfo.num_uncompressed_blocks, - stream_compinfo.max_uncompressed_size}; + stream_compinfo.max_uncompressed_size, + stream_compinfo.max_uncompressed_block_size}; stripe_decomp_sizes[info.source.stripe_idx - stripe_start].size_bytes += stream_compinfo.max_uncompressed_size; } diff --git a/cpp/src/io/orc/reader_impl_chunking.hpp b/cpp/src/io/orc/reader_impl_chunking.hpp index 3aa0344bf21..54ed6e041da 100644 --- a/cpp/src/io/orc/reader_impl_chunking.hpp +++ b/cpp/src/io/orc/reader_impl_chunking.hpp @@ -74,6 +74,7 @@ struct stripe_level_comp_info { std::size_t num_compressed_blocks{0}; std::size_t num_uncompressed_blocks{0}; std::size_t total_decomp_size{0}; + std::size_t max_uncompressed_block_size{0}; }; /** diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index 6909a84e903..cfd68fa9d04 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -106,12 +106,12 @@ rmm::device_buffer decompress_stripe_data( stripe_data[info.source.stripe_idx - loaded_stripe_range.begin].data()) + info.dst_pos, info.length); - if (compinfo_ready) { - auto const& cached_comp_info = compinfo_map.at(info.source); - stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; - stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; - stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + auto const& cached_comp_info = compinfo_map.at(info.source); + stream_comp_info.num_compressed_blocks = cached_comp_info.num_compressed_blocks; + stream_comp_info.num_uncompressed_blocks = cached_comp_info.num_uncompressed_blocks; + stream_comp_info.max_uncompressed_size = cached_comp_info.total_decomp_size; + stream_comp_info.max_uncompressed_block_size = cached_comp_info.max_uncompressed_block_size; num_compressed_blocks += cached_comp_info.num_compressed_blocks; num_uncompressed_blocks += cached_comp_info.num_uncompressed_blocks; From 5e59566b4a22e5d5df3e88bf8d3d2ae81254ce84 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 4 Sep 2025 16:59:26 -0700 Subject: [PATCH 260/366] Move row operators to detail and deprecate legacy (#19849) Related to #12593 and #19191 This PR deprecates the legacy row operator and moves the experimental and primitive row operators into the `detail` namespace. To avoid breaking changes, the code is temporarily duplicated: primitive operators are copied to `detail/row_operator/primitive_row_operators.cuh`, and experimental ones to `detail/row_operator/row_operators.cuh`. An attempt to wrap the detail APIs instead of duplicating code was not feasible due to nested namespaces and static functions. There are no functional modifications; only namespace reorganizations and header inclusion updates. 
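
For callers inside libcudf the change is purely a namespace rename; a minimal
sketch of the before/after (the `preprocessed_table::create` factory shown
here is assumed unchanged from the existing row-operator API, and the wrapper
function is illustrative):

```cpp
#include <cudf/detail/row_operator/row_operators.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>

// Before this series: cudf::experimental::row::equality::preprocessed_table
auto preprocess_for_equality(cudf::table_view const& input, rmm::cuda_stream_view stream)
{
  // Same type and factory, now under cudf::detail::row
  return cudf::detail::row::equality::preprocessed_table::create(input, stream);
}
```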
Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/19849
---
 cpp/CMakeLists.txt                            |    2 +
 .../cudf/detail/join/distinct_hash_join.cuh   |    8 +-
 cpp/include/cudf/detail/join/hash_join.cuh    |    4 +-
 .../cudf/detail/row_operator/common_utils.cuh |  151 ++
 .../row_operator/primitive_row_operators.cuh  |  287 +++
 .../detail/row_operator/row_operators.cuh     | 2064 +++++++++++++++++
 .../cudf/table/experimental/row_operators.cuh |   66 +
 .../cudf/table/primitive_row_operators.cuh    |   27 +
 cpp/include/cudf/table/row_operators.cuh      |  187 +-
 cpp/src/binaryop/compiled/binary_ops.cu       |   10 +-
 .../binaryop/compiled/struct_binary_ops.cuh   |   33 +-
 cpp/src/groupby/hash/groupby.cu               |    8 +-
 cpp/src/groupby/hash/helpers.cuh              |   15 +-
 cpp/src/groupby/sort/group_nunique.cu         |    8 +-
 cpp/src/groupby/sort/group_rank_scan.cu       |    5 +-
 cpp/src/groupby/sort/sort_helper.cu           |    4 +-
 cpp/src/hash/murmurhash3_x86_32.cu            |    6 +-
 cpp/src/io/orc/dict_enc.cu                    |    4 +-
 cpp/src/io/parquet/chunk_dict.cu              |    5 +-
 cpp/src/join/distinct_hash_join.cu            |   52 +-
 cpp/src/join/hash_join.cu                     |  105 +-
 cpp/src/join/join_common_utils.cuh            |   14 +-
 cpp/src/join/mixed_join.cu                    |   26 +-
 cpp/src/join/mixed_join_common_utils.cuh      |   23 +-
 cpp/src/join/mixed_join_semi.cu               |   19 +-
 cpp/src/join/sort_merge_join.cu               |   10 +-
 cpp/src/lists/contains.cu                     |    9 +-
 cpp/src/merge/merge.cu                        |    6 +-
 cpp/src/partitioning/partitioning.cu          |    4 +-
 cpp/src/reductions/histogram.cu               |   20 +-
 .../reductions/nested_type_minmax_util.cuh    |   12 +-
 cpp/src/reductions/scan/rank_scan.cu          |    6 +-
 cpp/src/reductions/segmented/nunique.cu       |    9 +-
 .../row_operator/primitive_row_operators.cu   |   29 +
 cpp/src/row_operator/row_operators.cu         |  879 +++++++
 cpp/src/search/contains_scalar.cu             |   10 +-
 cpp/src/search/contains_table.cu              |   31 +-
 cpp/src/search/contains_table_impl.cu         |   29 +-
 cpp/src/search/contains_table_impl.cuh        |   35 +-
 cpp/src/search/contains_table_impl_nested.cu  |   27 +-
 .../search/contains_table_impl_primitive.cu   |   10 +-
 cpp/src/search/search_ordered.cu              |   10 +-
 cpp/src/sort/is_sorted.cu                     |    6 +-
 cpp/src/sort/rank.cu                          |    4 +-
 cpp/src/sort/sort_column_impl.cuh             |    2 +-
 cpp/src/sort/sort_impl.cuh                    |    4 +-
 cpp/src/stream_compaction/distinct.cu         |   22 +-
 cpp/src/stream_compaction/distinct_count.cu   |   11 +-
 cpp/src/stream_compaction/distinct_helpers.cu |   16 +-
 .../stream_compaction/distinct_helpers.hpp    |   10 +-
 cpp/src/stream_compaction/unique.cu           |    4 +-
 cpp/src/stream_compaction/unique_count.cu     |    6 +-
 .../stream_compaction/unique_count_column.cu  |    2 +-
 cpp/src/transform/one_hot_encode.cu           |   15 +-
 .../table/experimental_row_operator_tests.cu  |   71 +-
 .../table/row_operator_tests_utilities.cu     |   25 +-
 .../table/row_operator_tests_utilities.hpp    |   15 +-
 .../table/row_operator_tests_utilities2.cu    |    4 +-
 cpp/tests/utilities/column_utilities.cu       |   12 +-
 59 files changed, 3977 insertions(+), 521 deletions(-)
 create mode 100644 cpp/include/cudf/detail/row_operator/common_utils.cuh
 create mode 100644 cpp/include/cudf/detail/row_operator/primitive_row_operators.cuh
 create mode 100644 cpp/include/cudf/detail/row_operator/row_operators.cuh
 create mode 100644 cpp/src/row_operator/primitive_row_operators.cu
 create mode 100644 cpp/src/row_operator/row_operators.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5eb59323caa..28b35d4cbb6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -683,6 +683,8 @@ add_library(
   src/rolling/range_window_bounds.cpp
   src/rolling/rolling.cpp
   src/round/round.cu
+  src/row_operator/primitive_row_operators.cu
+  src/row_operator/row_operators.cu
   src/runtime/context.cpp
   src/scalar/scalar.cpp
   src/scalar/scalar_factories.cpp
diff --git a/cpp/include/cudf/detail/join/distinct_hash_join.cuh b/cpp/include/cudf/detail/join/distinct_hash_join.cuh
index 3da903dc415..a50675c156e 100644
--- a/cpp/include/cudf/detail/join/distinct_hash_join.cuh
+++ b/cpp/include/cudf/detail/join/distinct_hash_join.cuh
@@ -15,8 +15,8 @@
  */
 #pragma once

+#include <cudf/detail/row_operator/row_operators.cuh>
 #include
-#include <cudf/table/experimental/row_operators.cuh>
 #include
 #include
@@ -32,8 +32,8 @@
 namespace cudf::detail {

-using cudf::experimental::row::lhs_index_type;
-using cudf::experimental::row::rhs_index_type;
+using cudf::detail::row::lhs_index_type;
+using cudf::detail::row::rhs_index_type;

 /**
  * @brief A custom comparator used for the build table insertion
@@ -170,7 +170,7 @@ class distinct_hash_join {
   bool _has_nested_columns;  ///< True if nested columns are present in build and probe tables
   cudf::null_equality _nulls_equal;  ///< Whether to consider nulls as equal
   cudf::table_view _build;           ///< Input table to build the hash map
-  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
+  std::shared_ptr<cudf::detail::row::equality::preprocessed_table>
     _preprocessed_build;  ///< Input table preprocessed for row operators
   hash_table_type _hash_table;  ///< Hash table built on `_build`
 };
diff --git a/cpp/include/cudf/detail/join/hash_join.cuh b/cpp/include/cudf/detail/join/hash_join.cuh
index 4bff2d0b7ed..99f11126999 100644
--- a/cpp/include/cudf/detail/join/hash_join.cuh
+++ b/cpp/include/cudf/detail/join/hash_join.cuh
@@ -36,7 +36,7 @@
 #include

 // Forward declaration
-namespace cudf::experimental::row::equality {
+namespace cudf::detail::row::equality {
 class preprocessed_table;
 }
@@ -108,7 +108,7 @@ struct hash_join {
   bool const _has_nulls;  ///< true if nulls are present in either build table or any probe table
   cudf::null_equality const _nulls_equal;  ///< whether to consider nulls as equal
   cudf::table_view _build;                 ///< input table to build the hash map
-  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table>
+  std::shared_ptr<cudf::detail::row::equality::preprocessed_table>
     _preprocessed_build;  ///< input table preprocessed for row operators

   hash_table_t _hash_table;  ///< hash table built on `_build`
diff --git a/cpp/include/cudf/detail/row_operator/common_utils.cuh b/cpp/include/cudf/detail/row_operator/common_utils.cuh
new file mode 100644
index 00000000000..794b2300bf7
--- /dev/null
+++ b/cpp/include/cudf/detail/row_operator/common_utils.cuh
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/types.hpp>
+
+#include <cuda/std/type_traits>
+
+namespace cudf::detail {
+
+/**
+ * @brief Result type of comparison operations.
+ *
+ * Indicates how two elements `a` and `b` compare with one another.
+ *
+ * Equivalence is defined as `not (a<b) and not (b<a)`. Elements that are
+ * EQUIVALENT may not necessarily be *equal*.
+ */
+enum class weak_ordering {
+  LESS,        ///< Indicates `a` is less than (ordered before) `b`
+  EQUIVALENT,  ///< Indicates `a` is ordered neither before nor after `b`
+  GREATER      ///< Indicates `a` is greater than (ordered after) `b`
+};
+
+/**
+ * @brief Compare the elements ordering with respect to `lhs`.
+ *
+ * @param lhs The first element
+ * @param rhs The second element
+ * @return Indicates the relationship between the elements
+ */
+template <typename Element>
+__device__ weak_ordering compare_elements(Element lhs, Element rhs)
+{
+  if (lhs < rhs) {
+    return weak_ordering::LESS;
+  } else if (rhs < lhs) {
+    return weak_ordering::GREATER;
+  }
+  return weak_ordering::EQUIVALENT;
+}
+
+/**
+ * @brief A specialization for floating-point `Element` type relational comparison
+ * to derive the order of the elements with respect to `lhs`.
+ *
+ * This specialization handles `nan` in the following order:
+ * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)`
+ * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)`
+ *
+ * @param lhs The first element
+ * @param rhs The second element
+ * @return Indicates the relationship between the elements
+ */
+template <typename Element>
+__device__ weak_ordering relational_compare(Element lhs, Element rhs)
+  requires(cuda::std::is_floating_point_v<Element>)
+{
+  if (isnan(lhs) and isnan(rhs)) {
+    return weak_ordering::EQUIVALENT;
+  } else if (isnan(rhs)) {
+    return weak_ordering::LESS;
+  } else if (isnan(lhs)) {
+    return weak_ordering::GREATER;
+  }
+
+  return detail::compare_elements(lhs, rhs);
+}
+
+/**
+ * @brief Compare the nulls according to null order.
+ *
+ * @param lhs_is_null boolean representing if lhs is null
+ * @param rhs_is_null boolean representing if rhs is null
+ * @param null_precedence null order
+ * @return Indicates the relationship between null in lhs and rhs columns.
+ */
+inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
+{
+  if (lhs_is_null and rhs_is_null) {  // null <? null
+    return weak_ordering::EQUIVALENT;
+  } else if (lhs_is_null) {  // null <? x
+    return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
+  } else if (rhs_is_null) {  // x <? null
+    return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
+  }
+  return weak_ordering::EQUIVALENT;
+}
+
+/**
+ * @brief A specialization for non-floating-point `Element` type relational comparison
+ * to derive the order of the elements with respect to `lhs`.
+ *
+ * @param lhs The first element
+ * @param rhs The second element
+ * @return Indicates the relationship between the elements
+ */
+template <typename Element>
+__device__ weak_ordering relational_compare(Element lhs, Element rhs)
+  requires(not cuda::std::is_floating_point_v<Element>)
+{
+  return detail::compare_elements(lhs, rhs);
+}
+
+/**
+ * @brief A specialization for floating-point `Element` type to check if
+ * `lhs` is equivalent to `rhs`. `nan == nan`.
+ *
+ * @param lhs first element
+ * @param rhs second element
+ * @return `true` if `lhs` == `rhs` else `false`.
+ */
+template <typename Element>
+__device__ bool equality_compare(Element lhs, Element rhs)
+  requires(cuda::std::is_floating_point_v<Element>)
+{
+  if (isnan(lhs) and isnan(rhs)) { return true; }
+  return lhs == rhs;
+}
+
+/**
+ * @brief A specialization for non-floating-point `Element` type to check if
+ * `lhs` is equivalent to `rhs`.
+ *
+ * @param lhs first element
+ * @param rhs second element
+ * @return `true` if `lhs` == `rhs` else `false`.
+ */
+template <typename Element>
+__device__ bool equality_compare(Element const lhs, Element const rhs)
+  requires(not cuda::std::is_floating_point_v<Element>)
+{
+  return lhs == rhs;
+}
+
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/row_operator/primitive_row_operators.cuh b/cpp/include/cudf/detail/row_operator/primitive_row_operators.cuh
new file mode 100644
index 00000000000..4743e9fc645
--- /dev/null
+++ b/cpp/include/cudf/detail/row_operator/primitive_row_operators.cuh
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace CUDF_EXPORT cudf {
+
+namespace detail {
+
+/**
+ * @brief Checks if a table is compatible with primitive row operations
+ *
+ * A table is compatible with primitive row operations if it contains exactly one column
+ * and that column contains only numeric data types.
+ *
+ * @param table The table to check for compatibility
+ * @return Boolean indicating if the table is compatible with primitive row operations
+ */
+bool is_primitive_row_op_compatible(cudf::table_view const& table);
+
+namespace row::primitive {
+
+/**
+ * @brief Returns `void` if it's not a primitive type
+ */
+template <typename T>
+using primitive_type_t = cuda::std::conditional_t<cudf::is_numeric<T>(), T, void>;
+
+/**
+ * @brief Custom dispatcher for primitive types
+ */
+template <cudf::type_id Id>
+struct dispatch_primitive_type {
+  using type = primitive_type_t<id_to_type<Id>>;  ///< The underlying type
+};
+
+/**
+ * @brief Performs an equality comparison between two elements in two columns.
+ */
+class element_equality_comparator {
+ public:
+  /**
+   * @brief Compares the specified elements for equality.
+   *
+   * @param lhs The first column
+   * @param rhs The second column
+   * @param lhs_element_index The index of the first element
+   * @param rhs_element_index The index of the second element
+   * @return True if lhs and rhs element are equal
+   */
+  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
+  __device__ bool operator()(column_device_view const& lhs,
+                             column_device_view const& rhs,
+                             size_type lhs_element_index,
+                             size_type rhs_element_index) const
+  {
+    return cudf::detail::equality_compare(lhs.element<Element>(lhs_element_index),
+                                          rhs.element<Element>(rhs_element_index));
+  }
+
+  // @cond
+  template <typename Element, CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>())>
+  __device__ bool operator()(column_device_view const&,
+                             column_device_view const&,
+                             size_type,
+                             size_type) const
+  {
+    CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
+  }
+  // @endcond
+};
+
+/**
+ * @brief Performs an equality comparison between two rows in two tables.
+ */
+class row_equality_comparator {
+ public:
+  /**
+   * @brief Construct a new row equality comparator object
+   *
+   * @param has_nulls Indicates if either input column contains nulls
+   * @param lhs Preprocessed table containing the first element
+   * @param rhs Preprocessed table containing the second element (may be the same as lhs)
+   * @param nulls_are_equal Indicates if two null elements are treated as equivalent
+   */
+  row_equality_comparator(cudf::nullate::DYNAMIC const& has_nulls,
+                          std::shared_ptr<cudf::detail::row::equality::preprocessed_table> lhs,
+                          std::shared_ptr<cudf::detail::row::equality::preprocessed_table> rhs,
+                          null_equality nulls_are_equal)
+    : _has_nulls{has_nulls}, _lhs{*lhs}, _rhs{*rhs}, _nulls_are_equal{nulls_are_equal}
+  {
+    CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
+  }
+
+  /**
+   * @brief Compares the specified rows for equality.
+   *
+   * @param lhs_row_index The index of the first row to compare (in the lhs table)
+   * @param rhs_row_index The index of the second row to compare (in the rhs table)
+   * @return true if both rows are equal, otherwise false
+   */
+  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const
+  {
+    auto equal_elements = [this, lhs_row_index, rhs_row_index](column_device_view const& l,
+                                                               column_device_view const& r) {
+      // Handle null comparison for each element
+      if (_has_nulls) {
+        bool const lhs_is_null{l.is_null(lhs_row_index)};
+        bool const rhs_is_null{r.is_null(rhs_row_index)};
+        if (lhs_is_null and rhs_is_null) {
+          return _nulls_are_equal == null_equality::EQUAL;
+        } else if (lhs_is_null != rhs_is_null) {
+          return false;
+        }
+      }
+
+      // Both elements are non-null, compare their values
+      element_equality_comparator comparator;
+      return cudf::type_dispatcher<dispatch_primitive_type>(
+        l.type(), comparator, l, r, lhs_row_index, rhs_row_index);
+    };
+
+    return thrust::equal(thrust::seq, _lhs.begin(), _lhs.end(), _rhs.begin(), equal_elements);
+  }
+
+  /**
+   * @brief Compares the specified rows for equality.
+   *
+   * @param lhs_index The index of the first row to compare (in the lhs table)
+   * @param rhs_index The index of the second row to compare (in the rhs table)
+   * @return Boolean indicating if both rows are equal
+   */
+  __device__ bool operator()(cudf::detail::row::lhs_index_type lhs_index,
+                             cudf::detail::row::rhs_index_type rhs_index) const
+  {
+    return (*this)(static_cast<size_type>(lhs_index), static_cast<size_type>(rhs_index));
+  }
+
+ private:
+  cudf::nullate::DYNAMIC _has_nulls;
+  table_device_view _lhs;
+  table_device_view _rhs;
+  null_equality _nulls_are_equal;
+};
+
+/**
+ * @brief Function object for computing the hash value of a row in a column.
+ *
+ * @tparam Hash Hash functor to use for hashing elements
+ */
+template